diff --git a/data/algorithms/vin_pretrained_16x16.pth b/data/algorithms/vin_pretrained_16x16.pth
new file mode 100644
index 000000000..574bc7b03
Binary files /dev/null and b/data/algorithms/vin_pretrained_16x16.pth differ
diff --git a/data/algorithms/vin_pretrained_28x28.pth b/data/algorithms/vin_pretrained_28x28.pth
new file mode 100644
index 000000000..d3ac1b05e
Binary files /dev/null and b/data/algorithms/vin_pretrained_28x28.pth differ
diff --git a/data/algorithms/vin_pretrained_8x8.pth b/data/algorithms/vin_pretrained_8x8.pth
new file mode 100644
index 000000000..a8ad96a37
Binary files /dev/null and b/data/algorithms/vin_pretrained_8x8.pth differ
diff --git a/src/algorithms/algorithm_manager.py b/src/algorithms/algorithm_manager.py
index 1447e7cc9..bf69dbe9e 100644
--- a/src/algorithms/algorithm_manager.py
+++ b/src/algorithms/algorithm_manager.py
@@ -30,9 +30,10 @@
from algorithms.classic.sample_based.rrt_star import RRT_Star
from algorithms.classic.sample_based.rrt_connect import RRT_Connect
from algorithms.classic.graph_based.wavefront import Wavefront
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
-from algorithms.lstm.a_star_waypoint import WayPointNavigation
-from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.a_star_waypoint import WayPointNavigation
+from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM
+from algorithms.learning.VIN.VIN import VINAlgorithm
if HAS_OMPL:
from algorithms.classic.sample_based.ompl_rrt import OMPL_RRT
@@ -103,7 +104,8 @@ def _static_init_(cls):
"Dijkstra": (Dijkstra, DijkstraTesting, ([], {})),
"Bug1": (Bug1, BasicTesting, ([], {})),
"Bug2": (Bug2, BasicTesting, ([], {})),
- "Potential Field": (PotentialField, BasicTesting, ([], {}))
+ "Potential Field": (PotentialField, BasicTesting, ([], {})),
+ "VIN": (VINAlgorithm, BasicTesting, ([], {"load_name": "vin_pretrained"}))
}
if HAS_OMPL:
diff --git a/src/algorithms/classic/testing/way_point_navigation_testing.py b/src/algorithms/classic/testing/way_point_navigation_testing.py
index 39cdaae92..dbce9939c 100644
--- a/src/algorithms/classic/testing/way_point_navigation_testing.py
+++ b/src/algorithms/classic/testing/way_point_navigation_testing.py
@@ -3,7 +3,7 @@
import numpy as np
from algorithms.basic_testing import BasicTesting
-from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM
+from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM
from simulator.services.debug import DebugLevel
diff --git a/src/algorithms/configuration/configuration.py b/src/algorithms/configuration/configuration.py
index c62ca1be2..8c1978fdd 100644
--- a/src/algorithms/configuration/configuration.py
+++ b/src/algorithms/configuration/configuration.py
@@ -4,8 +4,8 @@
from algorithms.algorithm import Algorithm
from algorithms.basic_testing import BasicTesting
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule
-from algorithms.lstm.ML_model import MLModel
+from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule
+from algorithms.learning.ML_model import MLModel
from simulator.services.debug import DebugLevel
from structures import Point
diff --git a/src/algorithms/lstm/LSTM_CAE_tile_by_tile.py b/src/algorithms/learning/LSTM_CAE_tile_by_tile.py
similarity index 98%
rename from src/algorithms/lstm/LSTM_CAE_tile_by_tile.py
rename to src/algorithms/learning/LSTM_CAE_tile_by_tile.py
index 8812271e0..282969a8c 100644
--- a/src/algorithms/lstm/LSTM_CAE_tile_by_tile.py
+++ b/src/algorithms/learning/LSTM_CAE_tile_by_tile.py
@@ -15,9 +15,9 @@
from algorithms.basic_testing import BasicTesting
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM
-from algorithms.lstm.ML_model import MLModel, EvaluationResults
-from algorithms.lstm.map_processing import MapProcessing
+from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM
+from algorithms.learning.ML_model import MLModel, EvaluationResults
+from algorithms.learning.map_processing import MapProcessing
from simulator.services.services import Services
from utility.constants import DATA_PATH
diff --git a/src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py b/src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py
similarity index 98%
rename from src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py
rename to src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py
index 359ba691d..b52a1bddb 100644
--- a/src/algorithms/lstm/LSTM_CNN_tile_by_tile_obsolete.py
+++ b/src/algorithms/learning/LSTM_CNN_tile_by_tile_obsolete.py
@@ -10,7 +10,7 @@
from torch import nn
from algorithms.basic_testing import BasicTesting
-from algorithms.lstm.online_lstm import BasicLSTMModule, OnlineLSTM
+from algorithms.learning.online_lstm import BasicLSTMModule, OnlineLSTM
from simulator.services.services import Services
diff --git a/src/algorithms/lstm/LSTM_tile_by_tile.py b/src/algorithms/learning/LSTM_tile_by_tile.py
similarity index 98%
rename from src/algorithms/lstm/LSTM_tile_by_tile.py
rename to src/algorithms/learning/LSTM_tile_by_tile.py
index 13ca0fd13..2f8520fb6 100644
--- a/src/algorithms/lstm/LSTM_tile_by_tile.py
+++ b/src/algorithms/learning/LSTM_tile_by_tile.py
@@ -10,8 +10,8 @@
from algorithms.basic_testing import BasicTesting
from algorithms.configuration.entities.goal import Goal
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.ML_model import MLModel, SingleTensorDataset, PackedDataset
-from algorithms.lstm.map_processing import MapProcessing
+from algorithms.learning.ML_model import MLModel, SingleTensorDataset, PackedDataset
+from algorithms.learning.map_processing import MapProcessing
from simulator.services.services import Services
from simulator.views.map.display.entities_map_display import EntitiesMapDisplay
from simulator.views.map.display.online_lstm_map_display import OnlineLSTMMapDisplay
diff --git a/src/algorithms/lstm/ML_model.py b/src/algorithms/learning/ML_model.py
similarity index 99%
rename from src/algorithms/lstm/ML_model.py
rename to src/algorithms/learning/ML_model.py
index d28587346..c1d2b5e74 100644
--- a/src/algorithms/lstm/ML_model.py
+++ b/src/algorithms/learning/ML_model.py
@@ -12,7 +12,7 @@
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence, pack_sequence, PackedSequence
from torch.utils import data
from torch.utils.data import DataLoader, TensorDataset, Dataset, Subset
-from algorithms.lstm.map_processing import MapProcessing
+from algorithms.learning.map_processing import MapProcessing
from simulator.services.debug import DebugLevel
from simulator.services.services import Services
@@ -154,7 +154,7 @@ class PackedDataset(Dataset):
lengths: torch.Tensor
def __init__(self, seq: List[torch.Tensor]) -> None:
- from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule
+ from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule
ls = list(map(lambda el: el.shape[0], seq))
self.perm = BasicLSTMModule.get_sort_by_lengths_indices(ls)
diff --git a/src/algorithms/learning/VIN/.gitignore b/src/algorithms/learning/VIN/.gitignore
new file mode 100755
index 000000000..80247da36
--- /dev/null
+++ b/src/algorithms/learning/VIN/.gitignore
@@ -0,0 +1,135 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# npz
+*.npz
+
+# pth
+*.pth
+*.pth.1
+
+vin_my_implementation.code-workspace
+
+
+.vscode/launch.json
+/resources/training_maps/
+/resources/testing_maps/
+
+/resources/logs/
+/resources/test_maps/
+/resources/16_100k/
+/resources/16_60k/
+/resources/16_60k_pt2/
+
+/resources/house_expo/
+/resources/house_expo_old/
+/resources/house_expo_100x100/
+
+
+
+
+
+
+resources/100k_no_block.tar.gz
diff --git a/src/algorithms/learning/VIN/LICENSE b/src/algorithms/learning/VIN/LICENSE
new file mode 100644
index 000000000..07a851703
--- /dev/null
+++ b/src/algorithms/learning/VIN/LICENSE
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2020, Hussein Ali Jaafar
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/algorithms/learning/VIN/README.md b/src/algorithms/learning/VIN/README.md
new file mode 100644
index 000000000..101577f46
--- /dev/null
+++ b/src/algorithms/learning/VIN/README.md
@@ -0,0 +1,143 @@
+
+## Introduction
+
+This is a modified implementation of Kent Sommer's PyTorch Value Iteration Networks implementation, meant to work with PathBench.
+
+Read about Kent Sommer's implementation: [Here](https://github.com/kentsommer/pytorch-value-iteration-networks "Pytorch implementation of Value Iteration Networks")
+
+Read about PathBench: [Here](https://github.com/husseinalijaafar/PathBench "A Benchmarking Platform for Classic and Learned Path Planning Algorithms")
+
+Read the original paper: [Here](https://arxiv.org/abs/1602.02867)
+
+Read about similar implementations, which have made this modified implementation possible:
+* [@kentsommer](https://github.com/kentsommer) ([PyTorch implementation](https://github.com/kentsommer/pytorch-value-iteration-networks))
+* [@avivt](https://github.com/avivt) ([Paper Author](https://arxiv.org/abs/1602.02867), [MATLAB implementation](https://github.com/avivt/VIN))
+* [@zuoxingdong](https://github.com/zuoxingdong) ([Tensorflow implementation](https://github.com/zuoxingdong/VIN_TensorFlow), [Pytorch implementation](https://github.com/zuoxingdong/VIN_PyTorch_Visdom))
+* [@TheAbhiKumar](https://github.com/TheAbhiKumar) ([Tensorflow implementation](https://github.com/TheAbhiKumar/tensorflow-value-iteration-networks))
+* [@onlytailei](https://github.com/onlytailei) ([Pytorch implementation](https://github.com/onlytailei/Value-Iteration-Networks-PyTorch))
+
+## What has been modified?
+In order to run on PathBench generated maps, I had to modify a few areas of the code. The main difference is within the `test.py` file and `gridworld` class. The maps (resources>maps) are JSON files structured with the goal position, agent position and grid.
+I have also added a few metrics, such as path deviation from optimal path, and time.
+
+The PathBench maps are structured where 0 is obstacle and 1 is free space.
+Sample Map:
+
+## Installation
+Instructions:
+1. Install Packages
+2. Download Training Data
+3. Train on Training Data
+4. Run `test.py`.
+
+This repository requires following packages:
+- [SciPy](https://www.scipy.org/install.html) >= 0.19.0
+- [Python](https://www.python.org/) >= 2.7 (if using Python 3.x: python3-tk should be installed)
+- [Numpy](https://pypi.python.org/pypi/numpy) >= 1.12.1
+- [Matplotlib](https://matplotlib.org/users/installing.html) >= 2.0.0
+- [PyTorch](http://pytorch.org/) >= 0.1.11
+
+Use `pip` to install the necessary dependencies:
+```
+pip install -U -r requirements.txt
+```
+Note that PyTorch cannot be installed directly from PyPI; refer to http://pytorch.org/ for custom installation instructions specific to your needs.
+
+#### Downloading Training Data
+WIP
+
+To acquire the training data (NOT for PathBench Maps), either run the training data generator `make_training_data.py` in the dataset folder (resource intensive), or run the shell script to download them:
+1. `cd` into main directory
+2. `chmod +x download_weights_and_datasets.sh`
+3. `./download_weights_and_datasets.sh`
+
+It should download the training data.
+#### PathBench Training Data
+
+*Outdated- Please await updated training files*
+
+To download PathBench training data, visit [GDrive](https://drive.google.com/file/d/11D-QCf5qZ4qusv66XhxOqqdcyHl5RLHk/view?usp=sharing)
+Currently, only the 90000 map training data is uploaded. I will upload more as I generate them.
+
+To generate your own training data, add the JSON files with the correct structure to a specified path, and pass that path in the `make_training_data.py` file. It is fairly straightforward.
+
+## How to train
+#### 8x8 gridworld
+```bash
+python train.py --datafile dataset/gridworld_8x8.npz --imsize 8 --lr 0.005 --epochs 30 --k 10 --batch_size 128
+```
+#### 16x16 gridworld
+```bash
+python train.py --datafile dataset/gridworld_16x16.npz --imsize 16 --lr 0.002 --epochs 30 --k 20 --batch_size 128
+```
+#### 28x28 gridworld
+```bash
+python train.py --datafile dataset/gridworld_28x28.npz --imsize 28 --lr 0.002 --epochs 30 --k 36 --batch_size 128
+```
+**Flags**:
+- `datafile`: The path to the data files.
+- `imsize`: The size of input images. One of: [8, 16, 28]
+- `lr`: Learning rate with RMSProp optimizer. Recommended: [0.01, 0.005, 0.002, 0.001]
+- `epochs`: Number of epochs to train. Default: 30
+- `k`: Number of Value Iterations. Recommended: [10 for 8x8, 20 for 16x16, 36 for 28x28]
+- `l_i`: Number of channels in input layer. Default: 2, i.e. obstacles image and goal image.
+- `l_h`: Number of channels in first convolutional layer. Default: 150, described in paper.
+- `l_q`: Number of channels in q layer (~actions) in VI-module. Default: 10, described in paper.
+- `batch_size`: Batch size. Default: 128
+
+## How to test / visualize paths (requires training first)
+#### 8x8 gridworld
+```bash
+python test.py --weights trained/vin_8x8.pth --imsize 8 --k 10
+```
+#### 16x16 gridworld
+```bash
+python test.py --weights trained/vin_16x16.pth --imsize 16 --k 20
+```
+#### 28x28 gridworld
+```bash
+python test.py --weights trained/vin_28x28.pth --imsize 28 --k 36
+```
+#### 64x64 gridworld
+```bash
+python test.py --weights trained/vin_28x28.pth --imsize 28 --k 36
+```
+(64x64 still uses 28x28 trained data, we haven't trained VIN on 64x64 maps yet.)
+
+To visualize the optimal and predicted paths simply pass:
+```bash
+--plot
+```
+
+**Flags**:
+- `weights`: Path to trained weights.
+- `imsize`: The size of input images. One of: [8, 16, 28]
+- `plot`: If supplied, the optimal and predicted paths will be plotted
+- `k`: Number of Value Iterations. Recommended: [10 for 8x8, 20 for 16x16, 36 for 28x28]
+- `l_i`: Number of channels in input layer. Default: 2, i.e. obstacles image and goal image.
+- `l_h`: Number of channels in first convolutional layer. Default: 150, described in paper.
+- `l_q`: Number of channels in q layer (~actions) in VI-module. Default: 10, described in paper.
+
+## Results
+The maps that VIN trains on are NOT the PathBench maps; rather, they are maps generated from Kent's implementation. This is still WIP.
+Therefore, when running VIN w/ PathBench maps, it is running on untrained style maps (Block map and Uniform Random Fill are unfamiliar to
+the algorithm).
+
+Logs are saved in `resources>logs`. You can change the logging behaviour (debug vs info) in `test_pb.py`. Ensure logs don't overwrite each other by changing the name at each run.
+
+### Sample Maps:
+Block Map:
+
+
+
+Uniform Random Fill Map
+
+
+House Map
+
+
+
+### Training
+WIP
+
+
diff --git a/src/algorithms/learning/VIN/VIN.py b/src/algorithms/learning/VIN/VIN.py
new file mode 100644
index 000000000..e15187a3a
--- /dev/null
+++ b/src/algorithms/learning/VIN/VIN.py
@@ -0,0 +1,286 @@
+import sys
+import random
+import numpy as np
+import torch
+import logging
+import time
+import math
+from torch.autograd import Variable
+from typing import Dict, List, Type
+
+from .utility.utils import *
+
+from .model import *
+from .domains.gridworld import *
+from .generators.obstacle_gen import *
+
+from algorithms.algorithm import Algorithm
+from simulator.services.services import Services
+from algorithms.basic_testing import BasicTesting
+from structures.point import Point
+
+class VINConfig:
+ def __init__(self, l_i=2, l_h=150, l_q=10, k=10):
+ self.l_i = l_i
+ self.l_h = l_h
+ self.l_q = l_q
+ self.k = k
+
+class VINAlgorithm(Algorithm):
+ cached_models: Dict[int, Type[VIN]]
+ use_GPU: bool
+
+ def __init__(self, services: Services, testing: BasicTesting = None, config: VINConfig = VINConfig(), load_name: str = "VIN"):
+ super().__init__(services, testing)
+ self.cached_models = {}
+ self.use_GPU = torch.cuda.is_available()
+ self._load_name = load_name
+ self.config = config
+
+ def set_display_info(self):
+ return super().set_display_info() + [
+
+ ]
+
+ def _find_path_internal(self) -> None:
+ mp = self._get_grid()
+ assert mp.size[0] == mp.size[1] and len(mp.size) == 2, \
+ f"VIN only accepts square 2D maps, map size {mp.size}"
+ imsize = mp.size[0]
+ grid = np.copy(mp.grid)
+ self.config.imsize = imsize
+ model: VIN = self.load_VIN(mp.size[0])
+ start: Tuple[int] = (mp.agent.position.x, mp.agent.position.y)
+ goal: Tuple[int] = (mp.goal.position.x, mp.goal.position.y)
+
+ grid[mp.agent.position.x, mp.agent.position.y] = 0 #Set the start position as freespace too
+ grid[mp.goal.position.x, mp.goal.position.y] = 0 #Set the goal position as freespace too
+
+ obs = obstacles([imsize, imsize], goal)
+ obs.dom = grid
+
+ im = obs.get_final()
+ G = gridworld(im, goal[0], goal[1])
+ # =======
+ value_prior = G.get_reward_prior()
+ # Sample random trajectories to our goal
+ states_xy, states_one_hot = sample_trajectory(G, 1, start, False) #dijkstra trajectory
+ # print('states_xy', states_xy[0] , len(states_xy[0]))
+
+ i = 0
+ if len(states_xy[i]) > 1:
+ # Get number of steps to goal
+ L = len(states_xy[i]) * 2
+ # Allocate space for predicted steps
+ pred_traj = np.zeros((L, 2))
+ # Set starting position
+ pred_traj[0, :] = states_xy[i][0, :]
+
+ for j in range(1, L):
+ # Transform current state data
+ state_data = pred_traj[j - 1, :]
+ state_data = state_data.astype(np.int)
+ # Transform domain to Networks expected input shape
+ im_data = G.image.astype(np.int)
+ im_data = 1 - im_data
+ im_data = im_data.reshape(1, 1, imsize,
+ imsize)
+ # Transfrom value prior to Networks expected input shape
+ value_data = value_prior.astype(np.int)
+ value_data = value_data.reshape(1, 1, imsize,
+ imsize)
+ # Get inputs as expected by network
+ X_in = torch.from_numpy(
+ np.append(im_data, value_data, axis=1)).float()
+ S1_in = torch.from_numpy(state_data[0].reshape(
+ [1, 1])).float()
+ S2_in = torch.from_numpy(state_data[1].reshape(
+ [1, 1])).float()
+ # Send Tensors to GPU if available
+ if self.use_GPU:
+ X_in = X_in.cuda()
+ S1_in = S1_in.cuda()
+ S2_in = S2_in.cuda()
+ # Wrap to autograd.Variable
+ X_in, S1_in, S2_in = Variable(X_in), Variable(
+ S1_in), Variable(S2_in)
+ # Forward pass in our neural net
+ _, predictions = model(X_in, S1_in, S2_in, self.config)
+ _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
+ a = indices.data.numpy()[0][0]
+ # Transform prediction to indices
+ s = G.map_ind_to_state(pred_traj[j - 1, 0],
+ pred_traj[j - 1, 1])
+ ns = G.sample_next_state(s, a)
+ nr, nc = G.get_coords(ns)
+ pred_traj[j, 0] = nr
+ pred_traj[j, 1] = nc
+ self.move_agent(Point(nr, nc))
+ self.key_frame(True)
+ if nr == goal[0] and nc == goal[1]:
+ # We hit goal so fill remaining steps
+ pred_traj[j + 1:, 0] = nr
+ pred_traj[j + 1:, 1] = nc
+ break
+ # Plot optimal and predicted path (also start, end)
+ if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
+ self.move_agent(self._get_grid().goal.position)
+ self.key_frame(True)
+ return
+ self.key_frame(True)
+
+ def load_VIN(self, size):
+ if size in self.cached_models: return self.cached_models[size]
+ load_fname = f"{self._load_name}_{size}x{size}.pth"
+ load_path = self._services.resources.model_dir._full_path() + load_fname
+ vin = VIN(self.config)
+ vin.load_state_dict(torch.load(load_path, map_location=None if self.use_GPU else torch.device("cpu")))
+ if self.use_GPU: vin = vin.cuda()
+ self.cached_models[size] = vin
+ return vin
+
+
+def visualize(dom, states_xy, pred_traj):
+ fig, ax = plt.subplots()
+ implot = plt.imshow(dom, cmap="Greys_r")
+ ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
+ ax.plot(
+ pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
+ ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
+ ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
+ legend = ax.legend(loc='upper right', shadow=False)
+ for label in legend.get_texts():
+ label.set_fontsize('x-small') # the legend text size
+ for label in legend.get_lines():
+ label.set_linewidth(0.5) # the legend line width
+ plt.draw()
+ plt.waitforbuttonpress(0)
+ plt.close(fig)
+
+
+def save_image(im, goal, start,states_xy,states_one_hot,counter):
+ '''
+ Appends one map record ('grid', 'goal', 'agent') to ``data`` and rewrites
+ the JSON dump of ``data`` at ./maps/<imsize>_data_300.json.
+
+ NOTE(review): ``data``, ``start_x`` and ``start_y`` are not defined in the
+ visible part of this module, ``config`` is only assigned under the
+ ``__main__`` guard, and ``json`` is not imported by name; presumably these
+ were globals of the stand-alone script this was copied from -- confirm
+ before calling.
+ The ``start``, ``states_one_hot`` and ``counter`` parameters are not read.
+ '''
+ s = config.imsize
+
+ if len(states_xy[0]) == 0:
+
+ # NOTE(review): this assigns into the temporary list returned by
+ # tolist(), so neither ``im`` nor the 'grid' entry below is changed.
+ im.tolist()[start_x][start_y] = 1
+ # No trajectory available: the agent position falls back to [0, 0].
+ start_xy = [0,0]
+ mp = {
+ 'grid': im.tolist(),
+ 'goal': [goal[0],goal[1]],
+ # 'start': int(start),
+ 'agent': start_xy}
+ # 'states_xy': states_xy[0].tolist(),
+ # 'states_one_hot': states_one_hot[0].tolist()
+ else:
+ # The agent position is the first state of the first trajectory.
+ mp = {
+ 'grid': im.tolist(),
+ 'goal': [goal[0],goal[1]],
+ # 'start': int(start),
+ 'agent': states_xy[0][0].tolist()
+ # 'states_xy': states_xy[0].tolist(),
+ # 'states_one_hot': states_one_hot[0].tolist()
+ }
+ data.append(mp)
+ # The whole accumulated list is written out again on every call.
+ with open('./maps/' +str(s) + '_data_300' + '.json', 'w') as outfile:
+ json.dump(data,outfile)
+
+def open_map(dom,path):
+ '''
+ Used to open a map json given dom and path, returns grid, goal and agent
+ '''
+ with open(str(path) + str(dom) +'.json') as json_file:
+ data = json.load(json_file)
+ logging.info('Opening file: ' + str(path) + str(dom) + '.json' )
+ return data['grid'], data['goal'], data['agent']
+
+def open_map_list(dom,path):
+ with open(str(path) + '.json') as json_file:
+ data = json.load(json_file)
+ logging.info('Opening file: ' + str(path) + str(dom) + '.json' )
+ return data[dom]['grid'], data[dom]['goal'], data[dom]['agent']
+
+def deviation(optimal_path, pred_path,goal, map_num):
+ optimal_path = np.array(optimal_path)
+ optimal_path = 1.0 * optimal_path
+
+ optimal_path_x = np.array(optimal_path[:,0])
+ optimal_path_y = np.array(optimal_path[:,1])
+
+ pred_path = np.unique(pred_path, axis=0) #removes duplicates at the end (when it reaches goal)
+
+ #print('Shortened path' , pred_path)
+ pred_path_x = np.array(pred_path[:,0])
+ pred_path_y = np.array(pred_path[:,1])
+ dist = 0.0
+ astar_dist = 0.0
+ prev = pred_path[0,:]
+ total_diff_gen = 0
+ for xy in pred_path[:,:]:
+
+ diff = math.sqrt( ((1.0 * xy[0]- 1.0*prev[0])**2)+((1.0*xy[1] - 1.0*prev[1])**2))
+ total_diff_gen += diff
+ dist+= ((xy[0]-prev[0])**2 + (xy[1]-prev[1])**2)**0.5
+ prev = xy
+
+ #prev = [0,0]
+ #print('opt', optimal_path[0,:])
+ prev = optimal_path[0,:]
+ total_diff_optim = 0
+ for xy in optimal_path[:,:]:
+ # print('xy', xy)
+ diff2 = math.sqrt( ((1.0 * xy[0]- 1.0*prev[0])**2)+((1.0*xy[1] - 1.0*prev[1])**2))
+ total_diff_optim += diff2
+ astar_dist+= ((xy[0]-prev[0])**2 + (xy[1]-prev[1])**2)**0.5
+ prev = xy
+
+ dev_non_rel = abs(total_diff_optim-total_diff_gen)
+ dev_rel = dev_non_rel/total_diff_optim #TODO: Add avg distance of gen trajectory
+ return(dev_rel,dev_non_rel,dist,astar_dist)
+
+def dist_left(pred_traj, goal):
+ '''
+ Finds the distance left between the point and the goal
+ '''
+ pred_traj = np.array(pred_traj) #euclidean distance or geometric distance ? use geometric
+ x1,y1 = pred_traj[-1][0], pred_traj[-1][1]
+ x2,y2 = goal[0],goal[1]
+ dist = (((x2-x1)**2 + (y2-y1)**2))**0.5
+ return dist
+
+
+if __name__ == '__main__':
+ # Parsing training parameters
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--weights',
+ type=str,
+ default='trained/vin_8x8.pth',
+ help='Path to trained weights')
+ parser.add_argument(
+ '--maps',
+ type=str,
+ default='resources/testing_maps/16x16',
+ help='Path to maps')
+ parser.add_argument('--plot', action='store_true', default=False)
+ parser.add_argument('--gen', action='store_true', default=False)
+ parser.add_argument('--imsize', type=int, default=8, help='Size of image')
+ parser.add_argument(
+ '--k', type=int, default=10, help='Number of Value Iterations')
+ parser.add_argument(
+ '--l_i', type=int, default=2, help='Number of channels in input layer')
+ parser.add_argument(
+ '--l_h',
+ type=int,
+ default=150,
+ help='Number of channels in first hidden layer')
+ parser.add_argument(
+ '--l_q',
+ type=int,
+ default=10,
+ help='Number of channels in q layer (~actions) in VI-module')
+ config = parser.parse_args()
+ # Compute Paths generated by network and plot
diff --git a/src/algorithms/learning/VIN/dataset/README.MD b/src/algorithms/learning/VIN/dataset/README.MD
new file mode 100644
index 000000000..d67709e69
--- /dev/null
+++ b/src/algorithms/learning/VIN/dataset/README.MD
@@ -0,0 +1,8 @@
+# Gridworld datasets
+To use the gridworld datasets you have two choices:
+1. Download and place the .npz dataset files here
+ * gridworld_8x8.npz
+ * gridworld_16x16.npz
+ * gridworld_28x28.npz
+2. Use the dataset generation script
+ * ```make_training_data.py```
diff --git a/src/algorithms/lstm/__init__.py b/src/algorithms/learning/VIN/dataset/__init__.py
similarity index 100%
rename from src/algorithms/lstm/__init__.py
rename to src/algorithms/learning/VIN/dataset/__init__.py
diff --git a/src/algorithms/learning/VIN/dataset/dataset.py b/src/algorithms/learning/VIN/dataset/dataset.py
new file mode 100644
index 000000000..c2f579bf6
--- /dev/null
+++ b/src/algorithms/learning/VIN/dataset/dataset.py
@@ -0,0 +1,67 @@
+import numpy as np
+
+import torch
+import torch.utils.data as data
+
+
+class GridworldData(data.Dataset):
+ def __init__(self,
+ file,
+ imsize,
+ train=True,
+ transform=None,
+ target_transform=None):
+ assert file.endswith('.npz') # Must be .npz format
+ self.file = file
+ self.imsize = imsize
+ self.transform = transform
+ self.target_transform = target_transform
+ self.train = train # training set or test set
+
+ self.images, self.S1, self.S2, self.labels = \
+ self._process(file, self.train)
+
+ def __getitem__(self, index):
+ img = self.images[index]
+ s1 = self.S1[index]
+ s2 = self.S2[index]
+ label = self.labels[index]
+ # Apply transform if we have one
+ if self.transform is not None:
+ img = self.transform(img)
+ else: # Internal default transform: Just to Tensor
+ img = torch.from_numpy(img)
+ # Apply target transform if we have one
+ if self.target_transform is not None:
+ label = self.target_transform(label)
+ return img, int(s1), int(s2), int(label)
+
+ def __len__(self):
+ return self.images.shape[0]
+
+ def _process(self, file, train):
+ """Data format: A list, [train data, test data]
+ Each data sample: label, S1, S2, Images, in this order.
+ """
+ with np.load(file, mmap_mode='r') as f:
+ if train:
+ images = f['arr_0']
+ S1 = f['arr_1']
+ S2 = f['arr_2']
+ labels = f['arr_3']
+ else:
+ images = f['arr_4']
+ S1 = f['arr_5']
+ S2 = f['arr_6']
+ labels = f['arr_7']
+ # Set proper datatypes
+ images = images.astype(np.float32)
+ S1 = S1.astype(int) # (S1, S2) location are integers
+ S2 = S2.astype(int)
+ labels = labels.astype(int) # labels are integers
+ # Print number of samples
+ if train:
+ print("Number of Train Samples: {0}".format(images.shape[0]))
+ else:
+ print("Number of Test Samples: {0}".format(images.shape[0]))
+ return images, S1, S2, labels
diff --git a/src/algorithms/learning/VIN/dataset/make_training_data.py b/src/algorithms/learning/VIN/dataset/make_training_data.py
new file mode 100644
index 000000000..6baf94347
--- /dev/null
+++ b/src/algorithms/learning/VIN/dataset/make_training_data.py
@@ -0,0 +1,142 @@
+import sys
+import json
+import numpy as np
+from dataset import *
+
+sys.path.append('.')
+from domains.gridworld import *
+from generators.obstacle_gen import *
+sys.path.remove('.')
+
+
def extract_action(traj):
    """Map a trajectory of grid positions to the action taken at each step.

    Every consecutive displacement is scaled to unit length and projected
    onto the eight unit direction vectors; the index of the direction whose
    projection equals 1 is the action. A step that matches no direction
    contributes 0.

    Args:
        traj: 2-D array with one position per row and two columns.

    Returns:
        1-D array holding one action index per step of the trajectory.
    """
    n_actions = 8
    directions = np.asarray([[-1., 0.], [1., 0.], [0., 1.], [0., -1.],
                             [-1., 1.], [-1., -1.], [1., 1.], [1., -1.]])
    # Scale the four diagonal directions to unit length.
    directions[4:] = 1 / np.sqrt(2) * directions[4:]
    steps = np.diff(traj, axis=0)
    inverse_length = 1 / np.sqrt(np.sum(np.square(steps), axis=1))
    unit_steps = steps * inverse_length[:, np.newaxis]
    projections = np.dot(unit_steps, directions.T)
    matches = np.abs(projections - 1) < 0.00001
    return np.dot(matches, np.arange(n_actions))
+
+
def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj,
              state_batch_size, testing=False, map_dir=None):
    """Build VIN samples from pre-generated JSON maps.

    Maps ``0 .. n_domains - 1`` are read from ``map_dir``; when ``testing`` is
    true the map numbers are offset by 100000 so the two splits use different
    files. For every map the trajectory returned by ``sample_trajectory`` is
    turned into one sample per step.

    Args:
        dom_size: ``[rows, cols]`` of the maps.
        n_domains: Number of maps to read (may be fractional; maps are read
            while the counter is below it).
        max_obs: Unused here; kept for interface compatibility.
        max_obs_size: Forwarded to ``obstacles``.
        n_traj: Number of trajectories requested per map.
        state_batch_size: Unused here; kept for interface compatibility.
        testing: Read the testing range of map files instead of the training
            range.
        map_dir: Directory prefix of the map files (must end with a path
            separator). Defaults to the location the script was written for.

    Returns:
        ``(X, S1, S2, Labels)`` arrays concatenated over all samples.
    """
    # Location used when no ``map_dir`` is given; kept so existing callers
    # behave as before.
    default_map_dir = ('/home/hussein/Desktop/git-projects/hjaafar_vin/'
                       'value-iteration-networks-pb/resources/training_maps/'
                       '8x8_150000/50000_uniform_seed50/')
    test_map_offset = 100000
    path = default_map_dir if map_dir is None else map_dir
    first_map = test_map_offset if testing else 0

    X_l = []
    S1_l = []
    S2_l = []
    Labels_l = []

    dom = 0
    # Strictly below n_domains: with "<=" the training split would also read
    # map number n_domains, which is the first map of the testing split.
    while dom < n_domains:
        mp, goal, start = open_map(dom + first_map, path)

        # Set the start and goal positions as free space too.
        mp[start[1]][start[0]] = 0
        mp[goal[1]][goal[0]] = 0

        # Swap to row/col format (x is the column, not the row).
        goal = [goal[1], goal[0]]
        start = [start[1], start[0]]

        obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size)
        obs.dom = mp
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.t_get_reward_prior()
        # Trajectory from the map's start position to the goal
        states_xy, states_one_hot = sample_trajectory(
            G, n_traj, start, gen=False)
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Get optimal actions for each state
                actions = extract_action(states_xy[i])
                ns = states_xy[i].shape[0] - 1
                # Invert domain image => 0 = free, 1 = obstacle
                image = 1 - im
                # Resize domain and goal images and concatenate them
                image_data = np.resize(image, (1, 1, dom_size[0], dom_size[1]))
                value_data = np.resize(value_prior,
                                       (1, 1, dom_size[0], dom_size[1]))
                iv_mixed = np.concatenate((image_data, value_data), axis=1)
                X_current = np.tile(iv_mixed, (ns, 1, 1, 1))
                # Resize states
                S1_current = np.expand_dims(states_xy[i][0:ns, 0], axis=1)
                S2_current = np.expand_dims(states_xy[i][0:ns, 1], axis=1)
                # Resize labels
                Labels_current = np.expand_dims(actions, axis=1)
                # Append to output list
                X_l.append(X_current)
                S1_l.append(S1_current)
                S2_l.append(S2_current)
                Labels_l.append(Labels_current)
        dom += 1
        sys.stdout.write("\r" + str(int((dom / n_domains) * 100)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    # Concat all outputs
    X_f = np.concatenate(X_l)
    S1_f = np.concatenate(S1_l)
    S2_f = np.concatenate(S2_l)
    Labels_f = np.concatenate(Labels_l)
    return X_f, S1_f, S2_f, Labels_f
+
+
def open_map(dom, path):
    """Load one pre-generated map description from ``<path><dom>.json``.

    Args:
        dom: Map number; converted with ``int`` to build the file name.
        path: Directory prefix. It is concatenated as-is, so it must end with
            a path separator.

    Returns:
        Tuple ``(grid, goal, agent)`` read from the JSON keys of those names.
    """
    # Imported here so the function does not rely on ``logging`` reaching
    # this module through one of its star imports.
    import logging

    file_name = str(path) + str(int(dom)) + '.json'
    with open(file_name) as json_file:
        map_data = json.load(json_file)
    logging.info('Opening file: ' + file_name)
    return map_data['grid'], map_data['goal'], map_data['agent']
+
def main(dom_size=(8, 8),
         n_domains=100000,
         max_obs=50,
         max_obs_size=2,
         n_traj=1,
         state_batch_size=1):
    """Generate the training and testing splits and save them as one ``.npz``.

    Args:
        dom_size: ``(rows, cols)`` of the maps. An immutable default is used
            so the value cannot be shared and modified between calls.
        n_domains: Number of training maps; the testing split uses
            ``n_domains / 6``.
        max_obs: Forwarded to ``make_data``.
        max_obs_size: Forwarded to ``make_data``.
        n_traj: Trajectories per map.
            NOTE(review): each map file provides a single start position, and
            values above 1 were reported to raise an error - confirm before
            changing.
        state_batch_size: Forwarded to ``make_data``.
    """
    # Get path to save dataset
    save_path = "dataset/new_gridworld_{0}x{1}".format(dom_size[0], dom_size[1])
    # Get training data
    print("Now making training data...")
    X_out_tr, S1_out_tr, S2_out_tr, Labels_out_tr = make_data(
        dom_size, n_domains, max_obs, max_obs_size, n_traj, state_batch_size,
        testing=False)
    # Get testing data
    print("\nNow making testing data...")
    X_out_ts, S1_out_ts, S2_out_ts, Labels_out_ts = make_data(
        dom_size, n_domains / 6, max_obs, max_obs_size, n_traj,
        state_batch_size, testing=True)
    # Save dataset: arrays are stored positionally, training split first.
    np.savez_compressed(save_path, X_out_tr, S1_out_tr, S2_out_tr,
                        Labels_out_tr, X_out_ts, S1_out_ts, S2_out_ts,
                        Labels_out_ts)


if __name__ == '__main__':
    main()
diff --git a/src/algorithms/learning/VIN/dataset/make_training_data_og.py b/src/algorithms/learning/VIN/dataset/make_training_data_og.py
new file mode 100644
index 000000000..cf31fd857
--- /dev/null
+++ b/src/algorithms/learning/VIN/dataset/make_training_data_og.py
@@ -0,0 +1,116 @@
+import sys
+
+import numpy as np
+from dataset import *
+
+sys.path.append('.')
+from domains.gridworld import *
+from generators.obstacle_gen import *
+sys.path.remove('.')
+
+
def extract_action(traj):
    """Return the action index of every step in a trajectory.

    The displacement between consecutive positions is normalised and compared
    with eight unit direction vectors; a step is assigned the index of the
    direction it projects onto with value 1 (0 when none matches).

    Args:
        traj: 2-D array with one position per row and two columns.

    Returns:
        1-D array with one action index per step.
    """
    n_actions = 8
    unit_dirs = np.asarray([[-1., 0.], [1., 0.], [0., 1.], [0., -1.],
                            [-1., 1.], [-1., -1.], [1., 1.], [1., -1.]])
    # The last four directions are diagonals and need scaling to length 1.
    unit_dirs[4:] = 1 / np.sqrt(2) * unit_dirs[4:]
    deltas = np.diff(traj, axis=0)
    squared_length = np.sum(np.square(deltas), axis=1)
    scale = (1 / np.sqrt(squared_length)).reshape(-1, 1)
    normalised = deltas * scale
    one_hot = np.abs(np.dot(normalised, unit_dirs.T) - 1) < 0.00001
    action_ids = np.arange(n_actions)
    return np.dot(one_hot, action_ids)
+
+
def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj,
              state_batch_size):
    """Build VIN samples from randomly generated obstacle maps.

    For each generated map, ``n_traj`` trajectories to a random goal are
    sampled and every step of every trajectory becomes one sample.

    Args:
        dom_size: ``[rows, cols]`` of the maps.
        n_domains: Upper bound of the map counter (maps are generated while
            the counter is ``<= n_domains``).
        max_obs: Forwarded to ``obstacles.add_n_rand_obs``.
        max_obs_size: Forwarded to ``obstacles``.
        n_traj: Number of trajectories sampled per map.
        state_batch_size: Unused here; kept for interface compatibility.

    Returns:
        ``(X, S1, S2, Labels)`` arrays concatenated over all samples.
    """
    X_l = []
    S1_l = []
    S2_l = []
    Labels_l = []

    dom = 0.0
    while dom <= n_domains:
        goal = [np.random.randint(dom_size[0]), np.random.randint(dom_size[1])]
        # Generate obstacle map
        obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map; otherwise retry without counting it
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.t_get_reward_prior()
        # Sample random trajectories to our goal. sample_trajectory requires
        # a ``start`` argument; it is not read when gen=True, which selects
        # the randomly chosen start states this script relies on.
        states_xy, states_one_hot = sample_trajectory(
            G, n_traj, None, gen=True)
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Get optimal actions for each state
                actions = extract_action(states_xy[i])
                ns = states_xy[i].shape[0] - 1
                # Invert domain image => 0 = free, 1 = obstacle
                image = 1 - im
                # Resize domain and goal images and concatenate them
                image_data = np.resize(image, (1, 1, dom_size[0], dom_size[1]))
                value_data = np.resize(value_prior,
                                       (1, 1, dom_size[0], dom_size[1]))
                iv_mixed = np.concatenate((image_data, value_data), axis=1)
                X_current = np.tile(iv_mixed, (ns, 1, 1, 1))
                # Resize states
                S1_current = np.expand_dims(states_xy[i][0:ns, 0], axis=1)
                S2_current = np.expand_dims(states_xy[i][0:ns, 1], axis=1)
                # Resize labels
                Labels_current = np.expand_dims(actions, axis=1)
                # Append to output list
                X_l.append(X_current)
                S1_l.append(S1_current)
                S2_l.append(S2_current)
                Labels_l.append(Labels_current)
        dom += 1
        sys.stdout.write("\r" + str(int((dom / n_domains) * 100)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    # Concat all outputs
    X_f = np.concatenate(X_l)
    S1_f = np.concatenate(S1_l)
    S2_f = np.concatenate(S2_l)
    Labels_f = np.concatenate(Labels_l)
    return X_f, S1_f, S2_f, Labels_f
+
+
def main(dom_size=(28, 28),
         n_domains=5000,
         max_obs=50,
         max_obs_size=2,
         n_traj=7,
         state_batch_size=1):
    """Generate the training and testing splits and save them as one ``.npz``.

    Args:
        dom_size: ``(rows, cols)`` of the maps. An immutable default is used
            so the value cannot be shared and modified between calls.
        n_domains: Map count for the training split; the testing split uses
            ``n_domains / 6``.
        max_obs: Forwarded to ``make_data``.
        max_obs_size: Forwarded to ``make_data``.
        n_traj: Trajectories sampled per map.
        state_batch_size: Forwarded to ``make_data``.
    """
    # Get path to save dataset
    save_path = "dataset/gridworld_{0}x{1}".format(dom_size[0], dom_size[1])
    # Get training data
    print("Now making training data...")
    X_out_tr, S1_out_tr, S2_out_tr, Labels_out_tr = make_data(
        dom_size, n_domains, max_obs, max_obs_size, n_traj, state_batch_size)
    # Get testing data
    print("\nNow making testing data...")
    X_out_ts, S1_out_ts, S2_out_ts, Labels_out_ts = make_data(
        dom_size, n_domains / 6, max_obs, max_obs_size, n_traj,
        state_batch_size)
    # Save dataset: arrays are stored positionally, training split first.
    np.savez_compressed(save_path, X_out_tr, S1_out_tr, S2_out_tr,
                        Labels_out_tr, X_out_ts, S1_out_ts, S2_out_ts,
                        Labels_out_ts)


if __name__ == '__main__':
    main()
diff --git a/src/algorithms/learning/VIN/domains/__init__.py b/src/algorithms/learning/VIN/domains/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/algorithms/learning/VIN/domains/gridworld.py b/src/algorithms/learning/VIN/domains/gridworld.py
new file mode 100644
index 000000000..9f42cb539
--- /dev/null
+++ b/src/algorithms/learning/VIN/domains/gridworld.py
@@ -0,0 +1,452 @@
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import dijkstra
+import logging
+import gc
+
+
class gridworld:
    """A class for making gridworlds

    The world is described by a 2-D ``image`` in which 0 marks an obstacle
    and any other value marks free space. Cells are numbered column-major
    (``order='F'``); a "state" is the position of a free cell in the sorted
    list of free cell numbers. Eight actions are available, in this order:
    north, south, east, west, northeast, northwest, southeast, southwest.
    """

    def __init__(self, image, targetx, targety):
        self.image = image
        self.n_row = image.shape[0]
        self.n_col = image.shape[1]
        self.obstacles = []  # [rows, cols] of obstacle cells
        self.freespace = []  # [rows, cols] of free cells
        self.targetx = targetx  # goal row
        self.targety = targety  # goal column
        self.G = []  # boolean adjacency between states
        self.W = []  # edge weights between states (1 or sqrt(2))
        self.R = []  # reward per (state, action)
        self.P = []  # 0/1 transitions, indexed [state, next_state, action]
        self.A = []
        self.n_states = 0
        self.n_actions = 0
        self.state_map_col = []  # column of each state
        self.state_map_row = []  # row of each state
        self.set_vals()

    def set_vals(self):
        """Compute the graph, transition, reward and state-map attributes."""
        row_obs, col_obs = np.where(self.image == 0)
        row_free, col_free = np.where(self.image != 0)
        self.obstacles = [row_obs, col_obs]
        self.freespace = [row_free, col_free]

        grid_shape = (self.n_row, self.n_col)
        self.n_states = self.n_row * self.n_col
        self.n_actions = 8

        # Reward: -1 per cardinal move, -sqrt(2) per diagonal move, and 0 for
        # every action taken in the target cell.
        R = -1 * np.ones((self.n_states, self.n_actions))
        R[:, 4:self.n_actions] = R[:, 4:self.n_actions] * np.sqrt(2)
        target = np.ravel_multi_index(
            [self.targetx, self.targety], grid_shape, order='F')
        R[target, :] = 0

        # One cell-to-cell 0/1 matrix per action, in action order. int8 keeps
        # the eight n_states x n_states matrices small.
        transitions = [
            np.zeros((self.n_states, self.n_states), np.int8)
            for _ in range(self.n_actions)
        ]
        for row in range(self.n_row):
            for col in range(self.n_col):
                curpos = np.ravel_multi_index(
                    [row, col], grid_shape, order='F')
                rows, cols = self.neighbors(row, col)
                neighbor_inds = np.ravel_multi_index(
                    [rows, cols], grid_shape, order='F')
                for matrix, next_cell in zip(transitions, neighbor_inds):
                    matrix[curpos, next_cell] = 1

        # Cells linked by any cardinal action / by any diagonal action.
        # Accumulated in place to avoid stacking the matrices.
        Q_1 = np.zeros((self.n_states, self.n_states), dtype=bool)
        for matrix in transitions[:4]:
            np.logical_or(Q_1, matrix, out=Q_1)
        Q_rt2 = np.zeros((self.n_states, self.n_states), dtype=bool)
        for matrix in transitions[4:]:
            np.logical_or(Q_rt2, matrix, out=Q_rt2)
        G = np.logical_or(Q_1, Q_rt2)

        # Edge weight: sqrt(2) wherever a diagonal action links the cells
        # (even if a cardinal action does too), otherwise 1 for a cardinal
        # link and 0 for no link.
        W = np.where(Q_rt2, np.float32(np.sqrt(2)), Q_1.astype(np.float32))

        # Keep only rows/columns that belong to free cells.
        non_obstacles = np.sort(np.ravel_multi_index(
            [self.freespace[0], self.freespace[1]], grid_shape, order='F'))
        free_block = np.ix_(non_obstacles, non_obstacles)

        self.G = G[free_block]
        self.W = W[free_block]
        self.P = np.stack(
            [matrix[free_block] for matrix in transitions], axis=2)
        self.R = R[non_obstacles, :]

        state_map_col, state_map_row = np.meshgrid(
            np.arange(0, self.n_col), np.arange(0, self.n_row))
        self.state_map_col = state_map_col.flatten('F')[non_obstacles]
        self.state_map_row = state_map_row.flatten('F')[non_obstacles]

    def get_graph(self):
        # Returns the adjacency matrix and the non-zero weights (flattened)
        G = self.G
        W = self.W[self.W != 0]
        return G, W

    def get_graph_inv(self):
        # Returns transpose of graph
        G = self.G.T
        W = self.W.T
        return G, W

    def val_2_image(self, val):
        # Zeros for obstacles, val for free space
        im = np.zeros((self.n_row, self.n_col))
        im[self.freespace[0], self.freespace[1]] = val
        return im

    def get_value_prior(self):
        # Returns value prior for gridworld: the Euclidean distance of every
        # cell to the target
        s_map_col, s_map_row = np.meshgrid(
            np.arange(0, self.n_col), np.arange(0, self.n_row))
        im = np.sqrt(
            np.square(s_map_col - self.targety) +
            np.square(s_map_row - self.targetx))
        return im

    def get_reward_prior(self):
        # Returns reward prior for gridworld: -1 everywhere, 10 at the target
        im = -1 * np.ones((self.n_row, self.n_col))
        im[self.targetx, self.targety] = 10
        return im

    def t_get_reward_prior(self):
        # Returns reward prior as needed for dataset generation:
        # 0 everywhere, 10 at the target
        im = np.zeros((self.n_row, self.n_col))
        im[self.targetx, self.targety] = 10
        return im

    def get_state_image(self, row, col):
        # Zeros everywhere except [row,col]
        im = np.zeros((self.n_row, self.n_col))
        im[row, col] = 1
        return im

    def map_ind_to_state(self, row, col):
        """Return the state index of the free cell at ``[row, col]``.

        The state is the position at which both ``state_map_row`` equals
        ``row`` and ``state_map_col`` equals ``col``.

        Raises:
            IndexError: if no state has these coordinates.
        """
        logging.debug('Trying to find row %s', row)
        logging.debug('Trying to find col %s', col)

        rw = np.where(self.state_map_row == row)
        cl = np.where(self.state_map_col == col)

        logging.debug('self.state_map_row = : %s', self.state_map_row)
        logging.debug('self.state_map_col = : %s', self.state_map_col)
        logging.debug('rw = : %s ', rw)
        logging.debug('cl = : %s', cl)

        return np.intersect1d(rw, cl)[0]

    def get_coords(self, states):
        """Return the ``[row, col]`` arrays for a state or array of states."""
        grid_shape = (self.n_row, self.n_col)
        non_obstacles = np.ravel_multi_index(
            [self.freespace[0], self.freespace[1]], grid_shape, order='F')
        non_obstacles = np.sort(non_obstacles)
        states = states.astype(int)
        # Decode with the same (n_row, n_col) shape that encoded the cell
        # numbers; the swapped shape is only correct for square grids.
        r, c = np.unravel_index(
            non_obstacles[states], grid_shape, order='F')
        return r, c

    def rand_choose(self, in_vec):
        # Samples an index from in_vec: draws q in [0, 1) and returns the
        # first index whose integer cumulative-sum interval contains q
        if len(in_vec.shape) > 1:
            if in_vec.shape[1] == 1:
                in_vec = in_vec.T
        temp = np.hstack((np.zeros((1)), np.cumsum(in_vec))).astype('int')
        q = np.random.rand()
        x = np.where(q > temp[0:-1])
        y = np.where(q < temp[1:])
        return np.intersect1d(x, y)[0]

    def next_state_prob(self, s, a):
        # Gets next state probability for
        # a given action (a)
        if hasattr(a, "__iter__"):
            p = np.squeeze(self.P[s, :, a])
        else:
            p = np.squeeze(self.P[s, :, a]).T
        return p

    def sample_next_state(self, s, a):
        # Gets the next state given the
        # current state (s) and an
        # action (a)
        vec = self.next_state_prob(s, a)
        result = self.rand_choose(vec)
        return result

    def get_size(self):
        # Returns domain size
        return self.n_row, self.n_col

    @staticmethod
    def _clamped(value, delta, upper):
        # Applies delta to value, clamping only on the side moved towards.
        if delta < 0:
            return max(value + delta, 0)
        if delta > 0:
            return min(value + delta, upper)
        return value

    def _move(self, row, col, d_row, d_col):
        # Returns the cell reached from [row,col] by one (d_row, d_col) step.
        # The step is clamped to the grid; if the resulting cell is an
        # obstacle the position is unchanged.
        new_row = self._clamped(row, d_row, self.n_row - 1)
        new_col = self._clamped(col, d_col, self.n_col - 1)
        if self.image[new_row, new_col] == 0:
            return row, col
        return new_row, new_col

    def north(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, -1, 0)

    def northeast(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, -1, 1)

    def northwest(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, -1, -1)

    def south(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, 1, 0)

    def southeast(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, 1, 1)

    def southwest(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, 1, -1)

    def east(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, 0, 1)

    def west(self, row, col):
        # Returns new [row,col] if we take the action
        return self._move(row, col, 0, -1)

    def neighbors(self, row, col):
        # Returns the cells reached by each of the eight actions, in action
        # order, as a (rows, cols) pair of arrays
        actions = (self.north, self.south, self.east, self.west,
                   self.northeast, self.northwest,
                   self.southeast, self.southwest)
        reached = [action(row, col) for action in actions]
        rows = np.array([cell[0] for cell in reached])
        cols = np.array([cell[1] for cell in reached])
        return rows, cols

    def return_state_map_row(self):
        return self.state_map_row, self.state_map_col
+
+
def trace_path(pred, source, target):
    """Trace the shortest path from ``source`` to ``target``.

    Args:
        pred: Predecessor list; ``pred[k]`` is the node visited before ``k``.
            Negative entries (SciPy's dijkstra uses -9999) mean "no
            predecessor".
        source: Node the path starts from.
        target: Node the path ends at.

    Returns:
        A float array of shape ``(length, 1)`` listing the nodes from
        ``source`` to ``target``, or ``[]`` when a predecessor is missing or
        out of range. At most 1000 nodes are traced; a longer path is
        returned truncated to its last 1000 nodes.
    """
    max_len = 1000
    # Collected backwards: target first, then each predecessor in turn.
    backtrack = [target]
    while backtrack[-1] != source and len(backtrack) < max_len:
        try:
            previous = pred[int(backtrack[-1])]
        except IndexError:
            return []
        # A negative predecessor is a "no path" marker, not a node. Using it
        # as an index would silently wrap around on large graphs.
        if previous < 0:
            return []
        backtrack.append(previous)
    return np.array(backtrack[::-1], dtype=float).reshape(-1, 1)
+
+
def sample_trajectory(M, n_states, start, gen=False):
    """Return shortest-path trajectories to the goal of gridworld ``M``.

    Args:
        M: Gridworld providing the graph, the goal and the state maps.
        n_states: Number of trajectories to produce.
        start: ``[row, col]`` of the start cell; only read when ``gen`` is
            false.
        gen: When true, start states are drawn at random from the free
            cells. When false, the single state at ``start`` is used.
            NOTE(review): in that case only one start state exists, so
            ``n_states > 1`` raises IndexError - confirm intended usage.

    Returns:
        ``(states_xy, states_one_hot)``: per trajectory, an array of
        ``[row, col]`` pairs and an array whose rows are the concatenated
        one-hot encodings of row and column.
    """
    G, W = M.get_graph_inv()
    N = G.shape[0]
    if N >= n_states:
        rand_ind = np.random.permutation(N)
    else:
        rand_ind = np.tile(np.random.permutation(N), (1, 10))
    # Indices into the free-space state list (state_map_row/state_map_col).
    init_states = rand_ind[0:n_states].flatten()

    # State index of the goal; it is the source of the dijkstra search.
    goal_s = M.map_ind_to_state(M.targetx, M.targety)

    if not gen:
        init_states = [M.map_ind_to_state(start[0], start[1])]

    # Shortest paths from the goal over the transposed graph
    g_sparse = csr_matrix(W)
    state_map_row, state_map_col = M.return_state_map_row()

    logging.debug('init_states_index = %s', init_states[0])
    logging.info('Start position is %s %s',
                 state_map_row[init_states[0]],
                 state_map_col[init_states[0]])
    _, pred = dijkstra(g_sparse, indices=goal_s, return_predecessors=True)

    states = []
    for i in range(n_states):
        # Traced from goal to start, then flipped to run start -> goal
        path = trace_path(pred, goal_s, init_states[i])
        states.append(np.flip(path, 0))

    states_xy = []
    states_one_hot = []
    for state in states:
        L = len(state)
        r, c = M.get_coords(state)
        row_m = np.zeros((L, M.n_row))
        col_m = np.zeros((L, M.n_col))
        for i in range(L):
            row_m[i, r[i]] = 1
            col_m[i, c[i]] = 1
        states_one_hot.append(np.hstack((row_m, col_m)))
        states_xy.append(np.hstack((r, c)))
    return states_xy, states_one_hot
diff --git a/src/algorithms/learning/VIN/domains/gridworld_before_mem.py b/src/algorithms/learning/VIN/domains/gridworld_before_mem.py
new file mode 100644
index 000000000..bc3c0c495
--- /dev/null
+++ b/src/algorithms/learning/VIN/domains/gridworld_before_mem.py
@@ -0,0 +1,435 @@
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import dijkstra
+import logging
+
+
+class gridworld:
+ """A class for making gridworlds"""
+
+ def __init__(self, image, targetx, targety):
+ self.image = image
+ self.n_row = image.shape[0]
+ self.n_col = image.shape[1]
+ self.obstacles = []
+ self.freespace = []
+ self.targetx = targetx
+ self.targety = targety
+ self.G = []
+ self.W = []
+ self.R = []
+ self.P = []
+ self.A = []
+ self.n_states = 0
+ self.n_actions = 0
+ self.state_map_col = []
+ self.state_map_row = []
+ self.set_vals()
+
+ def set_vals(self):
+ # Setup function to initialize all necessary
+ # data
+ row_obs, col_obs = np.where(self.image == 0)
+ row_free, col_free = np.where(self.image != 0)
+ self.obstacles = [row_obs, col_obs]
+ self.freespace = [row_free, col_free]
+
+ n_states = self.n_row * self.n_col
+ n_actions = 8
+ self.n_states = n_states
+ self.n_actions = n_actions
+
+ p_n = np.zeros((self.n_states, self.n_states),np.int8)
+ p_s = np.zeros((self.n_states, self.n_states),np.int8)
+ p_e = np.zeros((self.n_states, self.n_states),np.int8)
+ p_w = np.zeros((self.n_states, self.n_states),np.int8)
+ p_ne = np.zeros((self.n_states, self.n_states),np.int8)
+ p_nw = np.zeros((self.n_states, self.n_states),np.int8)
+ p_se = np.zeros((self.n_states, self.n_states),np.int8)
+ p_sw = np.zeros((self.n_states, self.n_states),np.int8)
+ print('Line 50')
+ R = -1 * np.ones((self.n_states, self.n_actions))
+ R[:, 4:self.n_actions] = R[:, 4:self.n_actions] * np.sqrt(2)
+ target = np.ravel_multi_index(
+ [self.targetx, self.targety], (self.n_row, self.n_col), order='F')
+ R[target, :] = 0
+ print('Line 56')
+
+ for row in range(0, self.n_row):
+ for col in range(0, self.n_col):
+
+ curpos = np.ravel_multi_index(
+ [row, col], (self.n_row, self.n_col), order='F')
+
+ rows, cols = self.neighbors(row, col)
+
+ neighbor_inds = np.ravel_multi_index(
+ [rows, cols], (self.n_row, self.n_col), order='F')
+
+ p_n[curpos, neighbor_inds[
+ 0]] = p_n[curpos, neighbor_inds[0]] + 1
+ p_s[curpos, neighbor_inds[
+ 1]] = p_s[curpos, neighbor_inds[1]] + 1
+ p_e[curpos, neighbor_inds[
+ 2]] = p_e[curpos, neighbor_inds[2]] + 1
+ p_w[curpos, neighbor_inds[
+ 3]] = p_w[curpos, neighbor_inds[3]] + 1
+ p_ne[curpos, neighbor_inds[
+ 4]] = p_ne[curpos, neighbor_inds[4]] + 1
+ p_nw[curpos, neighbor_inds[
+ 5]] = p_nw[curpos, neighbor_inds[5]] + 1
+ p_se[curpos, neighbor_inds[
+ 6]] = p_se[curpos, neighbor_inds[6]] + 1
+ p_sw[curpos, neighbor_inds[
+ 7]] = p_sw[curpos, neighbor_inds[7]] + 1
+ print('Line 85')
+ G = np.logical_or.reduce((p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw))
+ print('G ', G.shape)
+ print('Line 87')
+ W = np.maximum(
+ np.maximum(
+ np.maximum(
+ np.maximum(
+ np.maximum(np.maximum(np.maximum(p_n, p_s), p_e), p_w),
+ np.sqrt(2) * p_ne),
+ np.sqrt(2) * p_nw),
+ np.sqrt(2) * p_se),
+ np.sqrt(2) * p_sw)
+
+ non_obstacles = np.ravel_multi_index(
+ [self.freespace[0], self.freespace[1]], (self.n_row, self.n_col),
+ order='F')
+ print('Line 101')
+ non_obstacles = np.sort(non_obstacles)
+ p_n = p_n[non_obstacles, :]
+ p_n = np.expand_dims(p_n[:, non_obstacles], axis=2)
+ p_s = p_s[non_obstacles, :]
+ p_s = np.expand_dims(p_s[:, non_obstacles], axis=2)
+ p_e = p_e[non_obstacles, :]
+ p_e = np.expand_dims(p_e[:, non_obstacles], axis=2)
+ p_w = p_w[non_obstacles, :]
+ p_w = np.expand_dims(p_w[:, non_obstacles], axis=2)
+ p_ne = p_ne[non_obstacles, :]
+ p_ne = np.expand_dims(p_ne[:, non_obstacles], axis=2)
+ p_nw = p_nw[non_obstacles, :]
+ p_nw = np.expand_dims(p_nw[:, non_obstacles], axis=2)
+ p_se = p_se[non_obstacles, :]
+ p_se = np.expand_dims(p_se[:, non_obstacles], axis=2)
+ p_sw = p_sw[non_obstacles, :]
+ p_sw = np.expand_dims(p_sw[:, non_obstacles], axis=2)
+ G = G[non_obstacles, :]
+ G = G[:, non_obstacles]
+ W = W[non_obstacles, :]
+ W = W[:, non_obstacles]
+ R = R[non_obstacles, :]
+
+ P = np.concatenate(
+ (p_n, p_s, p_e, p_w, p_ne, p_nw, p_se, p_sw), axis=2)
+ print('Line 127')
+ self.G = G
+ self.W = W
+ self.P = P
+ self.R = R
+ state_map_col, state_map_row = np.meshgrid(
+ np.arange(0, self.n_col), np.arange(0, self.n_row))
+ self.state_map_col = state_map_col.flatten('F')[non_obstacles]
+
+ self.state_map_row = state_map_row.flatten('F')[non_obstacles] #see what self.statemaprow is before flattening
+
+
+
+
+ def get_graph(self):
+ # Returns graph
+ G = self.G
+ W = self.W[self.W != 0]
+ return G, W
+
+ def get_graph_inv(self):
+ # Returns transpose of graph
+ G = self.G.T
+ W = self.W.T
+ return G, W
+
+ def val_2_image(self, val):
+ # Zeros for obstacles, val for free space
+ im = np.zeros((self.n_row, self.n_col))
+ im[self.freespace[0], self.freespace[1]] = val
+ return im
+
+ def get_value_prior(self):
+ # Returns value prior for gridworld
+ s_map_col, s_map_row = np.meshgrid(
+ np.arange(0, self.n_col), np.arange(0, self.n_row))
+ im = np.sqrt(
+ np.square(s_map_col - self.targety) +
+ np.square(s_map_row - self.targetx))
+ return im
+
+ def get_reward_prior(self):
+ # Returns reward prior for gridworld
+ im = -1 * np.ones((self.n_row, self.n_col))
+ im[self.targetx, self.targety] = 10
+ return im
+
+ def t_get_reward_prior(self):
+ # Returns reward prior as needed for
+ # dataset generation
+ im = np.zeros((self.n_row, self.n_col))
+ im[self.targetx, self.targety] = 10
+ return im
+
+ def get_state_image(self, row, col):
+ # Zeros everywhere except [row,col]
+ im = np.zeros((self.n_row, self.n_col))
+ im[row, col] = 1
+ return im
+
+ def map_ind_to_state(self, row, col):
+ # Takes [row, col] and maps to a state
+ '''
+ Finds the position of the two integers passed in the freespace, (self.state_map_row and self.state_map_col),
+ and returns the intersection of the two (row and col, to give the coordinate) as a index of self.state_map_row.
+ '''
+ logging.debug('Trying to find row %s', row)
+ logging.debug('Trying to find col %s', col)
+
+ rw = np.where(self.state_map_row == row) #in the list self.state_map_row , what position (1,2,3...) is equal to row (int)
+ cl = np.where(self.state_map_col == col) #i.e where is value col in the aarray self.state_map_col
+ '''
+
+ The above acts as np.nonzero, i.e where in the available space is the targetx and target y (row col)
+ So you find where target_x is available, and where target_y is available, and then you find the intersect,
+ which should be the position of the goal. so self.state_map_row[16] and self.state_map_row[16], i.e 16th element
+ of self.state_map_row, and 16th element of self.state_map_col
+
+ '''
+ logging.debug('self.state_map_row = : %s', self.state_map_row)
+ logging.debug('self.state_map_col = : %s', self.state_map_col)
+
+
+ logging.debug('rw = : %s ', rw)
+ logging.debug('cl = : %s', cl)
+
+
+ return np.intersect1d(rw, cl)[0]
+
+ def get_coords(self, states):
+ # Given a state or states, returns
+ # [row,col] pairs for the state(s)
+ non_obstacles = np.ravel_multi_index(
+ [self.freespace[0], self.freespace[1]], (self.n_row, self.n_col),
+ order='F')
+ non_obstacles = np.sort(non_obstacles)
+ states = states.astype(int)
+ r, c = np.unravel_index(
+ non_obstacles[states], (self.n_col, self.n_row), order='F')
+ return r, c
+
+ def rand_choose(self, in_vec):
+ # Samples
+ if len(in_vec.shape) > 1:
+ if in_vec.shape[1] == 1:
+ in_vec = in_vec.T
+ temp = np.hstack((np.zeros((1)), np.cumsum(in_vec))).astype('int')
+ q = np.random.rand()
+ x = np.where(q > temp[0:-1])
+ y = np.where(q < temp[1:])
+ return np.intersect1d(x, y)[0]
+
+ def next_state_prob(self, s, a):
+ # Gets next state probability for
+ # a given action (a)
+ if hasattr(a, "__iter__"):
+ p = np.squeeze(self.P[s, :, a])
+ else:
+ p = np.squeeze(self.P[s, :, a]).T
+ return p
+
+ def sample_next_state(self, s, a):
+ # Gets the next state given the
+ # current state (s) and an
+ # action (a)
+ vec = self.next_state_prob(s, a)
+ result = self.rand_choose(vec)
+ return result
+
+ def get_size(self):
+ # Returns domain size
+ return self.n_row, self.n_col
+
+ def north(self, row, col):
+ # Returns new [row,col]
+ # if we take the action
+ new_row = np.max([row - 1, 0])
+ new_col = col
+ if self.image[new_row, new_col] == 0:
+ new_row = row
+ new_col = col
+ return new_row, new_col
+
+ def northeast(self, row, col):
+ # Returns new [row,col]
+ # if we take the action
+ new_row = np.max([row - 1, 0])
+ new_col = np.min([col + 1, self.n_col - 1])
+ if self.image[new_row, new_col] == 0:
+ new_row = row
+ new_col = col
+ return new_row, new_col
+
def northwest(self, row, col):
    """Return the cell reached by stepping north-west from ``(row, col)``.

    Row and column are both clamped at 0. If the target cell of
    ``self.image`` is 0 the starting cell is returned instead.
    """
    target_row = np.max([row - 1, 0])
    target_col = np.max([col - 1, 0])
    if self.image[target_row, target_col] == 0:
        return row, col
    return target_row, target_col
+
def south(self, row, col):
    """Return the cell reached by stepping south from ``(row, col)``.

    The row is clamped at ``n_row - 1``. If the target cell of
    ``self.image`` is 0 the starting cell is returned instead.
    """
    target_row = np.min([row + 1, self.n_row - 1])
    if self.image[target_row, col] == 0:
        return row, col
    return target_row, col
+
def southeast(self, row, col):
    """Return the cell reached by stepping south-east from ``(row, col)``.

    The row is clamped at ``n_row - 1`` and the column at ``n_col - 1``.
    If the target cell of ``self.image`` is 0 the starting cell is
    returned instead.
    """
    target_row = np.min([row + 1, self.n_row - 1])
    target_col = np.min([col + 1, self.n_col - 1])
    if self.image[target_row, target_col] == 0:
        return row, col
    return target_row, target_col
+
def southwest(self, row, col):
    """Return the cell reached by stepping south-west from ``(row, col)``.

    The row is clamped at ``n_row - 1`` and the column at 0. If the target
    cell of ``self.image`` is 0 the starting cell is returned instead.
    """
    target_row = np.min([row + 1, self.n_row - 1])
    target_col = np.max([col - 1, 0])
    if self.image[target_row, target_col] == 0:
        return row, col
    return target_row, target_col
+
def east(self, row, col):
    """Return the cell reached by stepping east from ``(row, col)``.

    The column is clamped at ``n_col - 1``. If the target cell of
    ``self.image`` is 0 the starting cell is returned instead.
    """
    target_col = np.min([col + 1, self.n_col - 1])
    if self.image[row, target_col] == 0:
        return row, col
    return row, target_col
+
def west(self, row, col):
    """Return the cell reached by stepping west from ``(row, col)``.

    The column is clamped at 0. If the target cell of ``self.image`` is 0
    the starting cell is returned instead.
    """
    target_col = np.max([col - 1, 0])
    if self.image[row, target_col] == 0:
        return row, col
    return row, target_col
+
def neighbors(self, row, col):
    """Return the cells reached from ``(row, col)`` by each of the 8 moves.

    The two returned arrays hold the resulting rows and columns in the
    order north, south, east, west, north-east, north-west, south-east,
    south-west.
    """
    moves = (self.north, self.south, self.east, self.west,
             self.northeast, self.northwest, self.southeast, self.southwest)
    rows, cols = moves[0](row, col)
    for move in moves[1:]:
        next_row, next_col = move(row, col)
        rows = np.append(rows, next_row)
        cols = np.append(cols, next_col)
    return rows, cols
+
def return_state_map_row(self):
    """Return the per-state lookup tables ``(state_map_row, state_map_col)``."""
    row_lookup, col_lookup = self.state_map_row, self.state_map_col
    return row_lookup, col_lookup
+
+
def trace_path(pred, source, target):
    """Trace the shortest path from ``source`` to ``target``.

    Args:
        pred: Predecessor list, where ``pred[n]`` is the node that precedes
            ``n`` on the shortest path from ``source``. Negative entries
            (such as SciPy's ``-9999`` sentinel) mean "no predecessor".
        source: Node the predecessor list was computed from.
        target: Node to trace back from.

    Returns:
        A float array of shape ``(L, 1)`` listing the nodes from ``source``
        to ``target`` inclusive, or an empty list when ``target`` cannot be
        traced back to ``source``.
    """
    n_nodes = len(pred)
    node = int(target)
    backtrack = [node]
    while node != source:
        # A negative/out-of-range node means the chain is broken; a chain
        # longer than the node count means the predecessor list has a cycle.
        if not 0 <= node < n_nodes or len(backtrack) > n_nodes:
            return []
        node = int(pred[node])
        backtrack.append(node)
    # The walk ran target -> source; callers expect source -> target.
    return np.array(backtrack[::-1], dtype=float).reshape(-1, 1)
+
+
def sample_trajectory(M, n_states, start, gen=False):
    """Compute shortest-path trajectories to the goal of domain ``M``.

    Args:
        M: Gridworld-like domain. This function uses ``get_graph_inv``,
            ``map_ind_to_state``, ``return_state_map_row``, ``get_coords``,
            ``targetx``, ``targety``, ``n_row`` and ``n_col``.
        n_states: Number of trajectories to produce.
        start: ``[row, col]`` of the start cell. Only read when ``gen`` is
            false.
        gen: When true, start states are drawn at random from the free
            cells; otherwise every trajectory starts at ``start``.

    Returns:
        ``(states_xy, states_one_hot)``: for each trajectory, the stacked
        row/column coordinates and the concatenated one-hot row/column
        encodings. A start with no path to the goal yields empty arrays.
    """
    G, W = M.get_graph_inv()
    N = G.shape[0]
    # Draw the permutation even when it is overridden below, so the global
    # NumPy RNG stream is consumed exactly as before.
    # np.resize cycles through the permutation when there are fewer free
    # cells than requested trajectories.
    init_states = np.resize(np.random.permutation(N), n_states)

    # goal_s is the state index of the goal; Dijkstra is rooted there.
    goal_s = M.map_ind_to_state(M.targetx, M.targety)

    if not gen:
        # start is given as [row, col]; reuse it for every trajectory so
        # that n_states > 1 no longer indexes past a one-element list.
        start_state = M.map_ind_to_state(start[0], start[1])
        init_states = np.full(max(n_states, 1), start_state)

    g_sparse = csr_matrix(W)
    state_map_row, state_map_col = M.return_state_map_row()

    if len(init_states) > 0:
        logging.debug('init_states_index = %s', init_states[0])
        logging.info('Start position is %s %s',
                     state_map_row[init_states[0]],
                     state_map_col[init_states[0]])
    _, pred = dijkstra(g_sparse, indices=goal_s, return_predecessors=True)

    # trace_path yields goal -> start; flip to get start -> goal.
    states = [np.flip(trace_path(pred, goal_s, init_states[i]), 0)
              for i in range(n_states)]

    states_xy = []
    states_one_hot = []
    for state in states:
        L = len(state)
        r, c = M.get_coords(state)
        step = np.arange(L)
        row_m = np.zeros((L, M.n_row))
        col_m = np.zeros((L, M.n_col))
        row_m[step, np.ravel(r)] = 1
        col_m[step, np.ravel(c)] = 1
        states_one_hot.append(np.hstack((row_m, col_m)))
        states_xy.append(np.hstack((r, c)))
    return states_xy, states_one_hot
diff --git a/src/algorithms/learning/VIN/domains/gridworld_og.py b/src/algorithms/learning/VIN/domains/gridworld_og.py
new file mode 100644
index 000000000..c60a4b7a5
--- /dev/null
+++ b/src/algorithms/learning/VIN/domains/gridworld_og.py
@@ -0,0 +1,385 @@
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.sparse.csgraph import dijkstra
+
+
class gridworld:
    """Eight-connected grid world built from an occupancy image.

    Cells where ``image == 0`` are obstacles; every other cell is free
    space. Free cells are numbered in column-major (``order='F'``) order,
    and that numbering is the "state" index used by ``P``, ``R``, ``G``,
    ``W``, ``state_map_row`` and ``state_map_col``.

    Args:
        image: 2-D array describing the map.
        targetx: Row index of the goal cell.
        targety: Column index of the goal cell.
    """

    # (row step, col step) per action, in the order used along the last
    # axis of P and the columns of R: N, S, E, W, NE, NW, SE, SW.
    _ACTIONS = ((-1, 0), (1, 0), (0, 1), (0, -1),
                (-1, 1), (-1, -1), (1, 1), (1, -1))

    def __init__(self, image, targetx, targety):
        self.image = image
        self.n_row = image.shape[0]
        self.n_col = image.shape[1]
        self.obstacles = []
        self.freespace = []
        self.targetx = targetx
        self.targety = targety
        self.G = []
        self.W = []
        self.R = []
        self.P = []
        self.A = []
        self.n_states = 0
        self.n_actions = 0
        self.state_map_col = []
        self.state_map_row = []
        self.set_vals()

    def _free_cells(self):
        # Sorted column-major flat indices of all free cells.
        return np.sort(np.ravel_multi_index(
            [self.freespace[0], self.freespace[1]],
            (self.n_row, self.n_col), order='F'))

    def set_vals(self):
        """Build the transition, reward and graph matrices for the map."""
        shape = (self.n_row, self.n_col)
        row_obs, col_obs = np.where(self.image == 0)
        row_free, col_free = np.where(self.image != 0)
        self.obstacles = [row_obs, col_obs]
        self.freespace = [row_free, col_free]

        self.n_states = self.n_row * self.n_col
        self.n_actions = len(self._ACTIONS)

        # Step reward: -1 for straight moves, -sqrt(2) for diagonal moves,
        # and 0 for every action taken in the target cell.
        R = -1 * np.ones((self.n_states, self.n_actions))
        R[:, 4:] *= np.sqrt(2)
        target = np.ravel_multi_index(
            [self.targetx, self.targety], shape, order='F')
        R[target, :] = 0

        # transitions[a, s, s2] == 1 when action a moves cell s to cell s2.
        transitions = np.zeros(
            (self.n_actions, self.n_states, self.n_states))
        actions = np.arange(self.n_actions)
        for row in range(self.n_row):
            for col in range(self.n_col):
                curpos = np.ravel_multi_index([row, col], shape, order='F')
                rows, cols = self.neighbors(row, col)
                neighbor_inds = np.ravel_multi_index(
                    [rows, cols], shape, order='F')
                transitions[actions, curpos, neighbor_inds] += 1

        # Adjacency, and edge weights (1 straight, sqrt(2) diagonal; the
        # largest weight wins when several actions share an edge).
        G = np.logical_or.reduce(transitions)
        step_cost = np.where(actions < 4, 1.0, np.sqrt(2))
        W = (transitions * step_cost[:, None, None]).max(axis=0)

        # Restrict everything to free cells; this defines the state index.
        free = self._free_cells()
        block = np.ix_(free, free)
        self.G = G[block]
        self.W = W[block]
        self.P = np.stack([t[block] for t in transitions], axis=2)
        self.R = R[free, :]
        col_grid, row_grid = np.meshgrid(
            np.arange(0, self.n_col), np.arange(0, self.n_row))
        self.state_map_col = col_grid.flatten('F')[free]
        self.state_map_row = row_grid.flatten('F')[free]

    def get_graph(self):
        """Return the adjacency matrix and the flat array of non-zero weights."""
        G = self.G
        W = self.W[self.W != 0]
        return G, W

    def get_graph_inv(self):
        """Return the transposed adjacency and weight matrices."""
        return self.G.T, self.W.T

    def val_2_image(self, val):
        """Return an image with ``val`` on free cells and zeros elsewhere."""
        im = np.zeros((self.n_row, self.n_col))
        im[self.freespace[0], self.freespace[1]] = val
        return im

    def get_value_prior(self):
        """Return each cell's Euclidean distance to the target cell."""
        s_map_col, s_map_row = np.meshgrid(
            np.arange(0, self.n_col), np.arange(0, self.n_row))
        return np.sqrt(
            np.square(s_map_col - self.targety) +
            np.square(s_map_row - self.targetx))

    def get_reward_prior(self):
        """Return a reward image: -1 everywhere and 10 at the target."""
        im = -1 * np.ones((self.n_row, self.n_col))
        im[self.targetx, self.targety] = 10
        return im

    def t_get_reward_prior(self):
        """Return a reward image: 0 everywhere and 10 at the target."""
        im = np.zeros((self.n_row, self.n_col))
        im[self.targetx, self.targety] = 10
        return im

    def get_state_image(self, row, col):
        """Return an image that is 1 at ``[row, col]`` and 0 elsewhere."""
        im = np.zeros((self.n_row, self.n_col))
        im[row, col] = 1
        return im

    def map_ind_to_state(self, row, col):
        """Return the state index of free cell ``[row, col]``.

        Raises:
            IndexError: If the cell is not a free cell of the map.
        """
        matches = np.flatnonzero(
            (self.state_map_row == row) & (self.state_map_col == col))
        return matches[0]

    def get_coords(self, states):
        """Return ``(rows, cols)`` for a state index or array of indices."""
        states = np.asarray(states).astype(int)
        # Unravel with the same (n_row, n_col) shape used to ravel; the
        # previous (n_col, n_row) shape was only right for square maps.
        r, c = np.unravel_index(
            self._free_cells()[states], (self.n_row, self.n_col), order='F')
        return r, c

    def rand_choose(self, in_vec):
        """Sample an index with probability proportional to ``in_vec``.

        Raises:
            IndexError: If ``in_vec`` is empty or has no positive mass.
        """
        cumulative = np.cumsum(np.ravel(np.asarray(in_vec, dtype=float)))
        if cumulative.size == 0 or cumulative[-1] <= 0:
            raise IndexError("rand_choose needs at least one positive weight")
        # The cumulative sums stay floats: the former int cast zeroed out
        # every fractional probability.
        q = np.random.rand() * cumulative[-1]
        idx = np.searchsorted(cumulative, q, side='right')
        return np.intp(min(idx, cumulative.size - 1))

    def next_state_prob(self, s, a):
        """Return the next-state weights of state ``s`` under action(s) ``a``."""
        p = np.squeeze(self.P[s, :, a])
        return p if hasattr(a, "__iter__") else p.T

    def sample_next_state(self, s, a):
        """Sample the successor state of ``s`` when action ``a`` is taken."""
        return self.rand_choose(self.next_state_prob(s, a))

    def get_size(self):
        """Return the domain size as ``(n_row, n_col)``."""
        return self.n_row, self.n_col

    def _move(self, row, col, d_row, d_col):
        # One step of (d_row, d_col), clamped to the map borders. Moving
        # into an obstacle (image value 0) leaves the position unchanged.
        new_row = min(max(row + d_row, 0), self.n_row - 1)
        new_col = min(max(col + d_col, 0), self.n_col - 1)
        if self.image[new_row, new_col] == 0:
            return row, col
        return new_row, new_col

    def north(self, row, col):
        """Return the ``(row, col)`` reached by moving north."""
        return self._move(row, col, -1, 0)

    def northeast(self, row, col):
        """Return the ``(row, col)`` reached by moving north-east."""
        return self._move(row, col, -1, 1)

    def northwest(self, row, col):
        """Return the ``(row, col)`` reached by moving north-west."""
        return self._move(row, col, -1, -1)

    def south(self, row, col):
        """Return the ``(row, col)`` reached by moving south."""
        return self._move(row, col, 1, 0)

    def southeast(self, row, col):
        """Return the ``(row, col)`` reached by moving south-east."""
        return self._move(row, col, 1, 1)

    def southwest(self, row, col):
        """Return the ``(row, col)`` reached by moving south-west."""
        return self._move(row, col, 1, -1)

    def east(self, row, col):
        """Return the ``(row, col)`` reached by moving east."""
        return self._move(row, col, 0, 1)

    def west(self, row, col):
        """Return the ``(row, col)`` reached by moving west."""
        return self._move(row, col, 0, -1)

    def neighbors(self, row, col):
        """Return the cells reached from ``(row, col)`` by each action.

        The two arrays are ordered N, S, E, W, NE, NW, SE, SW.
        """
        moves = (self.north, self.south, self.east, self.west,
                 self.northeast, self.northwest,
                 self.southeast, self.southwest)
        cells = [move(row, col) for move in moves]
        rows = np.array([cell[0] for cell in cells])
        cols = np.array([cell[1] for cell in cells])
        return rows, cols
+
+
def trace_path(pred, source, target):
    """Trace the shortest path from ``source`` to ``target``.

    Args:
        pred: Predecessor list, where ``pred[n]`` is the node that precedes
            ``n`` on the shortest path from ``source``. Negative entries
            (such as SciPy's ``-9999`` sentinel) mean "no predecessor".
        source: Node the predecessor list was computed from.
        target: Node to trace back from.

    Returns:
        A float array of shape ``(L, 1)`` listing the nodes from ``source``
        to ``target`` inclusive, or an empty list when ``target`` cannot be
        traced back to ``source``.
    """
    n_nodes = len(pred)
    node = int(target)
    backtrack = [node]
    while node != source:
        # A negative/out-of-range node means the chain is broken; a chain
        # longer than the node count means the predecessor list has a cycle.
        if not 0 <= node < n_nodes or len(backtrack) > n_nodes:
            return []
        node = int(pred[node])
        backtrack.append(node)
    # The walk ran target -> source; callers expect source -> target.
    return np.array(backtrack[::-1], dtype=float).reshape(-1, 1)
+
+
def sample_trajectory(M, n_states):
    """Sample shortest-path trajectories from random free cells to the goal.

    Args:
        M: Gridworld-like domain. This function uses ``get_graph_inv``,
            ``map_ind_to_state``, ``get_coords``, ``targetx``, ``targety``,
            ``n_row`` and ``n_col``.
        n_states: Number of trajectories to sample.

    Returns:
        ``(states_xy, states_one_hot)``: for each trajectory, the stacked
        row/column coordinates and the concatenated one-hot row/column
        encodings. A start with no path to the goal yields empty arrays.
    """
    G, W = M.get_graph_inv()
    N = G.shape[0]
    # np.resize cycles through the permutation when there are fewer free
    # cells than requested trajectories.
    init_states = np.resize(np.random.permutation(N), n_states)
    goal_s = M.map_ind_to_state(M.targetx, M.targety)

    # Shortest paths are computed once, rooted at the goal state.
    g_sparse = csr_matrix(W)
    _, pred = dijkstra(g_sparse, indices=goal_s, return_predecessors=True)

    # trace_path yields goal -> start; flip to get start -> goal.
    states = [np.flip(trace_path(pred, goal_s, init_states[i]), 0)
              for i in range(n_states)]

    states_xy = []
    states_one_hot = []
    for state in states:
        L = len(state)
        r, c = M.get_coords(state)
        step = np.arange(L)
        row_m = np.zeros((L, M.n_row))
        col_m = np.zeros((L, M.n_col))
        row_m[step, np.ravel(r)] = 1
        col_m[step, np.ravel(c)] = 1
        states_one_hot.append(np.hstack((row_m, col_m)))
        states_xy.append(np.hstack((r, c)))
    return states_xy, states_one_hot
\ No newline at end of file
diff --git a/src/algorithms/learning/VIN/download_weights_and_datasets.sh b/src/algorithms/learning/VIN/download_weights_and_datasets.sh
new file mode 100755
index 000000000..8d85b3455
--- /dev/null
+++ b/src/algorithms/learning/VIN/download_weights_and_datasets.sh
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Download the pretrained VIN weights (and, optionally, the gridworld
# datasets) published with kentsommer/pytorch-value-iteration-networks v1.1.
#
# Abort on the first error so that a failed `cd` can never cause files to
# be downloaded into the wrong directory.
set -euo pipefail

# Work relative to this script's own directory, not the caller's cwd.
cd "$(dirname "$0")"
mkdir -p trained dataset

cd trained
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_8x8.pth'
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_16x16.pth'
wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/vin_28x28.pth'
cd ../dataset
# Dataset downloads are disabled; uncomment the lines below to fetch them.
# wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_8x8.npz'
# wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_16x16.npz'
# wget 'https://github.com/kentsommer/pytorch-value-iteration-networks/releases/download/v1.1/gridworld_28x28.npz'
cd ..
diff --git a/src/algorithms/learning/VIN/general_test16.py b/src/algorithms/learning/VIN/general_test16.py
new file mode 100644
index 000000000..999ff4a95
--- /dev/null
+++ b/src/algorithms/learning/VIN/general_test16.py
@@ -0,0 +1,340 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    """Roll out a trained VIN on 16x16 maps and log accuracy metrics.

    For each domain the optimal trajectory is obtained from
    ``sample_trajectory`` and compared with the path produced by greedily
    following the network's predicted actions.

    Args:
        config: Parsed arguments; ``weights``, ``imsize`` and ``plot`` are
            read here, and the whole object is passed to ``VIN``.
        n_domains: Number of maps to evaluate (map files are opened by
            index when ``gen`` is false).
        max_obs: Maximum number of random obstacles (``gen`` mode only).
        max_obs_size: Maximum obstacle size, forwarded to ``obstacles``.
        n_traj: Trajectories evaluated per domain.
        n_actions: Unused; kept for interface compatibility.
        gen: When true, maps are generated randomly and saved with
            ``save_image``; otherwise they are loaded from
            ``./resources/testing_maps/16x16/``.
    """
    # Correct vs total:
    logging.basicConfig(filename='./resources/logs/generalization/16_w_64_model', format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model and load its parameters
    vin = VIN(config)
    vin.load_state_dict(torch.load(config.weights))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    # save_image appends to this module-level list.
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # log the distance left to the goal when a rollout fails
    dist_remain_avg = 0.0
    # Unsolvable maps are only counted for maps loaded from disk. This used
    # to be assigned in the non-gen branch only, so gen mode could raise a
    # NameError when it reached the check below.
    wpn = not gen
    for dom in range(n_domains):
        if gen:
            print('Gen started')
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            path = './resources/testing_maps/16x16/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            # Map files store (x, y); the gridworld expects (row, col).
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Optimal (Dijkstra) trajectory to the goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0], states_xy, states_one_hot, counter)  # this saves the maps

        counter += 1
        t0 = time.time()

        # The map and value prior are identical for every rollout step, so
        # the network input is built once per domain. np.int was removed in
        # NumPy 1.24; the builtin int is the documented replacement.
        im_data = 1 - G.image.astype(int)
        im_data = im_data.reshape(1, 1, config.imsize, config.imsize)
        value_data = value_prior.astype(int)
        value_data = value_data.reshape(1, 1, config.imsize, config.imsize)
        X_in = torch.from_numpy(
            np.append(im_data, value_data, axis=1)).float()
        if use_GPU:
            X_in = X_in.cuda()
        X_in = Variable(X_in)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Allow twice as many steps as the optimal path needs
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # Score the rollout and optionally plot both paths
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj, goal)
                    dist_remain_avg += d
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        failures = total - correct
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (total)))  # TODO: Possibly add total no soln
        # Guard the all-success case, which used to divide by zero here.
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg / failures) if failures else 0.0)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    """Plot the optimal and predicted paths over the map, then wait.

    Draws ``dom`` as a grey-scale image, overlays both paths together with
    start and goal markers, blocks until a key or mouse button is pressed,
    and closes the figure.
    """
    figure, axes = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")
    axes.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    axes.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    axes.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
    axes.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
    path_legend = axes.legend(loc='upper right', shadow=False)
    for text in path_legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for line in path_legend.get_lines():
        line.set_linewidth(0.5)  # the legend line width
    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(figure)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Saves the data made by generator as jsons.

    Appends a ``{'grid', 'goal', 'agent'}`` record to the module-level
    ``data`` list and rewrites ``./maps/<imsize>_data_300.json`` with the
    whole list. The image size is read from the module-level ``config``.

    ``im`` is the map array and ``goal`` its goal cell. The agent position
    is the first point of ``states_xy[0]``, or ``[0, 0]`` when that
    trajectory is empty. ``start``, ``states_one_hot`` and ``counter`` are
    accepted for interface compatibility but not used.
    '''
    s = config.imsize

    if len(states_xy[0]) == 0:
        # The old code here wrote to undefined start_x/start_y on a
        # temporary copy of the grid: a NameError with no possible effect.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()
    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    with open('./maps/' + str(s) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Loads the map stored at ``<path><dom>.json`` and returns its grid,
    goal and agent entries, in that order.
    '''
    file_name = str(path) + str(dom) + '.json'
    with open(file_name) as handle:
        contents = json.load(handle)
        logging.info('Opening file: ' + file_name)
    grid, goal, agent = contents['grid'], contents['goal'], contents['agent']
    return grid, goal, agent
+
def open_map_list(dom, path):
    '''
    Loads the list of maps stored at ``<path>.json`` and returns the grid,
    goal and agent of entry ``dom``, in that order.
    '''
    file_name = str(path) + '.json'
    with open(file_name) as json_file:
        data = json.load(json_file)
    # Report the file actually opened; the old message appended the entry
    # index to the path and so named a file that was never read.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    entry = data[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the optimal path.

    Args:
        optimal_path: Sequence of ``[a, b]`` points of the optimal path.
        pred_path: Sequence of ``[a, b]`` points of the predicted path, in
            travel order. Repeated points (such as goal padding at the end)
            add no length.
        goal: Unused; kept for interface compatibility.
        map_num: Unused; kept for interface compatibility.

    Returns:
        ``(dev_rel, dev_non_rel, dist, astar_dist)``: the absolute length
        difference divided by the optimal length, the absolute length
        difference, the predicted path length and the optimal path length.
    """
    def path_length(points):
        # Sum of Euclidean step lengths along the path, in the given order.
        points = np.asarray(points, dtype=float)
        if len(points) < 2:
            return 0.0
        steps = np.diff(points[:, :2], axis=0)
        return float(np.sqrt((steps ** 2).sum(axis=1)).sum())

    # The predicted path is measured as travelled. It was previously passed
    # through np.unique(axis=0), which sorts the rows and therefore measured
    # a reordered path rather than the one the agent followed.
    dist = path_length(pred_path)
    astar_dist = path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist
    else:
        # Zero-length optimum: avoid dividing by zero.
        dev_rel = 0.0 if dev_non_rel == 0 else float('inf')
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    '''
    Returns the Euclidean distance between the last point of ``pred_traj``
    and ``goal``.
    '''
    last_point = np.array(pred_traj)[-1]
    delta_x = goal[0] - last_point[0]
    delta_y = goal[1] - last_point[1]
    return (delta_x ** 2 + delta_y ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Parse the evaluation parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=16, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=20, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()
    # Roll out the network on the test maps. The --gen flag used to be
    # parsed but never forwarded, so it had no effect.
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/general_test28.py b/src/algorithms/learning/VIN/general_test28.py
new file mode 100644
index 000000000..6ab3dc04a
--- /dev/null
+++ b/src/algorithms/learning/VIN/general_test28.py
@@ -0,0 +1,340 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    """Roll out a trained VIN on a set of grid maps and log the results.

    For every map the reference trajectories come from ``sample_trajectory``.
    The network then picks the highest-scoring action step by step from the
    same start, using a rollout buffer twice as long as the reference
    trajectory. Success counts, path-length statistics and the distance left
    on failed rollouts are accumulated and written through ``logging``.

    Args:
        config: Parsed options. ``weights``, ``imsize`` and ``plot`` are read
            here; the object is also handed to ``VIN``.
        n_domains: Number of maps to process (indices ``0..n_domains-1``).
        max_obs: Number of random obstacles attempted per generated map.
        max_obs_size: Maximum obstacle size forwarded to ``obstacles``.
        n_traj: Number of reference trajectories requested per map.
        n_actions: Unused; kept so existing callers keep working.
        gen: When true, maps are generated randomly and stored through
            ``save_image``; otherwise ``<dom>.json`` files are read from
            ``./resources/testing_maps/28x28/``.
    """
    logging.basicConfig(filename='./resources/logs/generalization/28_w_64_model', format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters; on CPU-only machines remap tensors that were
    # saved from a GPU, as test.py in this package already does
    vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu")))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # this enables displaying the distance left to reach goal upon a failure
    dist_remain_avg = 0.0
    # Bound up front: the ``elif wpn`` below is also reached in gen mode,
    # where the map-loading branch never assigns it
    wpn = False
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            wpn = True
            path = './resources/testing_maps/28x28/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            goal = [goal[1], goal[0]]  # swap them around, for the row col format (x = col not row)
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Use the loaded grid as the obstacle map
            obs.dom = mp

        # Get final map (get_final inverts the obstacle map)
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # this saves the maps
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0], states_xy, states_one_hot, counter)

        counter += 1
        t0 = time.time()
        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Rollout buffer: twice the reference trajectory length
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map and value-prior channels do not change during a
                # rollout, so the network input is built once per trajectory.
                # ``np.int`` was removed from NumPy; the builtin ``int`` is
                # the same dtype request.
                im_data = G.image.astype(int)
                im_data = 1 - im_data
                im_data = im_data.reshape(1, 1, config.imsize,
                                          config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize,
                                                config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Transform current state data
                    state_data = pred_traj[j - 1, :]
                    state_data = state_data.astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # A rollout counts as a success when it ends on the goal
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj, goal)
                    dist_remain_avg += d
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1

            elif wpn:
                # Loaded map for which no reference trajectory was produced
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        logging.info('Rollout Accuracy Dijkstra: %s', (100 * ((total - total_no_soln) / total)))
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (total)))  # TODO: Possibly add total no soln
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero
        failed = total - correct
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg / failed) if failed else 0.0)
        logging.info('---------------------------------Done ------------------------------------')

    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    """Draw the map with the reference and predicted paths, then wait for input.

    The figure stays open until ``plt.waitforbuttonpress`` returns and is
    closed afterwards.
    """
    fig, ax = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    # Reference path in blue, network rollout in red with X markers
    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    ax.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')

    # First and last reference points mark start and goal
    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')

    legend = ax.legend(loc='upper right', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for line in legend.get_lines():
        line.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(fig)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Saves the data made by generator as jsons.

    Appends one record (grid, goal, agent position) to the module-level
    ``data`` list and rewrites ``./maps/<imsize>_data_300.json`` with the
    whole list. ``config`` is read from module scope for the image size.

    ``start``, ``states_one_hot`` and ``counter`` are accepted for interface
    compatibility but are not used.
    '''
    s = config.imsize

    if len(states_xy[0]) == 0:
        # No trajectory available: fall back to the origin as agent position.
        # The previous code also tried to mark a cell through the undefined
        # names start_x/start_y on a temporary list, which could only raise
        # NameError and never changed the stored grid.
        agent = [0, 0]
    else:
        # First state of the first trajectory is the agent position
        agent = states_xy[0][0].tolist()

    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    with open('./maps/' + str(s) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Loads ``<path><dom>.json`` and returns its grid, goal and agent entries.
    '''
    file_name = str(path) + str(dom) + '.json'
    with open(file_name) as json_file:
        contents = json.load(json_file)
    logging.info('Opening file: ' + file_name)
    return contents['grid'], contents['goal'], contents['agent']
+
def open_map_list(dom, path):
    '''
    Returns grid, goal and agent of entry ``dom`` from the list in ``<path>.json``.
    '''
    file_name = str(path) + '.json'
    with open(file_name) as json_file:
        entries = json.load(json_file)
    # Log the file that was really opened; the old message named
    # "<path><dom>.json", which this function never reads.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    entry = entries[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the reference path.

    Path length is the sum of Euclidean distances between consecutive
    points, using the first two coordinates of every row. The predicted path
    is measured in its given order: repeated trailing points add zero
    length, so no de-duplication is needed. The earlier ``np.unique`` call
    sorted the rows and therefore measured a re-ordered path.

    Args:
        optimal_path: Sequence of reference points, one row per step.
        pred_path: Sequence of predicted points, one row per step.
        goal: Unused; kept for interface compatibility.
        map_num: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)``: relative and
        absolute length difference, predicted length and reference length.
        ``dev_rel`` is 0.0 when the reference path has zero length.
    """
    def path_length(path):
        # np.diff over a single-row path is empty, giving length 0.0
        steps = np.diff(np.asarray(path, dtype=float), axis=0)
        return float(np.hypot(steps[:, 0], steps[:, 1]).sum())

    dist = path_length(pred_path)
    astar_dist = path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    # Guard against a degenerate reference path instead of dividing by zero
    dev_rel = dev_non_rel / astar_dist if astar_dist else 0.0  # TODO: Add avg distance of gen trajectory
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    """Return the straight-line distance from the last trajectory point to the goal.

    Only the first two coordinates of the final row of ``pred_traj`` and of
    ``goal`` are used.
    """
    last_point = np.array(pred_traj)[-1]
    delta_0 = goal[0] - last_point[0]
    delta_1 = goal[1] - last_point[1]
    return (delta_0 ** 2 + delta_1 ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Parse evaluation parameters. ``config`` must stay a module-level name
    # because save_image reads it from module scope.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=28, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=36, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()

    # Compute paths generated by the network. Forward --gen explicitly:
    # main() takes the mode as its ``gen`` argument and does not read
    # config.gen, so the flag was silently ignored before.
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/general_test8.py b/src/algorithms/learning/VIN/general_test8.py
new file mode 100644
index 000000000..968191203
--- /dev/null
+++ b/src/algorithms/learning/VIN/general_test8.py
@@ -0,0 +1,339 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    """Roll out a trained VIN on a set of grid maps and log the results.

    For every map the reference trajectories come from ``sample_trajectory``.
    The network then picks the highest-scoring action step by step from the
    same start, using a rollout buffer twice as long as the reference
    trajectory. Success counts, path-length statistics and the distance left
    on failed rollouts are accumulated and written through ``logging``.

    Args:
        config: Parsed options. ``weights``, ``imsize`` and ``plot`` are read
            here; the object is also handed to ``VIN``.
        n_domains: Number of maps to process (indices ``0..n_domains-1``).
        max_obs: Number of random obstacles attempted per generated map.
        max_obs_size: Maximum obstacle size forwarded to ``obstacles``.
        n_traj: Number of reference trajectories requested per map.
        n_actions: Unused; kept so existing callers keep working.
        gen: When true, maps are generated randomly and stored through
            ``save_image``; otherwise ``<dom>.json`` files are read from
            ``./resources/testing_maps/8x8/``.
    """
    logging.basicConfig(filename='./resources/logs/generalization/8_w_64_model', format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters; on CPU-only machines remap tensors that were
    # saved from a GPU, as test.py in this package already does
    vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu")))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    global data
    data = []
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # this enables displaying the distance left to reach goal upon a failure
    dist_remain_avg = 0.0
    # Bound up front: the ``elif wpn`` below is also reached in gen mode,
    # where the map-loading branch never assigns it
    wpn = False
    for dom in range(n_domains):
        if gen:
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            wpn = True
            path = './resources/testing_maps/8x8/'
            mp, goal, start = open_map(dom, path)
            mp[start[1]][start[0]] = 0  # Set the start position as freespace too
            mp[goal[1]][goal[0]] = 0  # Set the goal position as freespace too

            goal = [goal[1], goal[0]]  # swap them around, for the row col format (x = col not row)
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Use the loaded grid as the obstacle map
            obs.dom = mp

        # Get final map (get_final inverts the obstacle map)
        im = obs.get_final()

        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # this saves the maps
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0], states_xy, states_one_hot, counter)

        counter += 1
        t0 = time.time()
        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Rollout buffer: twice the reference trajectory length
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                # The map and value-prior channels do not change during a
                # rollout, so the network input is built once per trajectory.
                # ``np.int`` was removed from NumPy; the builtin ``int`` is
                # the same dtype request.
                im_data = G.image.astype(int)
                im_data = 1 - im_data
                im_data = im_data.reshape(1, 1, config.imsize,
                                          config.imsize)
                value_data = value_prior.astype(int)
                value_data = value_data.reshape(1, 1, config.imsize,
                                                config.imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Transform current state data
                    state_data = pred_traj[j - 1, :]
                    state_data = state_data.astype(int)
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # A rollout counts as a success when it ends on the goal
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    d = dist_left(pred_traj, goal)
                    dist_remain_avg += d
                    if config.plot == True:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1

            elif wpn:
                # Loaded map for which no reference trajectory was produced
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', (total_dist / total))
        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist / total))
        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist / total)) - ((total_dist / total))) / ((total_astar_dist / total))))
        logging.info('Total elapsed time %s', (sum(t_list) / (total)))  # TODO: Possibly add total no soln
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero
        failed = total - correct
        logging.info('Avg distance left when failed: %s ', (dist_remain_avg / failed) if failed else 0.0)
        logging.info('---------------------------------Done ------------------------------------')

    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    """Draw the map with the reference and predicted paths, then wait for input.

    The figure stays open until ``plt.waitforbuttonpress`` returns and is
    closed afterwards.
    """
    fig, ax = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    # Reference path in blue, network rollout in red with X markers
    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
    ax.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')

    # First and last reference points mark start and goal
    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')

    legend = ax.legend(loc='upper right', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for line in legend.get_lines():
        line.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(fig)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Saves the data made by generator as jsons.

    Appends one record (grid, goal, agent position) to the module-level
    ``data`` list and rewrites ``./maps/<imsize>_data_300.json`` with the
    whole list. ``config`` is read from module scope for the image size.

    ``start``, ``states_one_hot`` and ``counter`` are accepted for interface
    compatibility but are not used.
    '''
    s = config.imsize

    if len(states_xy[0]) == 0:
        # No trajectory available: fall back to the origin as agent position.
        # The previous code also tried to mark a cell through the undefined
        # names start_x/start_y on a temporary list, which could only raise
        # NameError and never changed the stored grid.
        agent = [0, 0]
    else:
        # First state of the first trajectory is the agent position
        agent = states_xy[0][0].tolist()

    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    with open('./maps/' + str(s) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Loads ``<path><dom>.json`` and returns its grid, goal and agent entries.
    '''
    file_name = str(path) + str(dom) + '.json'
    with open(file_name) as json_file:
        contents = json.load(json_file)
    logging.info('Opening file: ' + file_name)
    return contents['grid'], contents['goal'], contents['agent']
+
def open_map_list(dom, path):
    '''
    Returns grid, goal and agent of entry ``dom`` from the list in ``<path>.json``.
    '''
    file_name = str(path) + '.json'
    with open(file_name) as json_file:
        entries = json.load(json_file)
    # Log the file that was really opened; the old message named
    # "<path><dom>.json", which this function never reads.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    entry = entries[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    """Compare the length of a predicted path with the reference path.

    Path length is the sum of Euclidean distances between consecutive
    points, using the first two coordinates of every row. The predicted path
    is measured in its given order: repeated trailing points add zero
    length, so no de-duplication is needed. The earlier ``np.unique`` call
    sorted the rows and therefore measured a re-ordered path.

    Args:
        optimal_path: Sequence of reference points, one row per step.
        pred_path: Sequence of predicted points, one row per step.
        goal: Unused; kept for interface compatibility.
        map_num: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(dev_rel, dev_non_rel, dist, astar_dist)``: relative and
        absolute length difference, predicted length and reference length.
        ``dev_rel`` is 0.0 when the reference path has zero length.
    """
    def path_length(path):
        # np.diff over a single-row path is empty, giving length 0.0
        steps = np.diff(np.asarray(path, dtype=float), axis=0)
        return float(np.hypot(steps[:, 0], steps[:, 1]).sum())

    dist = path_length(pred_path)
    astar_dist = path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    # Guard against a degenerate reference path instead of dividing by zero
    dev_rel = dev_non_rel / astar_dist if astar_dist else 0.0  # TODO: Add avg distance of gen trajectory
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    """Return the straight-line distance from the last trajectory point to the goal.

    Only the first two coordinates of the final row of ``pred_traj`` and of
    ``goal`` are used.
    """
    last_point = np.array(pred_traj)[-1]
    delta_0 = goal[0] - last_point[0]
    delta_1 = goal[1] - last_point[1]
    return (delta_0 ** 2 + delta_1 ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Parse evaluation parameters. ``config`` must stay a module-level name
    # because save_image reads it from module scope.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=10, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()

    # Compute paths generated by the network. Forward --gen explicitly:
    # main() takes the mode as its ``gen`` argument and does not read
    # config.gen, so the flag was silently ignored before.
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/generators/__init__.py b/src/algorithms/learning/VIN/generators/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/algorithms/learning/VIN/generators/obstacle_gen.py b/src/algorithms/learning/VIN/generators/obstacle_gen.py
new file mode 100644
index 000000000..e8d0b4010
--- /dev/null
+++ b/src/algorithms/learning/VIN/generators/obstacle_gen.py
@@ -0,0 +1,93 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
class obstacles:
    """A class for generating obstacles in a domain.

    ``dom`` is the obstacle map (cells set to 1 by the insert methods) and
    ``mask`` is an index pair that must stay free; obstacle insertions that
    would cover it are rejected.
    """

    def __init__(self,
                 domsize=None,
                 mask=None,
                 size_max=None,
                 dom=None,
                 obs_types=None,
                 num_types=None):
        # Compare against None explicitly: ``array or default`` raises for
        # NumPy arrays with more than one element.
        self.domsize = domsize if domsize is not None else []
        self.mask = mask if mask is not None else []
        self.dom = dom if dom is not None else np.zeros(self.domsize)
        self.obs_types = obs_types or ["circ", "rect"]
        self.num_types = num_types or len(self.obs_types)
        # Default obstacle size limit: a quarter of the largest extent
        self.size_max = size_max or np.max(self.domsize) / 4

    def check_mask(self, dom=None):
        """Return whether the masked cell is occupied in ``dom`` (or ``self.dom``)."""
        # Ensure goal is in free space
        target = dom if dom is not None else self.dom
        return np.any(target[self.mask[0], self.mask[1]])

    def insert_rect(self, x, y, height, width):
        """Return a copy of the map with a rectangle of ones inserted."""
        im_try = np.copy(self.dom)
        im_try[x:x + height, y:y + width] = 1
        return im_try

    def add_rand_obs(self, obj_type):
        """Try to add one random obstacle; return True when it was accepted.

        Returns False when the obstacle would cover the masked cell or when
        ``obj_type`` is not supported.
        """
        if obj_type == "circ":
            print("circ is not yet implemented... sorry")
            return False
        if obj_type != "rect":
            return False
        rand_height = int(np.ceil(np.random.rand() * self.size_max))
        rand_width = int(np.ceil(np.random.rand() * self.size_max))
        # x indexes the first axis in insert_rect, so it is drawn from the
        # first extent (the code previously used domsize[1] for both)
        randx = int(np.ceil(np.random.rand() * (self.domsize[0] - 1)))
        randy = int(np.ceil(np.random.rand() * (self.domsize[1] - 1)))
        im_try = self.insert_rect(randx, randy, rand_height, rand_width)
        if self.check_mask(im_try):
            return False
        self.dom = im_try
        return True

    def add_n_rand_obs(self, n):
        """Attempt ``n`` random rectangles and return how many were accepted."""
        count = 0
        for _ in range(n):
            if self.add_rand_obs("rect"):
                count += 1
        return count

    def add_border(self):
        """Turn the outer border into obstacles; False if that covers the mask."""
        im_try = np.copy(self.dom)
        im_try[0:self.domsize[0], 0] = 1
        im_try[0, 0:self.domsize[1]] = 1
        im_try[0:self.domsize[0], self.domsize[1] - 1] = 1
        im_try[self.domsize[0] - 1, 0:self.domsize[1]] = 1
        if self.check_mask(im_try):
            return False
        self.dom = im_try
        return True

    def get_final(self):
        """Return the inverted map scaled so that its maximum is 1.

        A map without obstacles yields all ones and a fully blocked map
        yields all zeros, instead of NaN from a zero division.
        """
        im = np.copy(self.dom)
        peak = np.max(im)
        if peak == 0:
            # Nothing is blocked: every cell is free
            return np.ones(im.shape)
        im = peak - im
        top = np.max(im)
        if top == 0:
            # Everything is blocked: keep the all-zero map
            return im.astype(float)
        return im / top

    def show(self):
        """Display the final map with matplotlib."""
        plt.imshow(self.get_final(), cmap='Greys')
        plt.show()

    def _print(self):
        """Print the instance attributes for debugging."""
        print("domsize: ", self.domsize)
        print("mask: ", self.mask)
        print("dom: ", self.dom)
        print("obs_types: ", self.obs_types)
        print("num_types: ", self.num_types)
        print("size_max: ", self.size_max)
diff --git a/src/algorithms/learning/VIN/model.py b/src/algorithms/learning/VIN/model.py
new file mode 100644
index 000000000..2d4865795
--- /dev/null
+++ b/src/algorithms/learning/VIN/model.py
@@ -0,0 +1,68 @@
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torch.nn.parameter import Parameter
+
+
class VIN(nn.Module):
    """Network with a convolutional value-iteration block and a linear action head.

    Layer sizes come from ``config`` (``l_i``, ``l_h``, ``l_q``); the head
    always produces 8 outputs.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Input channels -> hidden channels, 3x3 with bias
        self.h = nn.Conv2d(
            config.l_i, config.l_h,
            kernel_size=(3, 3), stride=1, padding=1, bias=True)
        # Hidden channels -> single-channel map, 1x1 without bias
        self.r = nn.Conv2d(
            config.l_h, 1,
            kernel_size=(1, 1), stride=1, padding=0, bias=False)
        # Single-channel map -> l_q channels, 3x3 without bias
        self.q = nn.Conv2d(
            1, config.l_q,
            kernel_size=(3, 3), stride=1, padding=1, bias=False)
        self.fc = nn.Linear(config.l_q, 8, bias=False)
        # Extra kernel applied to the value channel, initialised to zeros
        self.w = Parameter(
            torch.zeros(config.l_q, 1, 3, 3), requires_grad=True)
        self.sm = nn.Softmax(dim=1)

    def forward(self, X, S1, S2, config):
        """Return ``(logits, softmax(logits))`` for the states ``(S1, S2)``.

        ``S1`` indexes dimension 2 and ``S2`` dimension 3 of the q tensor.
        """
        reward = self.r(self.h(X))
        q_values = self.q(reward)
        value = q_values.max(dim=1, keepdim=True)[0]

        def backup(current_value):
            # One convolution over the stacked [reward, value] channels with
            # the stacked [q.weight, w] kernels
            return F.conv2d(
                torch.cat([reward, current_value], 1),
                torch.cat([self.q.weight, self.w], 1),
                stride=1,
                padding=1)

        for _ in range(config.k - 1):
            q_values = backup(value)
            value = q_values.max(dim=1, keepdim=True)[0]

        # Final backup; its q values feed the action head
        q_values = backup(value)

        batch = q_values.size(0)
        # Pick the S1 entry along dimension 2 ...
        index_1 = S1.long().expand(config.imsize, 1, config.l_q, batch)
        index_1 = index_1.permute(3, 2, 1, 0)
        selected = q_values.gather(2, index_1).squeeze(2)

        # ... then the S2 entry along the remaining spatial dimension
        index_2 = S2.long().expand(1, config.l_q, batch)
        index_2 = index_2.permute(2, 1, 0)
        selected = selected.gather(2, index_2).squeeze(2)

        logits = self.fc(selected)
        return logits, self.sm(logits)
diff --git a/src/algorithms/learning/VIN/requirements.txt b/src/algorithms/learning/VIN/requirements.txt
new file mode 100644
index 000000000..27dabf2f9
--- /dev/null
+++ b/src/algorithms/learning/VIN/requirements.txt
@@ -0,0 +1,4 @@
+scipy>=0.19.0
+matplotlib>=2.0.0
+numpy>=1.12.1
+torchvision>=0.1.8
diff --git a/src/algorithms/learning/VIN/results/16x16_1.png b/src/algorithms/learning/VIN/results/16x16_1.png
new file mode 100644
index 000000000..9eb172316
Binary files /dev/null and b/src/algorithms/learning/VIN/results/16x16_1.png differ
diff --git a/src/algorithms/learning/VIN/results/16x16_2.png b/src/algorithms/learning/VIN/results/16x16_2.png
new file mode 100644
index 000000000..5292bdc33
Binary files /dev/null and b/src/algorithms/learning/VIN/results/16x16_2.png differ
diff --git a/src/algorithms/learning/VIN/results/28x28_1.png b/src/algorithms/learning/VIN/results/28x28_1.png
new file mode 100644
index 000000000..4995df1dd
Binary files /dev/null and b/src/algorithms/learning/VIN/results/28x28_1.png differ
diff --git a/src/algorithms/learning/VIN/results/28x28_2.png b/src/algorithms/learning/VIN/results/28x28_2.png
new file mode 100644
index 000000000..4cf22e8fa
Binary files /dev/null and b/src/algorithms/learning/VIN/results/28x28_2.png differ
diff --git a/src/algorithms/learning/VIN/results/8x8_1.png b/src/algorithms/learning/VIN/results/8x8_1.png
new file mode 100644
index 000000000..0feacc9de
Binary files /dev/null and b/src/algorithms/learning/VIN/results/8x8_1.png differ
diff --git a/src/algorithms/learning/VIN/results/8x8_2.png b/src/algorithms/learning/VIN/results/8x8_2.png
new file mode 100644
index 000000000..22e60de5a
Binary files /dev/null and b/src/algorithms/learning/VIN/results/8x8_2.png differ
diff --git a/src/algorithms/learning/VIN/results/8x8_3.png b/src/algorithms/learning/VIN/results/8x8_3.png
new file mode 100644
index 000000000..a9a142e1e
Binary files /dev/null and b/src/algorithms/learning/VIN/results/8x8_3.png differ
diff --git a/src/algorithms/learning/VIN/scrap.py b/src/algorithms/learning/VIN/scrap.py
new file mode 100644
index 000000000..506ee75c4
--- /dev/null
+++ b/src/algorithms/learning/VIN/scrap.py
@@ -0,0 +1,14 @@
+import numpy as np
+
# Two 3x3 binary example grids
arr1 = np.array([[1, 1, 0], [0, 1, 1], [1, 0, 0]])

arr2 = np.array([[1, 0, 0], [1, 0, 1], [1, 1, 1]])

# Element-wise union of the grids: boolean, int8 and via maximum
G = np.logical_or.reduce((arr1, arr2))
W = np.array(G, dtype=np.int8)
M = np.maximum(arr1, arr2)
# NumPy has no ``intersect2d`` (the lookup raised AttributeError and stopped
# the script); ``intersect1d`` is the set-intersection routine it provides.
Q = np.intersect1d
print(G)
print(M)
print(W)
+
diff --git a/src/algorithms/learning/VIN/test.py b/src/algorithms/learning/VIN/test.py
new file mode 100644
index 000000000..caeb4e69f
--- /dev/null
+++ b/src/algorithms/learning/VIN/test.py
@@ -0,0 +1,346 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
+def main(config,
+         n_domains=3000,
+         max_obs=30,
+         max_obs_size=None,
+         n_traj=1,
+         n_actions=8,gen = False):
+    '''Roll the VIN out on n_domains maps and log success and path-length metrics.'''
+    gen = gen or config.gen  # True: sample random maps; False: load them from config.maps
+    logging.basicConfig(filename='./resources/logs/30k_16_no_block.log',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
+    correct, total = 0.0, 0.0
+    # Automatic switch to GPU mode if available
+    use_GPU = torch.cuda.is_available()
+    # Instantiate a VIN model
+    vin = VIN(config)
+    # Load model parameters (remapped to the CPU when no GPU is present)
+    vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu")))
+    # Use GPU if available
+    if use_GPU:
+        vin = vin.cuda()
+    counter,total_no_soln = 0,0
+    global data
+    data = []
+    t_list = []
+    total_dev_non_rel, total_dev_rel = 0.0,0.0
+    total_dist, total_astar_dist = 0.0,0.0
+    metrics = True #this enables displaying the distance left to reach goal upon a failure
+    dist_remain_avg = 0.0
+    wpn = False
+    for dom in range(n_domains):
+        if gen:
+            goal = [
+                np.random.randint(config.imsize),
+                np.random.randint(config.imsize)
+            ]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            # Add obstacles to map
+            n_obs = obs.add_n_rand_obs(max_obs)
+            # Add border to map
+            border_res = obs.add_border()
+            # Ensure we have valid map
+            if n_obs == 0 or not border_res:
+                continue
+            start = None
+        else:
+            wpn = True
+            # path = './resources/maps/'
+            path = config.maps
+            mp, goal, start = open_map(dom,path)
+            # path = './maps/8_data_300'
+            # mp, goal, start = open_map_list(dom,path)
+            mp[start[1]][start[0]] = 0 #Set the start position as freespace too
+            mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace too
+
+            goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row)
+            start = [start[1],start[0]]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            obs.dom = mp
+
+        # Get final map
+        im = obs.get_final()
+
+
+        #1 is obstacles.
+        #set obs.dom as the mp
+        logging.debug('0 is obstacle ')
+        logging.debug(' im: %s ', im)
+        # Generate gridworld from obstacle map
+        G = gridworld(im, goal[0], goal[1])
+        # Get value prior
+        value_prior = G.get_reward_prior()
+        # Sample random trajectories to our goal
+        states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory
+        # print('states_xy', states_xy[0] , len(states_xy[0]))
+        if gen and len(states_xy[0]) > 0:
+            save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps
+
+        counter += 1
+        t0 = time.time()
+        for i in range(n_traj):
+            if len(states_xy[i]) > 1:
+
+                # Allow the rollout twice as many steps as the reference path
+                L = len(states_xy[i]) * 2
+                # Allocate space for predicted steps
+                pred_traj = np.zeros((L, 2))
+                # Set starting position
+                pred_traj[0, :] = states_xy[i][0, :]
+
+                for j in range(1, L):
+                    # Transform current state data
+                    state_data = pred_traj[j - 1, :]
+                    state_data = state_data.astype(int)  # np.int was removed in NumPy 1.24
+                    # Transform domain to Networks expected input shape
+                    im_data = G.image.astype(int)
+                    im_data = 1 - im_data
+                    im_data = im_data.reshape(1, 1, config.imsize,
+                                              config.imsize)
+                    # Transform value prior to Networks expected input shape
+                    value_data = value_prior.astype(int)
+                    value_data = value_data.reshape(1, 1, config.imsize,
+                                                    config.imsize)
+                    # Get inputs as expected by network
+                    X_in = torch.from_numpy(
+                        np.append(im_data, value_data, axis=1)).float()
+                    S1_in = torch.from_numpy(state_data[0].reshape(
+                        [1, 1])).float()
+                    S2_in = torch.from_numpy(state_data[1].reshape(
+                        [1, 1])).float()
+                    # Send Tensors to GPU if available
+                    if use_GPU:
+                        X_in = X_in.cuda()
+                        S1_in = S1_in.cuda()
+                        S2_in = S2_in.cuda()
+                    # Wrap to autograd.Variable
+                    X_in, S1_in, S2_in = Variable(X_in), Variable(
+                        S1_in), Variable(S2_in)
+                    # Forward pass in our neural net
+                    _, predictions = vin(X_in, S1_in, S2_in, config)
+                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
+                    a = indices.data.numpy()[0][0]
+                    # Transform prediction to indices
+                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
+                                           pred_traj[j - 1, 1])
+                    ns = G.sample_next_state(s, a)
+                    nr, nc = G.get_coords(ns)
+                    pred_traj[j, 0] = nr
+                    pred_traj[j, 1] = nc
+                    if nr == goal[0] and nc == goal[1]:
+                        # We hit goal so fill remaining steps
+                        pred_traj[j + 1:, 0] = nr
+                        pred_traj[j + 1:, 1] = nc
+                        break
+                # Success means the last predicted cell is the goal
+                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
+                    logging.debug('#################### - Path Found map %s!\n', dom)
+                    correct += 1
+                    t1 = time.time()
+                    t_list.append(t1-t0)
+                    dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total)
+                    total_dev_rel += dev_rel
+                    total_dev_non_rel += dev_non_rel
+                    total_dist += dist
+                    total_astar_dist += astar_dist
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                elif metrics:
+                    d = dist_left(pred_traj,goal)
+                    dist_remain_avg += d
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                total += 1
+
+
+            # Loaded map for which no reference path of more than one cell exists
+            elif wpn:
+                total_no_soln += 1
+        sys.stdout.write("\r" + str(int(
+            (float(dom) / n_domains) * 100.0)) + "%")
+        sys.stdout.flush()
+
+    sys.stdout.write("\n")
+    if total and correct:
+        logging.info('Rollout Accuracy: %s',(100 * (correct / total)))
+        logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln))))
+        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
+        logging.info('Total avg Rel Deviation %s', (total_dev_rel/total))
+        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total))
+        logging.info('Total avg VIN Distance %s', (total_dist/total))
+        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist/total))
+        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist/total))-((total_dist/total)))/((total_astar_dist/total))))
+        logging.info('Total elapsed time %s', (sum(t_list)/(total))) #TODO: Possibly add total no soln
+        logging.info('Avg distance left when failed: %s ', (dist_remain_avg/(total-correct)) if total > correct else 0.0)  # no failures -> 0.0 rather than ZeroDivisionError
+        logging.info('---------------------------------Done ------------------------------------')
+
+    else:
+        logging.info('No successes either vin or dijkstra')
+
+
+def visualize(dom, states_xy, pred_traj):
+    '''Draw both paths over the map image, then block on plt.waitforbuttonpress.'''
+    fig, ax = plt.subplots()
+    plt.imshow(dom, cmap="Greys_r")
+    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
+    ax.plot(pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
+    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
+    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
+    legend = ax.legend(loc='upper right', shadow=False)
+    for legend_text in legend.get_texts():
+        legend_text.set_fontsize('x-small')
+    for legend_line in legend.get_lines():
+        legend_line.set_linewidth(0.5)
+    plt.draw()
+    plt.waitforbuttonpress(0)
+    plt.close(fig)
+
+
+def save_image(im, goal, start,states_xy,states_one_hot,counter):
+    '''
+    Record one generated map and rewrite the JSON dump of all maps so far.
+
+    The map is stored as a dict with the keys 'grid' (im.tolist()), 'goal'
+    (the first two entries of goal) and 'agent' (first point of the first
+    trajectory in states_xy). start, states_one_hot and counter are unused.
+    Relies on the module globals ``config`` (imsize names the output file)
+    and ``data`` (the list being accumulated).
+    '''
+    s = config.imsize
+
+    if len(states_xy[0]) == 0:
+        # No trajectory, hence no start cell: fall back to the origin. The old
+        # ``im.tolist()[start_x][start_y] = 1`` here used undefined names
+        # (NameError) and could only have modified a temporary list anyway.
+        agent = [0,0]
+    else:
+        agent = states_xy[0][0].tolist()
+    mp = {
+        'grid': im.tolist(),
+        'goal': [goal[0],goal[1]],
+        'agent': agent
+    }
+    data.append(mp)
+    # Re-dump the whole list each time so the file always reflects every map
+    # saved during this run.
+    with open('./maps/' +str(s) + '_data_300' + '.json', 'w') as outfile:
+        json.dump(data,outfile)
+
+def open_map(dom,path):
+    '''Load the map stored at ``<path><dom>.json`` (plain string concatenation).
+    Returns the file's 'grid', 'goal' and 'agent' entries as a tuple.'''
+    map_file = str(path) + str(dom) + '.json'
+    with open(map_file) as json_file:
+        data = json.load(json_file)
+    logging.info('Opening file: ' + map_file)
+    return data['grid'], data['goal'], data['agent']
+
+def open_map_list(dom,path):
+    with open(str(path) + '.json') as json_file:  # one file holding a list of maps
+        maps = json.load(json_file)
+    logging.info('Opening file: ' + str(path) + str(dom) + '.json' )
+    return tuple(maps[dom][key] for key in ('grid', 'goal', 'agent'))
+
+def deviation(optimal_path, pred_path,goal, map_num):
+    '''
+    Compare the length of a predicted path with that of the reference path.
+
+    optimal_path and pred_path are sequences of 2-D points listed in visiting
+    order; goal and map_num are accepted for call compatibility but unused.
+    pred_path may end in repeated copies of its last point (the rollout pads
+    the array that way); such repeats add nothing to the length.
+
+    Returns the tuple (dev_rel, dev_non_rel, dist, astar_dist):
+      dist         Euclidean length of pred_path
+      astar_dist   Euclidean length of optimal_path
+      dev_non_rel  absolute difference of the two lengths
+      dev_rel      dev_non_rel / astar_dist (0.0 if astar_dist is zero)
+    '''
+    optimal_path = np.asarray(optimal_path, dtype=float)
+    pred_path = np.asarray(pred_path, dtype=float)
+
+    # The points must stay in visiting order. An earlier version ran
+    # np.unique(pred_path, axis=0) to drop the goal padding, but np.unique also
+    # sorts the rows, so the length was measured along a reordered path.
+    # Nothing needs removing: a repeated point is a step of length zero.
+    pred_steps = np.diff(pred_path[:, :2], axis=0)
+    pred_lengths = np.sqrt((pred_steps ** 2).sum(axis=1))
+    dist = float(pred_lengths.sum())
+
+    # Same measurement for the reference path.
+    optimal_steps = np.diff(optimal_path[:, :2], axis=0)
+    optimal_lengths = np.sqrt((optimal_steps ** 2).sum(axis=1))
+    astar_dist = float(optimal_lengths.sum())
+
+    # Absolute and relative gap between the two lengths.
+    dev_non_rel = abs(astar_dist - dist)
+    # A reference path without any movement has length 0; report 0.0 instead
+    # of dividing by it.
+    dev_rel = dev_non_rel / astar_dist if astar_dist else 0.0
+    return(dev_rel,dev_non_rel,dist,astar_dist)
+
+def dist_left(pred_traj, goal):
+    '''
+    Return the straight-line (Euclidean) distance from the last point of
+    pred_traj to goal; only the first two coordinates of each are used.
+    '''
+    last = np.array(pred_traj)[-1]
+    d_first = goal[0] - last[0]
+    d_second = goal[1] - last[1]
+    return (d_first**2 + d_second**2)**0.5
+
+
+if __name__ == '__main__':
+    # Parse the evaluation parameters
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--weights',
+        type=str,
+        default='trained/vin_8x8.pth',
+        help='Path to trained weights')
+    parser.add_argument(
+        '--maps',
+        type=str,
+        default='resources/testing_maps/16x16/',
+        help='Path to maps')
+    parser.add_argument('--plot', action='store_true', default=False)
+    parser.add_argument('--gen', action='store_true', default=False)
+    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
+    parser.add_argument(
+        '--k', type=int, default=10, help='Number of Value Iterations')
+    parser.add_argument(
+        '--l_i', type=int, default=2, help='Number of channels in input layer')
+    parser.add_argument(
+        '--l_h',
+        type=int,
+        default=150,
+        help='Number of channels in first hidden layer')
+    parser.add_argument(
+        '--l_q',
+        type=int,
+        default=10,
+        help='Number of channels in q layer (~actions) in VI-module')
+    config = parser.parse_args()
+    # --maps is used as a plain string prefix for '<n>.json', hence the trailing '/'.
+    # NOTE(review): the defaults pair 8x8 weights/imsize with a 16x16 map
+    # directory - confirm which size is intended.
+    # Compute the paths generated by the network and log the metrics
+    main(config)
diff --git a/src/algorithms/learning/VIN/test16.py b/src/algorithms/learning/VIN/test16.py
new file mode 100644
index 000000000..5734b5c90
--- /dev/null
+++ b/src/algorithms/learning/VIN/test16.py
@@ -0,0 +1,339 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
+def main(config,
+         n_domains=10,
+         max_obs=30,
+         max_obs_size=None,
+         n_traj=1,
+         n_actions=8,gen = False):
+    '''Roll the VIN out on n_domains maps and log success and path-length metrics.'''
+    logging.basicConfig(filename='./resources/logs/scrap.log',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
+    correct, total = 0.0, 0.0
+    gen = gen or getattr(config, 'gen', False)  # honour the parsed --gen flag
+    use_GPU = torch.cuda.is_available()
+    # Instantiate a VIN model
+    vin = VIN(config)
+    # Load model parameters (remapped to the CPU when no GPU is present)
+    vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu")))
+    # Use GPU if available
+    if use_GPU:
+        vin = vin.cuda()
+    counter,total_no_soln,wpn = 0,0,False  # wpn is bound up front so gen mode cannot hit an unbound name
+    global data
+    data = []
+    t_list = []
+    total_dev_non_rel, total_dev_rel = 0.0,0.0
+    total_dist, total_astar_dist = 0.0,0.0
+    metrics = True #this enables displaying the distance left to reach goal upon a failure
+    dist_remain_avg = 0.0
+    for dom in range(n_domains):
+        if gen:
+            print('Gen started')
+            goal = [
+                np.random.randint(config.imsize),
+                np.random.randint(config.imsize)
+            ]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            # Add obstacles to map
+            n_obs = obs.add_n_rand_obs(max_obs)
+            # Add border to map
+            border_res = obs.add_border()
+            # Ensure we have valid map
+            if n_obs == 0 or not border_res:
+                continue
+            start = None
+        else:
+            wpn = True
+            # path = './resources/maps/'
+            path = './resources/testing_maps/16x16/'
+            mp, goal, start = open_map(dom,path)
+            # path = './maps/8_data_300'
+            # mp, goal, start = open_map_list(dom,path)
+            mp[start[1]][start[0]] = 0 #Set the start position as freespace too
+            mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace too
+
+            goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row)
+            start = [start[1],start[0]]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            obs.dom = mp
+
+        # Get final map
+        im = obs.get_final()
+
+
+        #1 is obstacles.
+        #set obs.dom as the mp
+        logging.debug('0 is obstacle ')
+        logging.debug(' im: %s ', im)
+        # Generate gridworld from obstacle map
+        G = gridworld(im, goal[0], goal[1])
+        # Get value prior
+        value_prior = G.get_reward_prior()
+        # Sample random trajectories to our goal
+        states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory
+        # print('states_xy', states_xy[0] , len(states_xy[0]))
+        if gen and len(states_xy[0]) > 0:
+            save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps
+
+        counter += 1
+        for i in range(n_traj):
+            if len(states_xy[i]) > 1:
+                t0 = time.time()
+                # Allow the rollout twice as many steps as the reference path
+                L = len(states_xy[i]) * 2
+                # Allocate space for predicted steps
+                pred_traj = np.zeros((L, 2))
+                # Set starting position
+                pred_traj[0, :] = states_xy[i][0, :]
+
+                for j in range(1, L):
+                    # Transform current state data
+                    state_data = pred_traj[j - 1, :]
+                    state_data = state_data.astype(int)  # np.int was removed in NumPy 1.24
+                    # Transform domain to Networks expected input shape
+                    im_data = G.image.astype(int)
+                    im_data = 1 - im_data
+                    im_data = im_data.reshape(1, 1, config.imsize,
+                                              config.imsize)
+                    # Transform value prior to Networks expected input shape
+                    value_data = value_prior.astype(int)
+                    value_data = value_data.reshape(1, 1, config.imsize,
+                                                    config.imsize)
+                    # Get inputs as expected by network
+                    X_in = torch.from_numpy(
+                        np.append(im_data, value_data, axis=1)).float()
+                    S1_in = torch.from_numpy(state_data[0].reshape(
+                        [1, 1])).float()
+                    S2_in = torch.from_numpy(state_data[1].reshape(
+                        [1, 1])).float()
+                    # Send Tensors to GPU if available
+                    if use_GPU:
+                        X_in = X_in.cuda()
+                        S1_in = S1_in.cuda()
+                        S2_in = S2_in.cuda()
+                    # Wrap to autograd.Variable
+                    X_in, S1_in, S2_in = Variable(X_in), Variable(
+                        S1_in), Variable(S2_in)
+                    # Forward pass in our neural net
+                    _, predictions = vin(X_in, S1_in, S2_in, config)
+                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
+                    a = indices.data.numpy()[0][0]
+                    # Transform prediction to indices
+                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
+                                           pred_traj[j - 1, 1])
+                    ns = G.sample_next_state(s, a)
+                    nr, nc = G.get_coords(ns)
+                    pred_traj[j, 0] = nr
+                    pred_traj[j, 1] = nc
+                    if nr == goal[0] and nc == goal[1]:
+                        # We hit goal so fill remaining steps
+                        pred_traj[j + 1:, 0] = nr
+                        pred_traj[j + 1:, 1] = nc
+                        break
+                # Success means the last predicted cell is the goal
+                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
+                    logging.debug('#################### - Path Found map %s!\n', dom)
+                    correct += 1
+                    t1 = time.time()
+                    t_list.append(t1-t0)
+                    dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total)
+                    total_dev_rel += dev_rel
+                    total_dev_non_rel += dev_non_rel
+                    total_dist += dist
+                    total_astar_dist += astar_dist
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                elif metrics:
+                    d = dist_left(pred_traj,goal)
+                    dist_remain_avg += d
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                total += 1
+
+
+            # Loaded map for which no reference path of more than one cell exists
+            elif wpn:
+                total_no_soln += 1
+        sys.stdout.write("\r" + str(int(
+            (float(dom) / n_domains) * 100.0)) + "%")
+        sys.stdout.flush()
+
+    sys.stdout.write("\n")
+    if total and correct:
+        logging.info('Rollout Accuracy: %s',(100 * (correct / total)))
+        logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln))))
+        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
+        logging.info('Total avg Rel Deviation %s', (total_dev_rel/total))
+        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total))
+        logging.info('Total avg VIN Distance %s', (total_dist/total))
+        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist/total))
+        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist/total))-((total_dist/total)))/((total_astar_dist/total))))
+        logging.info('Total elapsed time %s', (sum(t_list)/(correct))) #TODO: Possibly add total no soln
+        logging.info('Avg distance left when failed: %s ', (dist_remain_avg/(total-correct)) if total > correct else 0.0)  # no failures -> 0.0 rather than ZeroDivisionError
+        logging.info('---------------------------------Done ------------------------------------')
+
+    else:
+        logging.info('No successes either vin or dijkstra')
+
+
+def visualize(dom, states_xy, pred_traj):
+    '''Draw both paths over the map image, then block on plt.waitforbuttonpress.'''
+    fig, ax = plt.subplots()
+    plt.imshow(dom, cmap="Greys_r")
+    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
+    ax.plot(pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
+    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
+    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
+    legend = ax.legend(loc='upper right', shadow=False)
+    for legend_text in legend.get_texts():
+        legend_text.set_fontsize('x-small')
+    for legend_line in legend.get_lines():
+        legend_line.set_linewidth(0.5)
+    plt.draw()
+    plt.waitforbuttonpress(0)
+    plt.close(fig)
+
+
+def save_image(im, goal, start,states_xy,states_one_hot,counter):
+    '''
+    Record one generated map and rewrite the JSON dump of all maps so far.
+
+    The map is stored as a dict with the keys 'grid' (im.tolist()), 'goal'
+    (the first two entries of goal) and 'agent' (first point of the first
+    trajectory in states_xy). start, states_one_hot and counter are unused.
+    Relies on the module globals ``config`` (imsize names the output file)
+    and ``data`` (the list being accumulated).
+    '''
+    s = config.imsize
+
+    if len(states_xy[0]) == 0:
+        # No trajectory, hence no start cell: fall back to the origin. The old
+        # ``im.tolist()[start_x][start_y] = 1`` here used undefined names
+        # (NameError) and could only have modified a temporary list anyway.
+        agent = [0,0]
+    else:
+        agent = states_xy[0][0].tolist()
+    mp = {
+        'grid': im.tolist(),
+        'goal': [goal[0],goal[1]],
+        'agent': agent
+    }
+    data.append(mp)
+    # Re-dump the whole list each time so the file always reflects every map
+    # saved during this run.
+    with open('./maps/' +str(s) + '_data_300' + '.json', 'w') as outfile:
+        json.dump(data,outfile)
+
+def open_map(dom,path):
+    '''Load the map stored at ``<path><dom>.json`` (plain string concatenation).
+    Returns the file's 'grid', 'goal' and 'agent' entries as a tuple.'''
+    map_file = str(path) + str(dom) + '.json'
+    with open(map_file) as json_file:
+        data = json.load(json_file)
+    logging.debug('Opening file: ' + map_file)
+    return data['grid'], data['goal'], data['agent']
+
+def open_map_list(dom,path):
+    with open(str(path) + '.json') as json_file:  # one file holding a list of maps
+        maps = json.load(json_file)
+    logging.info('Opening file: ' + str(path) + str(dom) + '.json' )
+    return tuple(maps[dom][key] for key in ('grid', 'goal', 'agent'))
+
+def deviation(optimal_path, pred_path,goal, map_num):
+    '''
+    Compare the length of a predicted path with that of the reference path.
+
+    optimal_path and pred_path are sequences of 2-D points listed in visiting
+    order; goal and map_num are accepted for call compatibility but unused.
+    pred_path may end in repeated copies of its last point (the rollout pads
+    the array that way); such repeats add nothing to the length.
+
+    Returns the tuple (dev_rel, dev_non_rel, dist, astar_dist):
+      dist         Euclidean length of pred_path
+      astar_dist   Euclidean length of optimal_path
+      dev_non_rel  absolute difference of the two lengths
+      dev_rel      dev_non_rel / astar_dist (0.0 if astar_dist is zero)
+    '''
+    optimal_path = np.asarray(optimal_path, dtype=float)
+    pred_path = np.asarray(pred_path, dtype=float)
+
+    # The points must stay in visiting order. An earlier version ran
+    # np.unique(pred_path, axis=0) to drop the goal padding, but np.unique also
+    # sorts the rows, so the length was measured along a reordered path.
+    # Nothing needs removing: a repeated point is a step of length zero.
+    pred_steps = np.diff(pred_path[:, :2], axis=0)
+    pred_lengths = np.sqrt((pred_steps ** 2).sum(axis=1))
+    dist = float(pred_lengths.sum())
+
+    # Same measurement for the reference path.
+    optimal_steps = np.diff(optimal_path[:, :2], axis=0)
+    optimal_lengths = np.sqrt((optimal_steps ** 2).sum(axis=1))
+    astar_dist = float(optimal_lengths.sum())
+
+    # Absolute and relative gap between the two lengths.
+    dev_non_rel = abs(astar_dist - dist)
+    # A reference path without any movement has length 0; report 0.0 instead
+    # of dividing by it.
+    dev_rel = dev_non_rel / astar_dist if astar_dist else 0.0
+    return(dev_rel,dev_non_rel,dist,astar_dist)
+
+def dist_left(pred_traj, goal):
+    '''
+    Return the straight-line (Euclidean) distance from the last point of
+    pred_traj to goal; only the first two coordinates of each are used.
+    '''
+    last = np.array(pred_traj)[-1]
+    d_first = goal[0] - last[0]
+    d_second = goal[1] - last[1]
+    return (d_first**2 + d_second**2)**0.5
+
+
+if __name__ == '__main__':
+    # Command-line interface of the 16x16 evaluation script.
+    parser = argparse.ArgumentParser()
+
+    # Checkpoint to evaluate
+    parser.add_argument('--weights', type=str,
+                        default='trained/60k_no_block_att3_vin_16x16.pth',
+                        help='Path to trained weights')
+
+    # Run-mode switches
+    parser.add_argument('--plot', action='store_true', default=False)
+    parser.add_argument('--gen', action='store_true', default=False)
+
+    # Map size and network settings
+    parser.add_argument('--imsize', type=int, default=16, help='Size of image')
+    parser.add_argument('--k', type=int, default=20,
+                        help='Number of Value Iterations')
+    parser.add_argument('--l_i', type=int, default=2,
+                        help='Number of channels in input layer')
+    parser.add_argument('--l_h', type=int, default=150,
+                        help='Number of channels in first hidden layer')
+    parser.add_argument('--l_q', type=int, default=10,
+                        help='Number of channels in q layer (~actions) in VI-module')
+
+    # ``config`` is kept as a module-level name on purpose: save_image()
+    # reads config.imsize from the module globals.
+    config = parser.parse_args()
+
+    # Evaluate the network (a single pass) and log the metrics
+    for i in range(1):
+        main(config)
diff --git a/src/algorithms/learning/VIN/test28.py b/src/algorithms/learning/VIN/test28.py
new file mode 100644
index 000000000..1be43611b
--- /dev/null
+++ b/src/algorithms/learning/VIN/test28.py
@@ -0,0 +1,339 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
+def main(config,
+         n_domains=3000,
+         max_obs=30,
+         max_obs_size=None,
+         n_traj=1,
+         n_actions=8,gen = False):
+    '''Roll the VIN out on n_domains maps and log success and path-length metrics.'''
+    logging.basicConfig(filename='./resources/logs/test28_60k_no_block.log',format='%(asctime)s-%(levelname)s:%(message)s', level=logging.INFO)
+    correct, total = 0.0, 0.0
+    gen = gen or getattr(config, 'gen', False)  # honour the parsed --gen flag
+    use_GPU = torch.cuda.is_available()
+    # Instantiate a VIN model
+    vin = VIN(config)
+    # Load model parameters (remapped to the CPU when no GPU is present)
+    vin.load_state_dict(torch.load(config.weights, map_location=None if use_GPU else torch.device("cpu")))
+    # Use GPU if available
+    if use_GPU:
+        vin = vin.cuda()
+    counter,total_no_soln,wpn = 0,0,False  # wpn is bound up front so gen mode cannot hit an unbound name
+    global data
+    data = []
+    t_list = []
+    total_dev_non_rel, total_dev_rel = 0.0,0.0
+    total_dist, total_astar_dist = 0.0,0.0
+    metrics = True #this enables displaying the distance left to reach goal upon a failure
+    dist_remain_avg = 0.0
+    for dom in range(n_domains):
+        if gen:
+            goal = [
+                np.random.randint(config.imsize),
+                np.random.randint(config.imsize)
+            ]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            # Add obstacles to map
+            n_obs = obs.add_n_rand_obs(max_obs)
+            # Add border to map
+            border_res = obs.add_border()
+            # Ensure we have valid map
+            if n_obs == 0 or not border_res:
+                continue
+            start = None
+        else:
+            wpn = True
+            # path = './resources/maps/'
+            path = './resources/testing_maps/28x28/'
+            mp, goal, start = open_map(dom,path)
+            # path = './maps/8_data_300'
+            # mp, goal, start = open_map_list(dom,path)
+            mp[start[1]][start[0]] = 0 #Set the start position as freespace too
+            mp[goal[1]][goal[0]] = 0 #Set the goal position as freespace too
+
+            goal = [goal[1],goal[0]] #swap them around, for the row col format (x = col not row)
+            start = [start[1],start[0]]
+            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+            obs.dom = mp
+
+        # Get final map
+        im = obs.get_final()
+
+
+        #1 is obstacles.
+        #set obs.dom as the mp
+        logging.debug('0 is obstacle ')
+        logging.debug(' im: %s ', im)
+        # Generate gridworld from obstacle map
+        G = gridworld(im, goal[0], goal[1])
+        # Get value prior
+        value_prior = G.get_reward_prior()
+        # Sample random trajectories to our goal
+        states_xy, states_one_hot = sample_trajectory(G, n_traj,start,gen) #dijkstra trajectory
+        # print('states_xy', states_xy[0] , len(states_xy[0]))
+        if gen and len(states_xy[0]) > 0:
+            save_image(G.image,(goal[0],goal[1]),states_xy[0][0],states_xy, states_one_hot,counter) #this saves the maps
+
+        counter += 1
+        for i in range(n_traj):
+            if len(states_xy[i]) > 1:
+                t0 = time.time()
+                # Allow the rollout twice as many steps as the reference path
+                L = len(states_xy[i]) * 2
+                # Allocate space for predicted steps
+                pred_traj = np.zeros((L, 2))
+                # Set starting position
+                pred_traj[0, :] = states_xy[i][0, :]
+
+                for j in range(1, L):
+                    # Transform current state data
+                    state_data = pred_traj[j - 1, :]
+                    state_data = state_data.astype(int)  # np.int was removed in NumPy 1.24
+                    # Transform domain to Networks expected input shape
+                    im_data = G.image.astype(int)
+                    im_data = 1 - im_data
+                    im_data = im_data.reshape(1, 1, config.imsize,
+                                              config.imsize)
+                    # Transform value prior to Networks expected input shape
+                    value_data = value_prior.astype(int)
+                    value_data = value_data.reshape(1, 1, config.imsize,
+                                                    config.imsize)
+                    # Get inputs as expected by network
+                    X_in = torch.from_numpy(
+                        np.append(im_data, value_data, axis=1)).float()
+                    S1_in = torch.from_numpy(state_data[0].reshape(
+                        [1, 1])).float()
+                    S2_in = torch.from_numpy(state_data[1].reshape(
+                        [1, 1])).float()
+                    # Send Tensors to GPU if available
+                    if use_GPU:
+                        X_in = X_in.cuda()
+                        S1_in = S1_in.cuda()
+                        S2_in = S2_in.cuda()
+                    # Wrap to autograd.Variable
+                    X_in, S1_in, S2_in = Variable(X_in), Variable(
+                        S1_in), Variable(S2_in)
+                    # Forward pass in our neural net
+                    _, predictions = vin(X_in, S1_in, S2_in, config)
+                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
+                    a = indices.data.numpy()[0][0]
+                    # Transform prediction to indices
+                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
+                                           pred_traj[j - 1, 1])
+                    ns = G.sample_next_state(s, a)
+                    nr, nc = G.get_coords(ns)
+                    pred_traj[j, 0] = nr
+                    pred_traj[j, 1] = nc
+                    if nr == goal[0] and nc == goal[1]:
+                        # We hit goal so fill remaining steps
+                        pred_traj[j + 1:, 0] = nr
+                        pred_traj[j + 1:, 1] = nc
+                        break
+                # Success means the last predicted cell is the goal
+                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
+                    logging.debug('#################### - Path Found map %s!\n', dom)
+                    correct += 1
+                    t1 = time.time()
+                    t_list.append(t1-t0)
+                    dev_rel,dev_non_rel,dist,astar_dist = deviation(states_xy[i],pred_traj,goal,total)
+                    total_dev_rel += dev_rel
+                    total_dev_non_rel += dev_non_rel
+                    total_dist += dist
+                    total_astar_dist += astar_dist
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                elif metrics:
+                    d = dist_left(pred_traj,goal)
+                    dist_remain_avg += d
+                    if config.plot:
+                        visualize(G.image.T, states_xy[i], pred_traj)
+                total += 1
+
+
+            # Loaded map for which no reference path of more than one cell exists
+            elif wpn:
+                total_no_soln += 1
+        sys.stdout.write("\r" + str(int(
+            (float(dom) / n_domains) * 100.0)) + "%")
+        sys.stdout.flush()
+
+    sys.stdout.write("\n")
+    if total and correct:
+        logging.info('Rollout Accuracy Dijkstra: %s',(100*((total-total_no_soln)/total)))
+        logging.info('Rollout Accuracy: %s',(100 * (correct / total)))
+        logging.info('Rollout Accuracy Adjusted: %s',(100 * (correct / (total+total_no_soln))))
+        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
+        logging.info('Total avg Rel Deviation %s', (total_dev_rel/total))
+        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel/total))
+        logging.info('Total avg VIN Distance %s', (total_dist/total))
+        logging.info('Total avg Dijkstra Distance %s', (total_astar_dist/total))
+        logging.info('Avg deviation from Dijkstra: %s', ((((total_astar_dist/total))-((total_dist/total)))/((total_astar_dist/total))))
+        logging.info('Total elapsed time %s', (sum(t_list)/(correct))) #TODO: Possibly add total no soln
+        logging.info('Avg distance left when failed: %s ', (dist_remain_avg/(total-correct)) if total > correct else 0.0)  # no failures -> 0.0 rather than ZeroDivisionError
+        logging.info('---------------------------------Done ------------------------------------')
+
+    else:
+        logging.info('No successes either vin or dijkstra')
+
+
+def visualize(dom, states_xy, pred_traj):
+    '''Draw both paths over the map image, then block on plt.waitforbuttonpress.'''
+    fig, ax = plt.subplots()
+    plt.imshow(dom, cmap="Greys_r")
+    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
+    ax.plot(pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
+    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
+    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
+    legend = ax.legend(loc='upper right', shadow=False)
+    for legend_text in legend.get_texts():
+        legend_text.set_fontsize('x-small')
+    for legend_line in legend.get_lines():
+        legend_line.set_linewidth(0.5)
+    plt.draw()
+    plt.waitforbuttonpress(0)
+    plt.close(fig)
+
+
+def save_image(im, goal, start,states_xy,states_one_hot,counter):
+    '''
+    Record one generated map and rewrite the JSON dump of all maps so far.
+
+    The map is stored as a dict with the keys 'grid' (im.tolist()), 'goal'
+    (the first two entries of goal) and 'agent' (first point of the first
+    trajectory in states_xy). start, states_one_hot and counter are unused.
+    Relies on the module globals ``config`` (imsize names the output file)
+    and ``data`` (the list being accumulated).
+    '''
+    s = config.imsize
+
+    if len(states_xy[0]) == 0:
+        # No trajectory, hence no start cell: fall back to the origin. The old
+        # ``im.tolist()[start_x][start_y] = 1`` here used undefined names
+        # (NameError) and could only have modified a temporary list anyway.
+        agent = [0,0]
+    else:
+        agent = states_xy[0][0].tolist()
+    mp = {
+        'grid': im.tolist(),
+        'goal': [goal[0],goal[1]],
+        'agent': agent
+    }
+    data.append(mp)
+    # Re-dump the whole list each time so the file always reflects every map
+    # saved during this run.
+    with open('./maps/' +str(s) + '_data_300' + '.json', 'w') as outfile:
+        json.dump(data,outfile)
+
+def open_map(dom,path):
+    '''Load the map stored at ``<path><dom>.json`` (plain string concatenation).
+    Returns the file's 'grid', 'goal' and 'agent' entries as a tuple.'''
+    map_file = str(path) + str(dom) + '.json'
+    with open(map_file) as json_file:
+        data = json.load(json_file)
+    logging.info('Opening file: ' + map_file)
+    return data['grid'], data['goal'], data['agent']
+
+def open_map_list(dom,path):
+    with open(str(path) + '.json') as json_file:  # one file holding a list of maps
+        maps = json.load(json_file)
+    logging.info('Opening file: ' + str(path) + str(dom) + '.json' )
+    return tuple(maps[dom][key] for key in ('grid', 'goal', 'agent'))
+
+def deviation(optimal_path, pred_path,goal, map_num):
+    '''
+    Compare the length of a predicted path with that of the reference path.
+
+    optimal_path and pred_path are sequences of 2-D points listed in visiting
+    order; goal and map_num are accepted for call compatibility but unused.
+    pred_path may end in repeated copies of its last point (the rollout pads
+    the array that way); such repeats add nothing to the length.
+
+    Returns the tuple (dev_rel, dev_non_rel, dist, astar_dist):
+      dist         Euclidean length of pred_path
+      astar_dist   Euclidean length of optimal_path
+      dev_non_rel  absolute difference of the two lengths
+      dev_rel      dev_non_rel / astar_dist (0.0 if astar_dist is zero)
+    '''
+    optimal_path = np.asarray(optimal_path, dtype=float)
+    pred_path = np.asarray(pred_path, dtype=float)
+
+    # The points must stay in visiting order. An earlier version ran
+    # np.unique(pred_path, axis=0) to drop the goal padding, but np.unique also
+    # sorts the rows, so the length was measured along a reordered path.
+    # Nothing needs removing: a repeated point is a step of length zero.
+    pred_steps = np.diff(pred_path[:, :2], axis=0)
+    pred_lengths = np.sqrt((pred_steps ** 2).sum(axis=1))
+    dist = float(pred_lengths.sum())
+
+    # Same measurement for the reference path.
+    optimal_steps = np.diff(optimal_path[:, :2], axis=0)
+    optimal_lengths = np.sqrt((optimal_steps ** 2).sum(axis=1))
+    astar_dist = float(optimal_lengths.sum())
+
+    # Absolute and relative gap between the two lengths.
+    dev_non_rel = abs(astar_dist - dist)
+    # A reference path without any movement has length 0; report 0.0 instead
+    # of dividing by it.
+    dev_rel = dev_non_rel / astar_dist if astar_dist else 0.0
+    return(dev_rel,dev_non_rel,dist,astar_dist)
+
+def dist_left(pred_traj, goal):
+    '''
+    Return the straight-line (Euclidean) distance from the last point of
+    pred_traj to goal; only the first two coordinates of each are used.
+    '''
+    last = np.array(pred_traj)[-1]
+    d_first = goal[0] - last[0]
+    d_second = goal[1] - last[1]
+    return (d_first**2 + d_second**2)**0.5
+
+
+if __name__ == '__main__':
+    # Command-line interface of the 28x28 evaluation script.
+    parser = argparse.ArgumentParser()
+
+    # Checkpoint to evaluate
+    parser.add_argument('--weights', type=str,
+                        default='trained/60k_no_block_att3_vin_28x28.pth',
+                        help='Path to trained weights')
+
+    # Run-mode switches
+    parser.add_argument('--plot', action='store_true', default=False)
+    parser.add_argument('--gen', action='store_true', default=False)
+
+    # Map size and network settings
+    parser.add_argument('--imsize', type=int, default=28, help='Size of image')
+    parser.add_argument('--k', type=int, default=36,
+                        help='Number of Value Iterations')
+    parser.add_argument('--l_i', type=int, default=2,
+                        help='Number of channels in input layer')
+    parser.add_argument('--l_h', type=int, default=150,
+                        help='Number of channels in first hidden layer')
+    parser.add_argument('--l_q', type=int, default=10,
+                        help='Number of channels in q layer (~actions) in VI-module')
+
+    # ``config`` is kept as a module-level name on purpose: save_image()
+    # reads config.imsize from the module globals.
+    config = parser.parse_args()
+
+    # Evaluate the network (a single pass) and log the metrics
+    for i in range(1):
+        main(config)
diff --git a/src/algorithms/learning/VIN/test64.py b/src/algorithms/learning/VIN/test64.py
new file mode 100644
index 000000000..e58306b9b
--- /dev/null
+++ b/src/algorithms/learning/VIN/test64.py
@@ -0,0 +1,339 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=600,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    '''
    Roll out a trained VIN on a series of maps and log success and
    path-length statistics.

    For each map a reference trajectory is obtained from sample_trajectory.
    The network is then queried one step at a time, for at most twice as
    many steps as the reference trajectory has points; the rollout counts
    as correct when its last position equals the goal.

    Args:
        config: parsed options. `weights`, `imsize` and `plot` are read
            here; the object is also passed to VIN() and to the forward call.
        n_domains: number of maps to process (files 0..n_domains-1 are
            opened when gen is False).
        max_obs: number of random obstacles requested when gen is True.
        max_obs_size: forwarded to obstacles().
        n_traj: number of reference trajectories sampled per map.
        n_actions: not used inside this function.
        gen: True builds random maps and stores them through save_image;
            False loads maps from disk through open_map.

    Side effects: configures file logging, rebinds the module-level `data`
    list and writes a progress percentage to stdout.
    '''
    logging.basicConfig(filename='./resources/logs/test_64_30k_64x64_600',
                        format='%(asctime)s-%(levelname)s:%(message)s',
                        level=logging.INFO)
    # Correct vs total
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    vin = VIN(config)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    global data
    data = []  # read by save_image
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # accumulate the distance left to the goal on a failure
    dist_remain_avg = 0.0
    # Bound up front so the `elif wpn` below is defined in gen mode as well
    # (it used to be assigned only in the load-from-disk branch).
    wpn = False
    imsize = config.imsize

    for dom in range(n_domains):
        if gen:
            goal = [np.random.randint(imsize), np.random.randint(imsize)]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            # Add obstacles and a border to the map
            n_obs = obs.add_n_rand_obs(max_obs)
            border_res = obs.add_border()
            # Ensure we have a valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            wpn = True
            path = './resources/testing_maps/64x64_300/'
            mp, goal, start = open_map(dom, path)
            # Overwrite the start and goal cells of the loaded grid with 0
            mp[start[1]][start[0]] = 0
            mp[goal[1]][goal[0]] = 0
            # Swap to row/col order (x = col, not row)
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Reference trajectories to the goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice as many steps as the reference trajectory
                L = len(states_xy[i]) * 2
                pred_traj = np.zeros((L, 2))
                pred_traj[0, :] = states_xy[i][0, :]

                # The map/value-prior input does not change between steps,
                # so it is built once per rollout. `int` replaces the
                # removed alias `np.int`.
                im_data = (1 - G.image.astype(int)).reshape(
                    1, 1, imsize, imsize)
                value_data = value_prior.astype(int).reshape(
                    1, 1, imsize, imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current position as integer indices
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass, then take the highest-scoring action
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Apply the action in the gridworld
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit the goal, so fill the remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                # Loaded map for which no usable reference trajectory exists
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        avg_vin_dist = total_dist / total
        avg_ref_dist = total_astar_dist / total
        failed = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failed if failed else 0.0
        logging.info('Rollout Accuracy Dijkstra: %s', (100 * ((total - total_no_soln) / total)))
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', avg_vin_dist)
        logging.info('Total avg Dijkstra Distance %s', avg_ref_dist)
        logging.info('Avg deviation from Dijkstra: %s', ((avg_ref_dist - avg_vin_dist) / avg_ref_dist))
        logging.info('Total elapsed time %s', (sum(t_list) / correct))  # TODO: Possibly add total no soln
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    '''
    Draw the map together with the optimal and the predicted path, then
    wait on plt.waitforbuttonpress(0) before closing the figure.

    dom:       2-D image shown in grey scale.
    states_xy: array of optimal-path points; column 0 is plotted against
               column 1, and its first/last rows are marked Start/Goal.
    pred_traj: array of predicted-path points, plotted the same way.
    '''
    figure, axes = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    optimal_a = states_xy[:, 0]
    optimal_b = states_xy[:, 1]
    axes.plot(optimal_a, optimal_b, c='b', label='Optimal Path')
    axes.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    axes.plot(optimal_a[0], optimal_b[0], '-o', label='Start')
    axes.plot(optimal_a[-1], optimal_b[-1], '-s', label='Goal')

    legend = axes.legend(loc='upper right', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for handle in legend.get_lines():
        handle.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(figure)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Append one generated map to the module-level `data` list and rewrite
    the JSON file holding every map collected so far.

    Two module-level names defined outside this function are required:
    `config` (its `imsize` is part of the output file name) and `data`
    (the running list of saved maps).

    Args:
        im: array with a tolist() method; stored under 'grid'.
        goal: indexable whose first two items are stored under 'goal'.
        start, states_one_hot, counter: accepted but not used here.
        states_xy: sequence of trajectories; the first point of the first
            trajectory is stored under 'agent'.
    '''
    if len(states_xy[0]) == 0:
        # No trajectory available: fall back to [0, 0] as agent position.
        # NOTE(review): the previous code also ran
        # `im.tolist()[start_x][start_y] = 1` here, which raised NameError
        # (both names undefined) and would only have changed a temporary
        # list; it is dropped. Confirm whether the saved grid should be
        # modified for this case.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    # The whole list is dumped again on every call.
    with open('./maps/' + str(config.imsize) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Load one map from the JSON file named '<path><dom>.json'.

    `path` and `dom` are joined by plain string concatenation, so `path`
    must already end with whatever separator or prefix the file name needs.

    Returns the values stored under 'grid', 'goal' and 'agent'.
    Raises whatever open(), json.load() or the key lookups raise.
    '''
    file_name = str(path) + str(dom) + '.json'
    # Logged before opening so a failing file can be identified in the log.
    logging.info('Opening file: %s', file_name)
    with open(file_name) as json_file:
        # Named map_data so it does not shadow the module-level `data` list.
        map_data = json.load(json_file)
    return map_data['grid'], map_data['goal'], map_data['agent']
+
def open_map_list(dom, path):
    '''
    Load entry `dom` from the JSON list stored in '<path>.json'.

    Returns the values stored under 'grid', 'goal' and 'agent' of that
    entry. Raises whatever open(), json.load() or the lookups raise.
    '''
    file_name = str(path) + '.json'
    # The message now names the file that is really opened; it used to
    # print '<path><dom>.json', which is not read by this function.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    with open(file_name) as json_file:
        maps = json.load(json_file)
    entry = maps[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    '''
    Compare the length of a predicted path with that of the optimal path.

    The length of a path is the sum of the Euclidean distances between
    consecutive points, taken in the order the points are given. Only the
    first two components of every point are used.

    The previous implementation passed pred_path through
    np.unique(axis=0), which sorts the rows and therefore measured the
    length of a reordered path. Repeated points (e.g. the goal padding at
    the end of a rollout) add zero length, so no de-duplication is needed.

    Args:
        optimal_path: sequence of points of the reference path.
        pred_path: sequence of points of the predicted path.
        goal: unused; kept for existing callers.
        map_num: unused; kept for existing callers.

    Returns:
        (dev_rel, dev_non_rel, dist, astar_dist) as Python floats:
            dev_non_rel - absolute difference of the two lengths
            dev_rel     - dev_non_rel / optimal length; 0.0 when both are
                          zero, inf when only the optimal length is zero
            dist        - length of pred_path
            astar_dist  - length of optimal_path
    '''
    def _path_length(path):
        # Sum of the step lengths along `path`; 0.0 for fewer than 2 points.
        points = np.asarray(path, dtype=float)
        if points.shape[0] < 2:
            return 0.0
        steps = np.diff(points[:, :2], axis=0)
        return float(np.hypot(steps[:, 0], steps[:, 1]).sum())

    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist  # TODO: Add avg distance of gen trajectory
    else:
        # A zero-length reference path has no meaningful relative deviation.
        dev_rel = 0.0 if dev_non_rel == 0 else math.inf
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    '''
    Return the straight-line distance between the last point of a
    trajectory and the goal.

    pred_traj: sequence of points; only the first two components of the
               last point are used.
    goal:      indexable whose first two components are compared with
               that last point.
    '''
    final_point = np.array(pred_traj)[-1]
    delta_a = goal[0] - final_point[0]
    delta_b = goal[1] - final_point[1]
    return (delta_a ** 2 + delta_b ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Command-line options for a test run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=64, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=48, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # `config` stays a module-level name because save_image reads it.
    config = parser.parse_args()
    # Forward --gen: it used to be parsed and then ignored, so the flag had
    # no effect. Without the flag the call is equivalent to main(config).
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/test8.py b/src/algorithms/learning/VIN/test8.py
new file mode 100644
index 000000000..4f2946c84
--- /dev/null
+++ b/src/algorithms/learning/VIN/test8.py
@@ -0,0 +1,338 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=3000,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    '''
    Roll out a trained VIN on a series of maps and log success and
    path-length statistics.

    For each map a reference trajectory is obtained from sample_trajectory.
    The network is then queried one step at a time, for at most twice as
    many steps as the reference trajectory has points; the rollout counts
    as correct when its last position equals the goal.

    Args:
        config: parsed options. `weights`, `imsize` and `plot` are read
            here; the object is also passed to VIN() and to the forward call.
        n_domains: number of maps to process (files 0..n_domains-1 are
            opened when gen is False).
        max_obs: number of random obstacles requested when gen is True.
        max_obs_size: forwarded to obstacles().
        n_traj: number of reference trajectories sampled per map.
        n_actions: not used inside this function.
        gen: True builds random maps and stores them through save_image;
            False loads maps from disk through open_map.

    Side effects: configures file logging, rebinds the module-level `data`
    list and writes a progress percentage to stdout.
    '''
    logging.basicConfig(filename='./resources/logs/test8_rerun_60k_no_block.log',
                        format='%(asctime)s-%(levelname)s:%(message)s',
                        level=logging.INFO)
    # Correct vs total
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    vin = VIN(config)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    global data
    data = []  # read by save_image
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # accumulate the distance left to the goal on a failure
    dist_remain_avg = 0.0
    # Bound up front so the `elif wpn` below is defined in gen mode as well
    # (it used to be assigned only in the load-from-disk branch).
    wpn = False
    imsize = config.imsize

    for dom in range(n_domains):
        if gen:
            goal = [np.random.randint(imsize), np.random.randint(imsize)]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            # Add obstacles and a border to the map
            n_obs = obs.add_n_rand_obs(max_obs)
            border_res = obs.add_border()
            # Ensure we have a valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            wpn = True
            path = './resources/testing_maps/8x8/'
            mp, goal, start = open_map(dom, path)
            # Overwrite the start and goal cells of the loaded grid with 0
            mp[start[1]][start[0]] = 0
            mp[goal[1]][goal[0]] = 0
            # Swap to row/col order (x = col, not row)
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Reference trajectories to the goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice as many steps as the reference trajectory
                L = len(states_xy[i]) * 2
                pred_traj = np.zeros((L, 2))
                pred_traj[0, :] = states_xy[i][0, :]

                # The map/value-prior input does not change between steps,
                # so it is built once per rollout. `int` replaces the
                # removed alias `np.int`.
                im_data = (1 - G.image.astype(int)).reshape(
                    1, 1, imsize, imsize)
                value_data = value_prior.astype(int).reshape(
                    1, 1, imsize, imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current position as integer indices
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass, then take the highest-scoring action
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Apply the action in the gridworld
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit the goal, so fill the remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                # Loaded map for which no usable reference trajectory exists
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        avg_vin_dist = total_dist / total
        avg_ref_dist = total_astar_dist / total
        failed = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failed if failed else 0.0
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', avg_vin_dist)
        logging.info('Total avg Dijkstra Distance %s', avg_ref_dist)
        logging.info('Avg deviation from Dijkstra: %s', ((avg_ref_dist - avg_vin_dist) / avg_ref_dist))
        logging.info('Total elapsed time %s', (sum(t_list) / correct))
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    '''
    Draw the map together with the optimal and the predicted path, then
    wait on plt.waitforbuttonpress(0) before closing the figure.

    dom:       2-D image shown in grey scale.
    states_xy: array of optimal-path points; column 0 is plotted against
               column 1, and its first/last rows are marked Start/Goal.
    pred_traj: array of predicted-path points, plotted the same way.
    '''
    figure, axes = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    optimal_a = states_xy[:, 0]
    optimal_b = states_xy[:, 1]
    axes.plot(optimal_a, optimal_b, c='b', label='Optimal Path')
    axes.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    axes.plot(optimal_a[0], optimal_b[0], '-o', label='Start')
    axes.plot(optimal_a[-1], optimal_b[-1], '-s', label='Goal')

    legend = axes.legend(loc='upper right', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for handle in legend.get_lines():
        handle.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(figure)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Append one generated map to the module-level `data` list and rewrite
    the JSON file holding every map collected so far.

    Two module-level names defined outside this function are required:
    `config` (its `imsize` is part of the output file name) and `data`
    (the running list of saved maps).

    Args:
        im: array with a tolist() method; stored under 'grid'.
        goal: indexable whose first two items are stored under 'goal'.
        start, states_one_hot, counter: accepted but not used here.
        states_xy: sequence of trajectories; the first point of the first
            trajectory is stored under 'agent'.
    '''
    if len(states_xy[0]) == 0:
        # No trajectory available: fall back to [0, 0] as agent position.
        # NOTE(review): the previous code also ran
        # `im.tolist()[start_x][start_y] = 1` here, which raised NameError
        # (both names undefined) and would only have changed a temporary
        # list; it is dropped. Confirm whether the saved grid should be
        # modified for this case.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    # The whole list is dumped again on every call.
    with open('./maps/' + str(config.imsize) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Load one map from the JSON file named '<path><dom>.json'.

    `path` and `dom` are joined by plain string concatenation, so `path`
    must already end with whatever separator or prefix the file name needs.

    Returns the values stored under 'grid', 'goal' and 'agent'.
    Raises whatever open(), json.load() or the key lookups raise.
    '''
    file_name = str(path) + str(dom) + '.json'
    # Logged before opening so a failing file can be identified in the log.
    logging.info('Opening file: %s', file_name)
    with open(file_name) as json_file:
        # Named map_data so it does not shadow the module-level `data` list.
        map_data = json.load(json_file)
    return map_data['grid'], map_data['goal'], map_data['agent']
+
def open_map_list(dom, path):
    '''
    Load entry `dom` from the JSON list stored in '<path>.json'.

    Returns the values stored under 'grid', 'goal' and 'agent' of that
    entry. Raises whatever open(), json.load() or the lookups raise.
    '''
    file_name = str(path) + '.json'
    # The message now names the file that is really opened; it used to
    # print '<path><dom>.json', which is not read by this function.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    with open(file_name) as json_file:
        maps = json.load(json_file)
    entry = maps[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    '''
    Compare the length of a predicted path with that of the optimal path.

    The length of a path is the sum of the Euclidean distances between
    consecutive points, taken in the order the points are given. Only the
    first two components of every point are used.

    The previous implementation passed pred_path through
    np.unique(axis=0), which sorts the rows and therefore measured the
    length of a reordered path. Repeated points (e.g. the goal padding at
    the end of a rollout) add zero length, so no de-duplication is needed.

    Args:
        optimal_path: sequence of points of the reference path.
        pred_path: sequence of points of the predicted path.
        goal: unused; kept for existing callers.
        map_num: unused; kept for existing callers.

    Returns:
        (dev_rel, dev_non_rel, dist, astar_dist) as Python floats:
            dev_non_rel - absolute difference of the two lengths
            dev_rel     - dev_non_rel / optimal length; 0.0 when both are
                          zero, inf when only the optimal length is zero
            dist        - length of pred_path
            astar_dist  - length of optimal_path
    '''
    def _path_length(path):
        # Sum of the step lengths along `path`; 0.0 for fewer than 2 points.
        points = np.asarray(path, dtype=float)
        if points.shape[0] < 2:
            return 0.0
        steps = np.diff(points[:, :2], axis=0)
        return float(np.hypot(steps[:, 0], steps[:, 1]).sum())

    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist  # TODO: Add avg distance of gen trajectory
    else:
        # A zero-length reference path has no meaningful relative deviation.
        dev_rel = 0.0 if dev_non_rel == 0 else math.inf
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    '''
    Return the straight-line distance between the last point of a
    trajectory and the goal.

    pred_traj: sequence of points; only the first two components of the
               last point are used.
    goal:      indexable whose first two components are compared with
               that last point.
    '''
    final_point = np.array(pred_traj)[-1]
    delta_a = goal[0] - final_point[0]
    delta_b = goal[1] - final_point[1]
    return (delta_a ** 2 + delta_b ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Command-line options for a test run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/60k_no_block_att3_vin_8x8.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=8, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=10, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # `config` stays a module-level name because save_image reads it.
    config = parser.parse_args()
    # Forward --gen: it used to be parsed and then ignored, so the flag had
    # no effect. Without the flag the call is equivalent to main(config).
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/test_house_expo.py b/src/algorithms/learning/VIN/test_house_expo.py
new file mode 100644
index 000000000..e2a9b6666
--- /dev/null
+++ b/src/algorithms/learning/VIN/test_house_expo.py
@@ -0,0 +1,340 @@
+import sys
+import argparse
+import json
+import matplotlib.pyplot as plt
+import random
+import numpy as np
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld import *
+from generators.obstacle_gen import *
+
+import logging
+import time
+import math
+
def main(config,
         n_domains=30,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8, gen=False):
    '''
    Roll out a trained VIN on a series of maps and log success and
    path-length statistics.

    For each map a reference trajectory is obtained from sample_trajectory.
    The network is then queried one step at a time, for at most twice as
    many steps as the reference trajectory has points; the rollout counts
    as correct when its last position equals the goal.

    Args:
        config: parsed options. `weights`, `imsize` and `plot` are read
            here; the object is also passed to VIN() and to the forward call.
        n_domains: number of maps to process (files 0..n_domains-1 are
            opened when gen is False).
        max_obs: number of random obstacles requested when gen is True.
        max_obs_size: forwarded to obstacles().
        n_traj: number of reference trajectories sampled per map.
        n_actions: not used inside this function.
        gen: True builds random maps and stores them through save_image;
            False loads maps from disk through open_map.

    Side effects: configures file logging, rebinds the module-level `data`
    list and writes a progress percentage to stdout.
    '''
    logging.basicConfig(filename='./resources/logs/house_expo.log',
                        format='%(asctime)s-%(levelname)s:%(message)s',
                        level=logging.INFO)
    # Correct vs total
    correct, total = 0.0, 0.0
    # Automatic switch to GPU mode if available
    use_GPU = torch.cuda.is_available()
    vin = VIN(config)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    vin.load_state_dict(
        torch.load(config.weights,
                   map_location=None if use_GPU else 'cpu'))
    if use_GPU:
        vin = vin.cuda()
    counter, total_no_soln = 0, 0
    global data
    data = []  # read by save_image
    t_list = []
    total_dev_non_rel, total_dev_rel = 0.0, 0.0
    total_dist, total_astar_dist = 0.0, 0.0
    metrics = True  # accumulate the distance left to the goal on a failure
    dist_remain_avg = 0.0
    # Bound up front so the `elif wpn` below is defined in gen mode as well
    # (it used to be assigned only in the load-from-disk branch).
    wpn = False
    imsize = config.imsize

    for dom in range(n_domains):
        if gen:
            goal = [np.random.randint(imsize), np.random.randint(imsize)]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            # Add obstacles and a border to the map
            n_obs = obs.add_n_rand_obs(max_obs)
            border_res = obs.add_border()
            # Ensure we have a valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            wpn = True
            path = './resources/house_expo/'
            mp, goal, start = open_map(dom, path)
            # Overwrite the start and goal cells of the loaded grid with 0
            mp[start[1]][start[0]] = 0
            mp[goal[1]][goal[0]] = 0
            # Swap to row/col order (x = col, not row)
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([imsize, imsize], goal, max_obs_size)
            obs.dom = mp

        # Get final map
        im = obs.get_final()
        logging.debug('0 is obstacle ')
        logging.debug(' im: %s ', im)
        # Generate gridworld from obstacle map. The stray print('got here')
        # that interleaved with the stdout progress display is now a debug
        # log record.
        logging.debug('got here')
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Reference trajectories to the goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0],
                       states_xy, states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                t0 = time.time()
                # Allow twice as many steps as the reference trajectory
                L = len(states_xy[i]) * 2
                pred_traj = np.zeros((L, 2))
                pred_traj[0, :] = states_xy[i][0, :]

                # The map/value-prior input does not change between steps,
                # so it is built once per rollout. `int` replaces the
                # removed alias `np.int`.
                im_data = (1 - G.image.astype(int)).reshape(
                    1, 1, imsize, imsize)
                value_data = value_prior.astype(int).reshape(
                    1, 1, imsize, imsize)
                X_in = torch.from_numpy(
                    np.append(im_data, value_data, axis=1)).float()
                if use_GPU:
                    X_in = X_in.cuda()
                X_in = Variable(X_in)

                for j in range(1, L):
                    # Current position as integer indices
                    state_data = pred_traj[j - 1, :].astype(int)
                    S1_in = torch.from_numpy(
                        state_data[0].reshape([1, 1])).float()
                    S2_in = torch.from_numpy(
                        state_data[1].reshape([1, 1])).float()
                    if use_GPU:
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    S1_in, S2_in = Variable(S1_in), Variable(S2_in)
                    # Forward pass, then take the highest-scoring action
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Apply the action in the gridworld
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit the goal, so fill the remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break

                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    logging.debug('#################### - Path Found map %s!\n', dom)
                    correct += 1
                    t1 = time.time()
                    t_list.append(t1 - t0)
                    dev_rel, dev_non_rel, dist, astar_dist = deviation(
                        states_xy[i], pred_traj, goal, total)
                    total_dev_rel += dev_rel
                    total_dev_non_rel += dev_non_rel
                    total_dist += dist
                    total_astar_dist += astar_dist
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                elif metrics:
                    dist_remain_avg += dist_left(pred_traj, goal)
                    if config.plot:
                        visualize(G.image.T, states_xy[i], pred_traj)
                total += 1
            elif wpn:
                # Loaded map for which no usable reference trajectory exists
                total_no_soln += 1
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()

    sys.stdout.write("\n")
    if total and correct:
        avg_vin_dist = total_dist / total
        avg_ref_dist = total_astar_dist / total
        failed = total - correct
        # With no failed rollout there is nothing to average; report 0.0
        # instead of dividing by zero.
        avg_dist_left = dist_remain_avg / failed if failed else 0.0
        logging.info('Rollout Accuracy Dijkstra: %s', (100 * ((total - total_no_soln) / total)))
        logging.info('Rollout Accuracy: %s', (100 * (correct / total)))
        logging.info('Rollout Accuracy Adjusted: %s', (100 * (correct / (total + total_no_soln))))
        logging.info('Total maps with no soln from Dijkstra %s', total_no_soln)
        logging.info('Total avg Rel Deviation %s', (total_dev_rel / total))
        logging.info('Total avg Non-Rel Deviation %s', (total_dev_non_rel / total))
        logging.info('Total avg VIN Distance %s', avg_vin_dist)
        logging.info('Total avg Dijkstra Distance %s', avg_ref_dist)
        logging.info('Avg deviation from Dijkstra: %s', ((avg_ref_dist - avg_vin_dist) / avg_ref_dist))
        logging.info('Total elapsed time %s', (sum(t_list) / correct))  # TODO: Possibly add total no soln
        logging.info('Avg distance left when failed: %s ', avg_dist_left)
        logging.info('---------------------------------Done ------------------------------------')
    else:
        logging.info('No successes either vin or dijkstra')
+
+
def visualize(dom, states_xy, pred_traj):
    '''
    Draw the map together with the optimal and the predicted path, then
    wait on plt.waitforbuttonpress(0) before closing the figure.

    dom:       2-D image shown in grey scale.
    states_xy: array of optimal-path points; column 0 is plotted against
               column 1, and its first/last rows are marked Start/Goal.
    pred_traj: array of predicted-path points, plotted the same way.
    '''
    figure, axes = plt.subplots()
    plt.imshow(dom, cmap="Greys_r")

    optimal_a = states_xy[:, 0]
    optimal_b = states_xy[:, 1]
    axes.plot(optimal_a, optimal_b, c='b', label='Optimal Path')
    axes.plot(
        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
    axes.plot(optimal_a[0], optimal_b[0], '-o', label='Start')
    axes.plot(optimal_a[-1], optimal_b[-1], '-s', label='Goal')

    legend = axes.legend(loc='upper right', shadow=False)
    for text in legend.get_texts():
        text.set_fontsize('x-small')  # the legend text size
    for handle in legend.get_lines():
        handle.set_linewidth(0.5)  # the legend line width

    plt.draw()
    plt.waitforbuttonpress(0)
    plt.close(figure)
+
+
def save_image(im, goal, start, states_xy, states_one_hot, counter):
    '''
    Append one generated map to the module-level `data` list and rewrite
    the JSON file holding every map collected so far.

    Two module-level names defined outside this function are required:
    `config` (its `imsize` is part of the output file name) and `data`
    (the running list of saved maps).

    Args:
        im: array with a tolist() method; stored under 'grid'.
        goal: indexable whose first two items are stored under 'goal'.
        start, states_one_hot, counter: accepted but not used here.
        states_xy: sequence of trajectories; the first point of the first
            trajectory is stored under 'agent'.
    '''
    if len(states_xy[0]) == 0:
        # No trajectory available: fall back to [0, 0] as agent position.
        # NOTE(review): the previous code also ran
        # `im.tolist()[start_x][start_y] = 1` here, which raised NameError
        # (both names undefined) and would only have changed a temporary
        # list; it is dropped. Confirm whether the saved grid should be
        # modified for this case.
        agent = [0, 0]
    else:
        agent = states_xy[0][0].tolist()

    mp = {
        'grid': im.tolist(),
        'goal': [goal[0], goal[1]],
        'agent': agent,
    }
    data.append(mp)
    # The whole list is dumped again on every call.
    with open('./maps/' + str(config.imsize) + '_data_300' + '.json', 'w') as outfile:
        json.dump(data, outfile)
+
def open_map(dom, path):
    '''
    Load one map from the JSON file named '<path><dom>.json'.

    `path` and `dom` are joined by plain string concatenation, so `path`
    must already end with whatever separator or prefix the file name needs.

    Returns the values stored under 'grid', 'goal' and 'agent'.
    Raises whatever open(), json.load() or the key lookups raise.
    '''
    file_name = str(path) + str(dom) + '.json'
    # Logged before opening so a failing file can be identified in the log.
    logging.info('Opening file: %s', file_name)
    with open(file_name) as json_file:
        # Named map_data so it does not shadow the module-level `data` list.
        map_data = json.load(json_file)
    return map_data['grid'], map_data['goal'], map_data['agent']
+
def open_map_list(dom, path):
    '''
    Load entry `dom` from the JSON list stored in '<path>.json'.

    Returns the values stored under 'grid', 'goal' and 'agent' of that
    entry. Raises whatever open(), json.load() or the lookups raise.
    '''
    file_name = str(path) + '.json'
    # The message now names the file that is really opened; it used to
    # print '<path><dom>.json', which is not read by this function.
    logging.info('Opening file: %s (entry %s)', file_name, dom)
    with open(file_name) as json_file:
        maps = json.load(json_file)
    entry = maps[dom]
    return entry['grid'], entry['goal'], entry['agent']
+
def deviation(optimal_path, pred_path, goal, map_num):
    '''
    Compare the length of a predicted path with that of the optimal path.

    The length of a path is the sum of the Euclidean distances between
    consecutive points, taken in the order the points are given. Only the
    first two components of every point are used.

    The previous implementation passed pred_path through
    np.unique(axis=0), which sorts the rows and therefore measured the
    length of a reordered path. Repeated points (e.g. the goal padding at
    the end of a rollout) add zero length, so no de-duplication is needed.

    Args:
        optimal_path: sequence of points of the reference path.
        pred_path: sequence of points of the predicted path.
        goal: unused; kept for existing callers.
        map_num: unused; kept for existing callers.

    Returns:
        (dev_rel, dev_non_rel, dist, astar_dist) as Python floats:
            dev_non_rel - absolute difference of the two lengths
            dev_rel     - dev_non_rel / optimal length; 0.0 when both are
                          zero, inf when only the optimal length is zero
            dist        - length of pred_path
            astar_dist  - length of optimal_path
    '''
    def _path_length(path):
        # Sum of the step lengths along `path`; 0.0 for fewer than 2 points.
        points = np.asarray(path, dtype=float)
        if points.shape[0] < 2:
            return 0.0
        steps = np.diff(points[:, :2], axis=0)
        return float(np.hypot(steps[:, 0], steps[:, 1]).sum())

    dist = _path_length(pred_path)
    astar_dist = _path_length(optimal_path)

    dev_non_rel = abs(astar_dist - dist)
    if astar_dist:
        dev_rel = dev_non_rel / astar_dist  # TODO: Add avg distance of gen trajectory
    else:
        # A zero-length reference path has no meaningful relative deviation.
        dev_rel = 0.0 if dev_non_rel == 0 else math.inf
    return (dev_rel, dev_non_rel, dist, astar_dist)
+
def dist_left(pred_traj, goal):
    '''
    Return the straight-line distance between the last point of a
    trajectory and the goal.

    pred_traj: sequence of points; only the first two components of the
               last point are used.
    goal:      indexable whose first two components are compared with
               that last point.
    '''
    final_point = np.array(pred_traj)[-1]
    delta_a = goal[0] - final_point[0]
    delta_b = goal[1] - final_point[1]
    return (delta_a ** 2 + delta_b ** 2) ** 0.5
+
+
if __name__ == '__main__':
    # Command-line options for a test run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default='trained/30k_no_block_dataset_vin_64x64.pth',
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize', type=int, default=100, help='Size of image')
    parser.add_argument(
        '--k', type=int, default=48, help='Number of Value Iterations')
    parser.add_argument(
        '--l_i', type=int, default=2, help='Number of channels in input layer')
    parser.add_argument(
        '--l_h',
        type=int,
        default=150,
        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    # `config` stays a module-level name because save_image reads it.
    config = parser.parse_args()
    # Forward --gen: it used to be parsed and then ignored, so the flag had
    # no effect. Without the flag the call is equivalent to main(config).
    main(config, gen=config.gen)
diff --git a/src/algorithms/learning/VIN/test_nonzeros.py b/src/algorithms/learning/VIN/test_nonzeros.py
new file mode 100644
index 000000000..3cc429ef3
--- /dev/null
+++ b/src/algorithms/learning/VIN/test_nonzeros.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+# Scratch demo: how a boolean comparison and np.nonzero relate on a 2-D array.
+a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+greater_than_three = a > 3
+
+# Where is a > 3?
+print(greater_than_three)
+# Prints:
+# [[False False False]
+#  [ True  True  True]
+#  [ True  True  True]]
+#
+# Row 0 contains no True entries. Rows 1 and 2 are True in columns 0, 1
+# and 2, so the True positions, written as (row, column), are
+# (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2).
+
+print(np.nonzero(greater_than_three))
+# Prints:
+# (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))
+#
+# The first array holds the row number of each True entry and the second
+# array holds its index within that row: row 1 index 0, row 1 index 1,
+# row 1 index 2, row 2 index 0, row 2 index 1, row 2 index 2.
\ No newline at end of file
diff --git a/src/algorithms/learning/VIN/test_og.py b/src/algorithms/learning/VIN/test_og.py
new file mode 100644
index 000000000..5755a3d00
--- /dev/null
+++ b/src/algorithms/learning/VIN/test_og.py
@@ -0,0 +1,173 @@
+import sys
+import argparse
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+
+import torch
+from torch.autograd import Variable
+
+from dataset.dataset_og import *
+from utility.utils import *
+from model import *
+
+from domains.gridworld_og import *
+from generators.obstacle_gen import *
+
+
+def main(config,
+         n_domains=100,
+         max_obs=30,
+         max_obs_size=None,
+         n_traj=1,
+         n_actions=8):
+    """Roll out a trained VIN on random gridworlds and print its success rate.
+
+    For every generated domain, trajectories are sampled with
+    ``sample_trajectory``. Starting from each trajectory's first state, the
+    network is queried step by step for the next action, for at most twice
+    the length of the sampled trajectory. A rollout counts as correct when
+    its final position equals the goal.
+
+    Args:
+        config: Parsed options. ``weights``, ``imsize`` and ``plot`` are read
+            here; the whole object is also forwarded to ``VIN``.
+        n_domains: Number of random domains to attempt. Domains without
+            obstacles or without a valid border are skipped.
+        max_obs: Forwarded to ``add_n_rand_obs``.
+        max_obs_size: Forwarded to ``obstacles``.
+        n_traj: Number of trajectories requested per domain.
+        n_actions: Not used by this function; kept so existing callers that
+            pass it keep working.
+    """
+    # Correct vs total:
+    correct, total = 0.0, 0.0
+    # Automatic switch to GPU mode if available
+    use_GPU = torch.cuda.is_available()
+    # Instantiate a VIN model
+    vin = VIN(config)
+    # Load onto the CPU first so checkpoints saved from a GPU run also load
+    # on CPU-only machines; the model is moved to the GPU right below.
+    vin.load_state_dict(torch.load(config.weights, map_location='cpu'))
+    if use_GPU:
+        vin = vin.cuda()
+    # Shape the network expects for each input channel
+    map_shape = (1, 1, config.imsize, config.imsize)
+
+    for dom in range(n_domains):
+        # Randomly select goal position
+        goal = [
+            np.random.randint(config.imsize),
+            np.random.randint(config.imsize)
+        ]
+        # Generate obstacle map
+        obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
+        # Add obstacles to map
+        n_obs = obs.add_n_rand_obs(max_obs)
+        # Add border to map
+        border_res = obs.add_border()
+        # Ensure we have valid map
+        if n_obs == 0 or not border_res:
+            continue
+        # Get final map
+        im = obs.get_final()
+
+        # Generate gridworld from obstacle map
+        G = gridworld(im, goal[0], goal[1])
+        # Get value prior
+        value_prior = G.get_reward_prior()
+        # Sample random trajectories to our goal
+        states_xy, _ = sample_trajectory(G, n_traj)
+
+        # The map/prior input only depends on the domain, so build it once
+        # here instead of on every rollout step.
+        # NOTE(review): assumes the G.* calls in the rollout below do not
+        # modify G.image or value_prior -- confirm.
+        # The builtin int replaces np.int, which was removed in NumPy 1.24.
+        im_data = (1 - G.image.astype(int)).reshape(map_shape)
+        value_data = value_prior.astype(int).reshape(map_shape)
+        X_in = torch.from_numpy(
+            np.append(im_data, value_data, axis=1)).float()
+        if use_GPU:
+            X_in = X_in.cuda()
+
+        for i in range(n_traj):
+            if len(states_xy[i]) <= 1:
+                continue
+
+            # Allow twice as many steps as the sampled trajectory needed
+            L = len(states_xy[i]) * 2
+            # Allocate space for predicted steps
+            pred_traj = np.zeros((L, 2))
+            # Set starting position
+            pred_traj[0, :] = states_xy[i][0, :]
+
+            # Inference only: no autograd bookkeeping is needed
+            with torch.no_grad():
+                for j in range(1, L):
+                    # Current position as integer grid coordinates
+                    state_data = pred_traj[j - 1, :].astype(int)
+                    S1_in = torch.from_numpy(state_data[0].reshape(
+                        [1, 1])).float()
+                    S2_in = torch.from_numpy(state_data[1].reshape(
+                        [1, 1])).float()
+                    # Send Tensors to GPU if available
+                    if use_GPU:
+                        S1_in = S1_in.cuda()
+                        S2_in = S2_in.cuda()
+                    # Forward pass in our neural net
+                    _, predictions = vin(X_in, S1_in, S2_in, config)
+                    _, indices = torch.max(
+                        predictions.cpu(), 1, keepdim=True)
+                    a = indices.numpy()[0][0]
+                    # Transform prediction to indices
+                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
+                                           pred_traj[j - 1, 1])
+                    ns = G.sample_next_state(s, a)
+                    nr, nc = G.get_coords(ns)
+                    pred_traj[j, 0] = nr
+                    pred_traj[j, 1] = nc
+                    if nr == goal[0] and nc == goal[1]:
+                        # We hit goal so fill remaining steps
+                        pred_traj[j + 1:, 0] = nr
+                        pred_traj[j + 1:, 1] = nc
+                        break
+
+            # A rollout is correct when it ends on the goal
+            if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
+                correct += 1
+            total += 1
+            # Plot optimal and predicted path (also start, end)
+            if config.plot:
+                visualize(G.image.T, states_xy[i], pred_traj)
+        sys.stdout.write("\r" + str(int(
+            (float(dom) / n_domains) * 100.0)) + "%")
+        sys.stdout.flush()
+    sys.stdout.write("\n")
+    if total == 0:
+        # Every domain was skipped or had no usable trajectory; avoid 0 / 0
+        print('Rollout Accuracy: n/a (no valid rollouts were generated)')
+    else:
+        print('Rollout Accuracy: {:.2f}%'.format(100 * (correct / total)))
+
+
+def visualize(dom, states_xy, pred_traj):
+    """Draw the map with both paths and block until a key or button press.
+
+    Args:
+        dom: Image shown in grey scale as the background.
+        states_xy: Array whose columns 0 and 1 are plotted as the optimal
+            path; its first and last rows are marked as start and goal.
+        pred_traj: Array whose columns 0 and 1 are plotted as the
+            predicted path.
+    """
+    fig, ax = plt.subplots()
+    plt.imshow(dom, cmap="Greys_r")
+
+    # Paths first, then the start and goal markers
+    ax.plot(states_xy[:, 0], states_xy[:, 1], c='b', label='Optimal Path')
+    ax.plot(
+        pred_traj[:, 0], pred_traj[:, 1], '-X', c='r', label='Predicted Path')
+    ax.plot(states_xy[0, 0], states_xy[0, 1], '-o', label='Start')
+    ax.plot(states_xy[-1, 0], states_xy[-1, 1], '-s', label='Goal')
+
+    # Compact legend: small text and thin sample lines
+    legend = ax.legend(loc='upper right', shadow=False)
+    for text in legend.get_texts():
+        text.set_fontsize('x-small')
+    for line in legend.get_lines():
+        line.set_linewidth(0.5)
+
+    plt.draw()
+    plt.waitforbuttonpress(0)
+    plt.close(fig)
+
+
+if __name__ == '__main__':
+    # Command-line options, declared in the order they appear in --help
+    options = [
+        ('--weights', dict(
+            type=str,
+            default='trained/vin_8x8.pth',
+            help='Path to trained weights')),
+        ('--plot', dict(action='store_true', default=False)),
+        ('--imsize', dict(type=int, default=8, help='Size of image')),
+        ('--k', dict(
+            type=int, default=10, help='Number of Value Iterations')),
+        ('--l_i', dict(
+            type=int, default=2, help='Number of channels in input layer')),
+        ('--l_h', dict(
+            type=int,
+            default=150,
+            help='Number of channels in first hidden layer')),
+        ('--l_q', dict(
+            type=int,
+            default=10,
+            help='Number of channels in q layer (~actions) in VI-module')),
+    ]
+    parser = argparse.ArgumentParser()
+    for flag, kwargs in options:
+        parser.add_argument(flag, **kwargs)
+    config = parser.parse_args()
+    # Compute paths generated by the network and optionally plot them
+    main(config)
\ No newline at end of file
diff --git a/src/algorithms/learning/VIN/train.py b/src/algorithms/learning/VIN/train.py
new file mode 100644
index 000000000..0e0056e54
--- /dev/null
+++ b/src/algorithms/learning/VIN/train.py
@@ -0,0 +1,142 @@
+import time
+import argparse
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+import torchvision.transforms as transforms
+
+import matplotlib.pyplot as plt
+from dataset.dataset import *
+from utility.utils import *
+from model import *
+
+
+def train(net, trainloader, config, criterion, optimizer):
+    """Train net for config.epochs epochs, printing one stats row per epoch.
+
+    Batches whose first dimension differs from config.batch_size are
+    skipped. Training runs on "cuda:0" when CUDA is available, otherwise on
+    the CPU.
+
+    Args:
+        net: Model called as net(X, S1, S2, config); it must return a pair
+            (outputs, predictions).
+        trainloader: Iterable yielding (X, S1, S2, labels) batches.
+        config: Options object; epochs and batch_size are read here and the
+            whole object is forwarded to net.
+        criterion: Loss callable applied to (outputs, labels).
+        optimizer: Optimizer whose zero_grad() and step() are called once
+            per batch.
+    """
+    # Neither the device nor the model placement changes between batches,
+    # so resolve them once instead of on every iteration.
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    net = net.to(device)
+    print_header()
+    for epoch in range(config.epochs):  # Loop over dataset multiple times
+        avg_error, avg_loss, num_batches = 0.0, 0.0, 0.0
+        start_time = time.time()
+        for X, S1, S2, labels in trainloader:  # Loop over batches of data
+            if X.size()[0] != config.batch_size:
+                continue  # Drop those data, if not enough for a batch
+            X = X.to(device)
+            S1 = S1.to(device)
+            S2 = S2.to(device)
+            labels = labels.to(device)
+            # Zero the parameter gradients
+            optimizer.zero_grad()
+            # Forward pass
+            outputs, predictions = net(X, S1, S2, config)
+            # Loss
+            loss = criterion(outputs, labels)
+            # Backward pass
+            loss.backward()
+            # Update params
+            optimizer.step()
+            # Calculate Loss and Error
+            loss_batch, error_batch = get_stats(loss, predictions, labels)
+            avg_loss += loss_batch
+            avg_error += error_batch
+            num_batches += 1
+        time_duration = time.time() - start_time
+        if num_batches == 0:
+            # The averages below would divide by zero; report the empty
+            # epoch instead of crashing.
+            print('Epoch {}: no batch of size {} was available, '
+                  'nothing to report.'.format(epoch + 1, config.batch_size))
+            continue
+        # Print epoch logs
+        print_stats(epoch, avg_loss, avg_error, num_batches, time_duration)
+    print('\nFinished training. \n')
+
+
+def test(net, testloader, config):
+    """Print the classification accuracy of net over testloader.
+
+    Batches whose first dimension differs from config.batch_size are
+    skipped. When no batch qualifies, a notice is printed instead of an
+    accuracy.
+
+    Args:
+        net: Model called as net(X, S1, S2, config); the first element of
+            its result is treated as per-class scores along dimension 1.
+        testloader: Iterable yielding (X, S1, S2, labels) batches.
+        config: Options object; batch_size is read here and the whole
+            object is forwarded to net.
+    """
+    # Resolve the device and move the model once, not once per batch
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    net = net.to(device)
+    total, correct = 0.0, 0.0
+    # Evaluation needs no gradients; skipping them saves memory and time
+    with torch.no_grad():
+        for X, S1, S2, labels in testloader:
+            if X.size()[0] != config.batch_size:
+                continue  # Drop those data, if not enough for a batch
+            X = X.to(device)
+            S1 = S1.to(device)
+            S2 = S2.to(device)
+            labels = labels.to(device)
+            # Forward pass
+            outputs, _ = net(X, S1, S2, config)
+            # Select actions with max scores (logits)
+            predicted = torch.argmax(outputs, dim=1)
+            # Accumulate plain Python numbers rather than tensors
+            correct += (predicted == labels).sum().item()
+            total += labels.size()[0]
+    if total == 0:
+        # Avoid 0 / 0 when the loader produced no full batch
+        print('Test Accuracy: n/a (no full batch was available)')
+        return
+    print('Test Accuracy: {:.2f}%'.format(100 * (correct / total)))
+
+
+if __name__ == '__main__':
+    # Training options as (flag, type, default, help), in --help order
+    options = [
+        ('--datafile', str, 'dataset/30000_new_gridworld_8x8.npz',
+         'Path to data file'),
+        ('--imsize', int, 8, 'Size of image'),
+        ('--lr', float, 0.005,
+         'Learning rate, [0.01, 0.005, 0.002, 0.001]'),
+        ('--epochs', int, 30, 'Number of epochs to train'),
+        ('--k', int, 10, 'Number of Value Iterations'),
+        ('--l_i', int, 2, 'Number of channels in input layer'),
+        ('--l_h', int, 150, 'Number of channels in first hidden layer'),
+        ('--l_q', int, 10,
+         'Number of channels in q layer (~actions) in VI-module'),
+        ('--batch_size', int, 128, 'Batch size'),
+    ]
+    parser = argparse.ArgumentParser()
+    for flag, kind, default, description in options:
+        parser.add_argument(
+            flag, type=kind, default=default, help=description)
+    config = parser.parse_args()
+
+    # Where the trained parameters are written
+    save_path = "trained/60k_local_new_vin_{0}x{0}.pth".format(config.imsize)
+    # Model, loss and optimizer
+    net = VIN(config)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.RMSprop(net.parameters(), lr=config.lr, eps=1e-6)
+
+    # No torchvision transform is applied to the data
+    transform = None
+    dataset_kwargs = dict(imsize=config.imsize, transform=transform)
+    trainset = GridworldData(config.datafile, train=True, **dataset_kwargs)
+    testset = GridworldData(config.datafile, train=False, **dataset_kwargs)
+
+    # Only the training data is shuffled
+    loader_kwargs = dict(batch_size=config.batch_size, num_workers=0)
+    trainloader = torch.utils.data.DataLoader(
+        trainset, shuffle=True, **loader_kwargs)
+    testloader = torch.utils.data.DataLoader(
+        testset, shuffle=False, **loader_kwargs)
+
+    # Train, evaluate, then persist the parameters
+    train(net, trainloader, config, criterion, optimizer)
+    test(net, testloader, config)
+    torch.save(net.state_dict(), save_path)
diff --git a/src/algorithms/learning/VIN/train_og.py b/src/algorithms/learning/VIN/train_og.py
new file mode 100644
index 000000000..4e18c0f19
--- /dev/null
+++ b/src/algorithms/learning/VIN/train_og.py
@@ -0,0 +1,142 @@
+import time
+import argparse
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+import torchvision.transforms as transforms
+
+import matplotlib.pyplot as plt
+from dataset.dataset_og import *
+from utility.utils import *
+from model import *
+
+
+def train(net, trainloader, config, criterion, optimizer):
+    """Train net for config.epochs epochs, printing one stats row per epoch.
+
+    Batches whose first dimension differs from config.batch_size are
+    skipped. Training runs on "cuda:0" when CUDA is available, otherwise on
+    the CPU.
+
+    Args:
+        net: Model called as net(X, S1, S2, config); it must return a pair
+            (outputs, predictions).
+        trainloader: Iterable yielding (X, S1, S2, labels) batches.
+        config: Options object; epochs and batch_size are read here and the
+            whole object is forwarded to net.
+        criterion: Loss callable applied to (outputs, labels).
+        optimizer: Optimizer whose zero_grad() and step() are called once
+            per batch.
+    """
+    # Neither the device nor the model placement changes between batches,
+    # so resolve them once instead of on every iteration.
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    net = net.to(device)
+    print_header()
+    for epoch in range(config.epochs):  # Loop over dataset multiple times
+        avg_error, avg_loss, num_batches = 0.0, 0.0, 0.0
+        start_time = time.time()
+        for X, S1, S2, labels in trainloader:  # Loop over batches of data
+            if X.size()[0] != config.batch_size:
+                continue  # Drop those data, if not enough for a batch
+            X = X.to(device)
+            S1 = S1.to(device)
+            S2 = S2.to(device)
+            labels = labels.to(device)
+            # Zero the parameter gradients
+            optimizer.zero_grad()
+            # Forward pass
+            outputs, predictions = net(X, S1, S2, config)
+            # Loss
+            loss = criterion(outputs, labels)
+            # Backward pass
+            loss.backward()
+            # Update params
+            optimizer.step()
+            # Calculate Loss and Error
+            loss_batch, error_batch = get_stats(loss, predictions, labels)
+            avg_loss += loss_batch
+            avg_error += error_batch
+            num_batches += 1
+        time_duration = time.time() - start_time
+        if num_batches == 0:
+            # The averages below would divide by zero; report the empty
+            # epoch instead of crashing.
+            print('Epoch {}: no batch of size {} was available, '
+                  'nothing to report.'.format(epoch + 1, config.batch_size))
+            continue
+        # Print epoch logs
+        print_stats(epoch, avg_loss, avg_error, num_batches, time_duration)
+    print('\nFinished training. \n')
+
+
+def test(net, testloader, config):
+    """Print the classification accuracy of net over testloader.
+
+    Batches whose first dimension differs from config.batch_size are
+    skipped. When no batch qualifies, a notice is printed instead of an
+    accuracy.
+
+    Args:
+        net: Model called as net(X, S1, S2, config); the first element of
+            its result is treated as per-class scores along dimension 1.
+        testloader: Iterable yielding (X, S1, S2, labels) batches.
+        config: Options object; batch_size is read here and the whole
+            object is forwarded to net.
+    """
+    # Resolve the device and move the model once, not once per batch
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    net = net.to(device)
+    total, correct = 0.0, 0.0
+    # Evaluation needs no gradients; skipping them saves memory and time
+    with torch.no_grad():
+        for X, S1, S2, labels in testloader:
+            if X.size()[0] != config.batch_size:
+                continue  # Drop those data, if not enough for a batch
+            X = X.to(device)
+            S1 = S1.to(device)
+            S2 = S2.to(device)
+            labels = labels.to(device)
+            # Forward pass
+            outputs, _ = net(X, S1, S2, config)
+            # Select actions with max scores (logits)
+            predicted = torch.argmax(outputs, dim=1)
+            # Accumulate plain Python numbers rather than tensors
+            correct += (predicted == labels).sum().item()
+            total += labels.size()[0]
+    if total == 0:
+        # Avoid 0 / 0 when the loader produced no full batch
+        print('Test Accuracy: n/a (no full batch was available)')
+        return
+    print('Test Accuracy: {:.2f}%'.format(100 * (correct / total)))
+
+
+if __name__ == '__main__':
+    # Training options as (flag, type, default, help), in --help order
+    options = [
+        ('--datafile', str, 'dataset/gridworld_8x8.npz',
+         'Path to data file'),
+        ('--imsize', int, 8, 'Size of image'),
+        ('--lr', float, 0.005,
+         'Learning rate, [0.01, 0.005, 0.002, 0.001]'),
+        ('--epochs', int, 30, 'Number of epochs to train'),
+        ('--k', int, 10, 'Number of Value Iterations'),
+        ('--l_i', int, 2, 'Number of channels in input layer'),
+        ('--l_h', int, 150, 'Number of channels in first hidden layer'),
+        ('--l_q', int, 10,
+         'Number of channels in q layer (~actions) in VI-module'),
+        ('--batch_size', int, 128, 'Batch size'),
+    ]
+    parser = argparse.ArgumentParser()
+    for flag, kind, default, description in options:
+        parser.add_argument(
+            flag, type=kind, default=default, help=description)
+    config = parser.parse_args()
+
+    # Where the trained parameters are written
+    save_path = "trained/vin_{0}x{0}.pth".format(config.imsize)
+    # Model, loss and optimizer
+    net = VIN(config)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.RMSprop(net.parameters(), lr=config.lr, eps=1e-6)
+
+    # No torchvision transform is applied to the data
+    transform = None
+    dataset_kwargs = dict(imsize=config.imsize, transform=transform)
+    trainset = GridworldData(config.datafile, train=True, **dataset_kwargs)
+    testset = GridworldData(config.datafile, train=False, **dataset_kwargs)
+
+    # Only the training data is shuffled
+    loader_kwargs = dict(batch_size=config.batch_size, num_workers=0)
+    trainloader = torch.utils.data.DataLoader(
+        trainset, shuffle=True, **loader_kwargs)
+    testloader = torch.utils.data.DataLoader(
+        testset, shuffle=False, **loader_kwargs)
+
+    # Train, evaluate, then persist the parameters
+    train(net, trainloader, config, criterion, optimizer)
+    test(net, testloader, config)
+    torch.save(net.state_dict(), save_path)
diff --git a/src/algorithms/learning/VIN/trained/README.md b/src/algorithms/learning/VIN/trained/README.md
new file mode 100644
index 000000000..9d1d9b432
--- /dev/null
+++ b/src/algorithms/learning/VIN/trained/README.md
@@ -0,0 +1,4 @@
+# Trained Models
+To use a pretrained model you have two choices:
+1. Download and place the trained .pth model files here
+2. Train the VIN on the datasets yourself (the training scripts save the resulting models here)
\ No newline at end of file
diff --git a/src/algorithms/learning/VIN/utility/__init__.py b/src/algorithms/learning/VIN/utility/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/algorithms/learning/VIN/utility/utils.py b/src/algorithms/learning/VIN/utility/utils.py
new file mode 100644
index 000000000..de0104400
--- /dev/null
+++ b/src/algorithms/learning/VIN/utility/utils.py
@@ -0,0 +1,34 @@
+import numpy as np
+import torch
+
+
+def fmt_row(width, row):
+    """Format every item of row with fmt_item(item, width) and join the
+    results with " | "."""
+    cells = [fmt_item(item, width) for item in row]
+    return " | ".join(cells)
+
+
+def fmt_item(x, l):
+    """Return x as text, left-padded with spaces to at least l characters.
+
+    A zero-dimensional numpy array is unwrapped to its scalar first; arrays
+    of any other rank fail the assertion. Floats are rendered with "%g",
+    everything else with str(). Text longer than l is returned unpadded.
+    """
+    if isinstance(x, np.ndarray):
+        assert x.ndim == 0
+        x = x.item()
+    rep = "%g" % x if isinstance(x, float) else str(x)
+    return rep.rjust(l)
+
+
+def get_stats(loss, predictions, labels):
+    """Return (loss value, error rate) for one batch.
+
+    The predicted class of each row is the index of its largest entry along
+    axis 1 of predictions; the error rate is the mean of the element-wise
+    mismatches between those classes and labels.
+    """
+    predicted_classes = predictions.cpu().data.numpy().argmax(1)
+    true_classes = labels.cpu().data.numpy()
+    error_rate = np.mean(predicted_classes != true_classes)
+    return loss.item(), error_rate
+
+
+def print_stats(epoch, avg_loss, avg_error, num_batches, time_duration):
+    """Print one table row: 1-based epoch, loss and error divided by
+    num_batches, and the epoch duration."""
+    row = [
+        epoch + 1,
+        avg_loss / num_batches,
+        avg_error / num_batches,
+        time_duration,
+    ]
+    print(fmt_row(10, row))
+
+
+def print_header():
+    """Print the column titles of the training statistics table."""
+    columns = ["Epoch", "Train Loss", "Train Error", "Epoch Time"]
+    print(fmt_row(10, columns))
diff --git a/src/algorithms/learning/__init__.py b/src/algorithms/learning/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/algorithms/lstm/a_star_heuristic_augmentation.py b/src/algorithms/learning/a_star_heuristic_augmentation.py
similarity index 98%
rename from src/algorithms/lstm/a_star_heuristic_augmentation.py
rename to src/algorithms/learning/a_star_heuristic_augmentation.py
index 39762f7c8..d958a394d 100644
--- a/src/algorithms/lstm/a_star_heuristic_augmentation.py
+++ b/src/algorithms/learning/a_star_heuristic_augmentation.py
@@ -8,7 +8,7 @@
from algorithms.classic.graph_based.a_star import AStar
from algorithms.configuration.entities.goal import Goal
from algorithms.configuration.entities.trace import Trace
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
from simulator.services.algorithm_runner import AlgorithmRunner
from simulator.services.services import Services
from simulator.views.map.display.gradient_list_map_display import GradientListMapDisplay
diff --git a/src/algorithms/lstm/a_star_waypoint.py b/src/algorithms/learning/a_star_waypoint.py
similarity index 98%
rename from src/algorithms/lstm/a_star_waypoint.py
rename to src/algorithms/learning/a_star_waypoint.py
index 2ea738daf..57a23460d 100644
--- a/src/algorithms/lstm/a_star_waypoint.py
+++ b/src/algorithms/learning/a_star_waypoint.py
@@ -7,8 +7,8 @@
from algorithms.classic.testing.combined_online_lstm_testing import CombinedOnlineLSTMTesting
from algorithms.configuration.maps.map import Map
from algorithms.configuration.maps.ros_map import RosMap
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
-from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM
from simulator.services.algorithm_runner import AlgorithmRunner
from simulator.services.services import Services
from simulator.views.map.display.map_display import MapDisplay
diff --git a/src/algorithms/lstm/combined_online_LSTM.py b/src/algorithms/learning/combined_online_LSTM.py
similarity index 98%
rename from src/algorithms/lstm/combined_online_LSTM.py
rename to src/algorithms/learning/combined_online_LSTM.py
index fa1cc1a45..512dc1f57 100644
--- a/src/algorithms/lstm/combined_online_LSTM.py
+++ b/src/algorithms/learning/combined_online_LSTM.py
@@ -5,7 +5,7 @@
from algorithms.algorithm import Algorithm
from algorithms.basic_testing import BasicTesting
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
from simulator.services.algorithm_runner import AlgorithmRunner
from simulator.services.services import Services
from simulator.views.map.display.entities_map_display import EntitiesMapDisplay
diff --git a/src/algorithms/lstm/map_processing.py b/src/algorithms/learning/map_processing.py
similarity index 100%
rename from src/algorithms/lstm/map_processing.py
rename to src/algorithms/learning/map_processing.py
diff --git a/src/algorithms/lstm/trainer.py b/src/algorithms/learning/trainer.py
similarity index 100%
rename from src/algorithms/lstm/trainer.py
rename to src/algorithms/learning/trainer.py
diff --git a/src/analyzer/analyzer.py b/src/analyzer/analyzer.py
index 7e4ca09ac..804895b3c 100644
--- a/src/analyzer/analyzer.py
+++ b/src/analyzer/analyzer.py
@@ -3,8 +3,8 @@
from simulator.simulator import Simulator
from simulator.services.services import Services
from simulator.services.debug import DebugLevel, Debug
-from algorithms.lstm.a_star_waypoint import WayPointNavigation
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.a_star_waypoint import WayPointNavigation
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
from algorithms.configuration.maps.map import Map
from algorithms.configuration.maps.dense_map import DenseMap
from algorithms.configuration.configuration import Configuration
diff --git a/src/generator/generator.py b/src/generator/generator.py
index 93ed2d726..746a21704 100644
--- a/src/generator/generator.py
+++ b/src/generator/generator.py
@@ -10,7 +10,7 @@
from matplotlib import pyplot as plt
from natsort import natsorted
-from algorithms.lstm.LSTM_CAE_tile_by_tile import CAE
+from algorithms.learning.LSTM_CAE_tile_by_tile import CAE
from algorithms.classic.graph_based.a_star import AStar
from algorithms.classic.testing.a_star_testing import AStarTesting
from algorithms.configuration.configuration import Configuration
@@ -18,7 +18,7 @@
from algorithms.configuration.entities.entity import Entity
from algorithms.configuration.maps.dense_map import DenseMap
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.map_processing import MapProcessing
+from algorithms.learning.map_processing import MapProcessing
from simulator.services.debug import DebugLevel
from simulator.services.progress import Progress
from simulator.services.resources.atlas import Atlas
diff --git a/src/main.py b/src/main.py
index 34f12be6c..42469710d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,7 +1,7 @@
from algorithms.configuration.configuration import Configuration
from algorithms.algorithm_manager import AlgorithmManager
from maps.map_manager import MapManager
-from algorithms.lstm.trainer import Trainer
+from algorithms.learning.trainer import Trainer
from analyzer.analyzer import Analyzer
from generator.generator import Generator
from simulator.services.debug import DebugLevel
diff --git a/src/run_trainer.py b/src/run_trainer.py
index e5107abc0..e1e522e17 100644
--- a/src/run_trainer.py
+++ b/src/run_trainer.py
@@ -5,8 +5,8 @@
from algorithms.basic_testing import BasicTesting
from algorithms.configuration.maps.map import Map
from maps.map_manager import MapManager
-from algorithms.lstm.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM
-from algorithms.lstm.ML_model import MLModel
+from algorithms.learning.LSTM_tile_by_tile import BasicLSTMModule, OnlineLSTM
+from algorithms.learning.ML_model import MLModel
from simulator.services.debug import DebugLevel
from analyzer.analyzer import Analyzer
from generator.generator import Generator
@@ -27,10 +27,10 @@
from algorithms.classic.sample_based.rrt_connect import RRT_Connect
from algorithms.classic.graph_based.wavefront import Wavefront
from algorithms.configuration.configuration import Configuration
-from algorithms.lstm.LSTM_tile_by_tile import OnlineLSTM
-from algorithms.lstm.a_star_waypoint import WayPointNavigation
-from algorithms.lstm.combined_online_LSTM import CombinedOnlineLSTM
-from algorithms.lstm.LSTM_CAE_tile_by_tile import CAE, LSTMCAEModel
+from algorithms.learning.LSTM_tile_by_tile import OnlineLSTM
+from algorithms.learning.a_star_waypoint import WayPointNavigation
+from algorithms.learning.combined_online_LSTM import CombinedOnlineLSTM
+from algorithms.learning.LSTM_CAE_tile_by_tile import CAE, LSTMCAEModel
# planner testing
diff --git a/src/simulator/services/resources/directories.py b/src/simulator/services/resources/directories.py
index 5da6bc806..2167873b4 100644
--- a/src/simulator/services/resources/directories.py
+++ b/src/simulator/services/resources/directories.py
@@ -13,7 +13,7 @@
from simulator.services.services import Services
if TYPE_CHECKING:
- from algorithms.lstm.ML_model import MLModel
+ from algorithms.learning.ML_model import MLModel
class ModelSubdir(Directory):
diff --git a/src/simulator/views/map/display/online_lstm_map_display.py b/src/simulator/views/map/display/online_lstm_map_display.py
index 9c90be130..d1cc4de86 100644
--- a/src/simulator/views/map/display/online_lstm_map_display.py
+++ b/src/simulator/views/map/display/online_lstm_map_display.py
@@ -8,7 +8,7 @@
from algorithms.configuration.entities.agent import Agent
from algorithms.configuration.entities.goal import Goal
from algorithms.configuration.maps.map import Map
-from algorithms.lstm.map_processing import MapProcessing
+from algorithms.learning.map_processing import MapProcessing
from simulator.services.services import Services
from simulator.views.map.display.map_display import MapDisplay
from structures import Point, Colour, DynamicColour, RED