
Commit 3fbbba9

Merge pull request #2 from pascalwhoop/restructure
Restructure
2 parents 257d8e9 + 3b7c05d commit 3fbbba9

File tree

13 files changed: +817 additions, -208 deletions


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 Session.vim
 ipad.vim
 macbook.vim
+_minted-main/


 Literature/

src/.latexmkrc

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+$pdf_previewer = 'zathura';
+@default_files = ('main.tex');
+$latex = 'latex -interaction=nonstopmode -shell-escape';
+$pdflatex = 'pdflatex -interaction=nonstopmode -shell-escape';
+
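Note: the -shell-escape flag implies a package that shells out during compilation, most likely minted (the .gitignore change above adds its _minted-main/ cache directory). A minimal sketch of a document that would need exactly this latexmk setup, assuming minted is indeed the motivation:

% minimal sketch, assuming minted is what motivates -shell-escape
\documentclass{article}
\usepackage{minted}   % shells out to pygments, hence -shell-escape
\begin{document}
\begin{minted}{python}
print("hello")        # highlighted by pygments at compile time
\end{minted}
\end{document}

Running latexmk inside src/ picks up the .latexmkrc above automatically and builds main.tex with the flags shown.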
src/acronyms.tex

Lines changed: 9 additions & 6 deletions
@@ -1,6 +1,7 @@
 \begin {acronym}[Bash]
-
 \acro {RL} {Reinforcement Learning}
+\acro {CRIU} {Checkpoint/Restore in Userspace}
+\acro {VM} {Virtual Machines}
 \acro {JMI} {Java Message Service}
 \acro {XML} {Extensive Markup Language}
 \acro {JSON} {JavaScript Object Notation}
@@ -10,6 +11,7 @@
 \acro {POPO} {Plain Old Python Object}
 \acro {DU} {Distribution Utility}
 \acro {CHP} {Combined Heat and Power Unit}
+\acro {SARSA} {State-Action-Reward-State-Action}
 \acro {GRPC} {Google Remote Process Call}
 \acro {NN} {Neural Networks}
 \acro {MDP} {Markovian Decision Process}
@@ -19,11 +21,12 @@
 \acro {PPO} {Proximal Policy Optimization}
 \acro {POMDP} {Partially Observable Markovian Decision Process}
 \acro {GPU} {Graphical Processing Unit}
-\acro {UL} {Unsupervised Learning}
-\acro {SL} {Supervised Learning}
-\acro {RNN} {Recurrent Neural Network}
-\acro {LSTM} {Long-Short Term Memory}
-\acro {CNN} {Convolutional Neural Network}
+\acro {UL} {Unsupervised Learning}
+\acro {API} {Application Programming Interface}
+\acro {SL} {Supervised Learning}
+\acro {RNN} {Recurrent Neural Network}
+\acro {LSTM} {Long-Short Term Memory}
+\acro {CNN} {Convolutional Neural Network}
 \acro {CPU} {Central Processing Unit}
 \acro {TF} {TensorFlow}
 \acro {TPU} {Tensor Processing Unit}
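For context, the entries above are consumed in the running text through the acronym package's referencing macros, which the chapter files in this commit already use. A brief sketch of how a newly added entry such as SARSA would typically appear in prose:

% sketch: referencing the acronym entries defined above
We use \ac{SARSA} for on-policy updates.   % first use prints the full form followed by (SARSA)
Later occurrences of \ac{SARSA} print only the short form.
\acf{NN}   % forces the full form: Neural Networks (NN)
\acs{RL}   % forces the short form: RL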

src/bibliography.bib

Lines changed: 86 additions & 0 deletions
@@ -1,3 +1,24 @@
+@misc{docker,
+title = {What is Docker},
+howpublished = {\url{https://www.docker.com/what-docker}},
+author = {Docker Inc},
+note = {Accessed: 2018-04-20}
+}
+
+@misc{criu,
+title = {CRIU},
+howpublished = {\url{http://criu.org}},
+author = {criu.org},
+note = {Accessed: 2018-04-20}
+}
+
+@misc{clickcli,
+title = {Click_},
+howpublished = {\url{http://click.pocoo.org/5/}},
+author = {Ronacher, Armin},
+note = {Accessed: 2018-04-22}
+}
+

 @misc{GoogleColabOnline2018,
 title = {Google Colab Introduction},
@@ -405,6 +426,14 @@ @article{ketter2018powertac
 journal = {ERIM Report Series Reference No. 2017-016-LIS}
 }

+@article{schmitt2018kickstarting,
+title = {Kickstarting Deep Reinforcement Learning},
+author = {Schmitt, Simon and Hudson, Jonathan J and Zidek, Augustin and Osindero, Simon and Doersch, Carl and Czarnecki, Wojciech M and Leibo, Joel Z and Kuttler, Heinrich and Zisserman, Andrew and Simonyan, Karen and others},
+journal = {arXiv preprint arXiv:1803.03835},
+year = {2018}
+}
+
+
 @article{abbeel2010autonomous,
 title = {Autonomous helicopter aerobatics through apprenticeship learning},
 author = {Abbeel, Pieter and Coates, Adam and Ng, Andrew Y},
@@ -452,3 +481,60 @@ @article{EvalGRU2014
 bibsource = {dblp computer science bibliography, https://dblp.org}
 }

+@inproceedings{mnih2016asynchronous,
+title = {Asynchronous methods for deep reinforcement learning},
+author = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
+booktitle = {International Conference on Machine Learning},
+pages = {1928--1937},
+year = {2016}
+}
+
+@article{arulkumaran2017brief,
+title = {A brief survey of deep reinforcement learning},
+author = {Arulkumaran, Kai and Deisenroth, Marc Peter and Brundage, Miles and Bharath, Anil Anthony},
+journal = {arXiv preprint arXiv:1708.05866},
+year = {2017}
+}
+
+@article{brockman2016openai,
+title = {Openai gym},
+author = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
+journal = {arXiv preprint arXiv:1606.01540},
+year = {2016}
+}
+
+@article{matiisen2017teacher,
+title = {Teacher-Student Curriculum Learning},
+author = {Matiisen, Tambet and Oliver, Avital and Cohen, Taco and Schulman, John},
+journal = {arXiv preprint arXiv:1707.00183},
+year = {2017}
+}
+
+@article{silver2016mastering,
+title = {Mastering the game of Go with deep neural networks and tree search},
+author = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
+journal = {nature},
+volume = {529},
+number = {7587},
+pages = {484--489},
+year = {2016},
+publisher = {Nature Research}
+}
+
+@misc{baselines,
+author = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai},
+title = {OpenAI Baselines},
+year = {2017},
+publisher = {GitHub},
+journal = {GitHub repository},
+howpublished = {\url{https://github.com/openai/baselines}},
+}
+
+@misc{plappert2016kerasrl,
+author = {Matthias Plappert},
+title = {keras-rl},
+year = {2016},
+publisher = {GitHub},
+journal = {GitHub repository},
+howpublished = {\url{https://github.com/keras-rl/keras-rl}},
+}
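The new entries are referenced elsewhere in this commit through natbib-style macros (\citet and \citep appear in artificialintelligence.tex below); a minimal citation sketch, assuming natbib or a compatible package is loaded in the preamble:

% sketch: citing the newly added entries, assuming natbib-style \citet/\citep
\citet{arulkumaran2017brief} survey deep reinforcement learning.          % textual: Arulkumaran et al. (2017)
Gym provides standard benchmark environments \citep{brockman2016openai}.  % parenthetical: (Brockman et al., 2016)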
src/chaps/artificialintelligence.tex

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+\section{Artificial Intelligence}%
+\label{sec:artificial_intelligence}
+
+The field of \ac {AI} is, in comparison to Information Technology, both old and yet quite contemporary. In the middle of
+the 20th century, Alan Turing introduced the \emph{Turing Test} which, in essence, tests the ability of a human to tell
+whether answers to their questions are given by a machine or by another human. With the advent of computers around the same time,
+research started to aim for artificial intelligence. Generally though, clearly defining \ac {AI} in a single sentence is hard.
+\citet{russell2016artificial} structures historical definitions along two dimensions: the degree to which a system \emph{thinks} or
+\emph{behaves} like a \emph{human}, and how \emph{rationally} it thinks or behaves. These four directions are all pursued by researchers. In
+this thesis, the goal of \emph{acting rationally} is most appropriate. Table~\ref{tab:ai_definitions} summarizes these
+sub fields of research in the larger field of \ac {AI}.
+
+%TODO prettify
+\begin{table}[]
+\renewcommand{\arraystretch}{2.5}
+\centering
+\begin{tabular}{p{0.45\textwidth}|p{0.45\textwidth}}
+\textbf{Thinking Humanly}: The goal of creating machines with \emph{minds}
+&
+\textbf{Thinking Rationally}: Computation that can perceive, reason and act [rationally]
+\\
+\textbf{Acting Humanly}: "Machines that perform functions that require intelligence when performed by people"
+&
+\textbf{Acting Rationally}: design of intelligent agents
+\end{tabular}
+\caption{Various definitions of \ac {AI} \citep{russell2016artificial} }
+\label{tab:ai_definitions}
+\end{table}
+
+Today, some 70 years later, \ac {AI} is again extensively discussed by both researchers and public media
+\citep[p.24ff.]{russell2016artificial, arulkumaran2017brief}. The reasons for this are diverse, but it can be argued that
+the combination of readily available computing power through cloud computing and advances in the mathematical
+underpinnings has allowed for fast-paced advances in recent years. Also, the currently very popular \acf {NN}
+architectures often require large amounts of data to learn, which have lately become readily available to companies and
+researchers through the adoption of online technologies by the majority of the population
+\citep[p.27]{russell2016artificial}.
src/chaps/body.tex

Lines changed: 18 additions & 18 deletions
@@ -1,48 +1,48 @@
 \chapter{Introduction}
 \input{chaps/introduction.tex}
-\subsection{Methodology}
 \input{chaps/methodology.tex}

-\chapter{Artificial Intelligence}
+\chapter{Background}
+The thesis relies on work in three fields of research: Artificial Intelligence, Reinforcement Learning and (Animal)
+Cognition. These fields are applied in the context of competitive simulations, specifically an energy
+market simulation.

-TODO intro from russel book summarized
-% TODO intro from russel book summarized
-\section{Learning}
+\input{chaps/artificialintelligence.tex}
+\subsection{Learning}
 \input{chaps/learning.tex}
-\subsection{Supervised Learning}
+\subsubsection{Supervised Learning}
 \input{chaps/supervisedlearning.tex}
-\subsection{Unsupervised Learning}
+\subsubsection{Unsupervised Learning}
 \input{chaps/unsupervisedlearning.tex}
-\section{Neural Networks}%
+\subsection{Neural Networks}%
 \label{sec:neural_networks}
 \input{chaps/neuralnetworks.tex}
-\subsection{Learning Neural Networks and Backpropagation}
+\subsubsection{Learning Neural Networks and Backpropagation}
 \label{sec:Backpropagation}
 \input{chaps/backpropagation.tex}

-\section{Recurrent Neural Networks}%
+\subsection{Recurrent Neural Networks}%
 \label{sec:recurrent_neural_networks}
 \input{chaps/recurrentnn.tex}

 %TODO is this part of AI?
-\chapter{Reinforcement Learning}
+\section{Reinforcement Learning}
 \input{chaps/reinforcement.tex}

 %TODO still needed after paper by DeepMind? --> showed that learning from teacher helps
-\chapter{Animal Cognition}
-\section{Recognition}
-\section{Memory}
-\section{Social Cognition}
+%\section{Animal Cognition}
+%\subsection{Recognition}
+%\subsection{Memory}
+%\subsection{Social Cognition}

-\chapter{Competitive Simulations}%as a tool of experimental research into AI
+%\section{Competitive Simulations}%as a tool of experimental research into AI

-\chapter{Power Trading Agent Competition}
+\section{\ac {PowerTAC}: A Competitive Simulation}
 \input{chaps/powertac.tex}

 \chapter{Implementation}
 \input{chaps/implementation.tex}

 \chapter{Results}
 \input{chaps/results.tex}
-\chapter{Conclusion}

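body.tex is pulled in by the top-level main.tex that the @default_files entry in .latexmkrc points at; main.tex itself is not part of this diff, so the following is only a hypothetical sketch of how the pieces touched by this commit could be wired together:

% hypothetical main.tex sketch -- main.tex is not part of this commit
\documentclass{report}
\usepackage{acronym}          % provides \acro and \ac as used in acronyms.tex
\usepackage{natbib}           % provides \citet and \citep as used in the chapters
\usepackage{minted}           % assumed reason for -shell-escape in .latexmkrc
\begin{document}
\input{chaps/body.tex}        % the chapter structure restructured above
\input{acronyms.tex}          % acronym definitions
\bibliographystyle{plainnat}
\bibliography{bibliography}   % bibliography.bib extended above
\end{document}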