
Commit 3fbbba9

Merge pull request #2 from pascalwhoop/restructure
Restructure
2 parents 257d8e9 + 3b7c05d commit 3fbbba9

File tree

13 files changed: +817 additions, -208 deletions


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 Session.vim
 ipad.vim
 macbook.vim
+_minted-main/


 Literature/

src/.latexmkrc

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+$pdf_previewer = 'zathura';
+@default_files = ('main.tex');
+$latex = 'latex -interaction=nonstopmode -shell-escape';
+$pdflatex = 'pdflatex -interaction=nonstopmode -shell-escape';
+
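Note: the -shell-escape flag implies a package that shells out during compilation, most likely minted (the .gitignore change above adds its _minted-main/ cache directory). A minimal sketch of a document that would need exactly this latexmk setup, assuming minted is indeed the motivation:

% minimal sketch, assuming minted is what motivates -shell-escape
\documentclass{article}
\usepackage{minted}   % shells out to pygments, hence -shell-escape
\begin{document}
\begin{minted}{python}
print("hello")        # highlighted by pygments at compile time
\end{minted}
\end{document}

Running latexmk inside src/ picks up the .latexmkrc above automatically and builds main.tex with the flags shown.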
src/acronyms.tex

Lines changed: 9 additions & 6 deletions
@@ -1,6 +1,7 @@
 \begin {acronym}[Bash]
-
 \acro {RL} {Reinforcement Learning}
+\acro {CRIU} {Checkpoint/Restore in Userspace}
+\acro {VM} {Virtual Machines}
 \acro {JMI} {Java Message Service}
 \acro {XML} {Extensive Markup Language}
 \acro {JSON} {JavaScript Object Notation}
@@ -10,6 +11,7 @@
 \acro {POPO} {Plain Old Python Object}
 \acro {DU} {Distribution Utility}
 \acro {CHP} {Combined Heat and Power Unit}
+\acro {SARSA} {State-Action-Reward-State-Action}
 \acro {GRPC} {Google Remote Process Call}
 \acro {NN} {Neural Networks}
 \acro {MDP} {Markovian Decision Process}
@@ -19,11 +21,12 @@
 \acro {PPO} {Proximal Policy Optimization}
 \acro {POMDP} {Partially Observable Markovian Decision Process}
 \acro {GPU} {Graphical Processing Unit}
-\acro {UL} {Unsupervised Learning}
-\acro {SL} {Supervised Learning}
-\acro {RNN} {Recurrent Neural Network}
-\acro {LSTM} {Long-Short Term Memory}
-\acro {CNN} {Convolutional Neural Network}
+\acro {UL} {Unsupervised Learning}
+\acro {API} {Application Programming Interface}
+\acro {SL} {Supervised Learning}
+\acro {RNN} {Recurrent Neural Network}
+\acro {LSTM} {Long-Short Term Memory}
+\acro {CNN} {Convolutional Neural Network}
 \acro {CPU} {Central Processing Unit}
 \acro {TF} {TensorFlow}
 \acro {TPU} {Tensor Processing Unit}
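For context, the entries above are consumed in the running text through the acronym package's referencing macros, which the chapter files in this commit already use. A brief sketch of how a newly added entry such as SARSA would typically appear in prose:

% sketch: referencing the acronym entries defined above
We use \ac{SARSA} for on-policy updates.   % first use prints the full form followed by (SARSA)
Later occurrences of \ac{SARSA} print only the short form.
\acf{NN}   % forces the full form: Neural Networks (NN)
\acs{RL}   % forces the short form: RL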

src/bibliography.bib

Lines changed: 86 additions & 0 deletions
@@ -1,3 +1,24 @@
+@misc{docker,
+title = {What is Docker},
+howpublished = {\url{https://www.docker.com/what-docker}},
+author = {Docker Inc},
+note = {Accessed: 2018-04-20}
+}
+
+@misc{criu,
+title = {CRIU},
+howpublished = {\url{http://criu.org}},
+author = {criu.org},
+note = {Accessed: 2018-04-20}
+}
+
+@misc{clickcli,
+title = {Click_},
+howpublished = {\url{http://click.pocoo.org/5/}},
+author = {Ronacher, Armin},
+note = {Accessed: 2018-04-22}
+}
+

 @misc{GoogleColabOnline2018,
 title = {Google Colab Introduction},
@@ -405,6 +426,14 @@ @article{ketter2018powertac
 journal = {ERIM Report Series Reference No. 2017-016-LIS}
 }

+@article{schmitt2018kickstarting,
+title = {Kickstarting Deep Reinforcement Learning},
+author = {Schmitt, Simon and Hudson, Jonathan J and Zidek, Augustin and Osindero, Simon and Doersch, Carl and Czarnecki, Wojciech M and Leibo, Joel Z and Kuttler, Heinrich and Zisserman, Andrew and Simonyan, Karen and others},
+journal = {arXiv preprint arXiv:1803.03835},
+year = {2018}
+}
+
+
 @article{abbeel2010autonomous,
 title = {Autonomous helicopter aerobatics through apprenticeship learning},
 author = {Abbeel, Pieter and Coates, Adam and Ng, Andrew Y},
@@ -452,3 +481,60 @@ @article{EvalGRU2014
 bibsource = {dblp computer science bibliography, https://dblp.org}
 }

+@inproceedings{mnih2016asynchronous,
+title = {Asynchronous methods for deep reinforcement learning},
+author = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
+booktitle = {International Conference on Machine Learning},
+pages = {1928--1937},
+year = {2016}
+}
+
+@article{arulkumaran2017brief,
+title = {A brief survey of deep reinforcement learning},
+author = {Arulkumaran, Kai and Deisenroth, Marc Peter and Brundage, Miles and Bharath, Anil Anthony},
+journal = {arXiv preprint arXiv:1708.05866},
+year = {2017}
+}
+
+@article{brockman2016openai,
+title = {Openai gym},
+author = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
+journal = {arXiv preprint arXiv:1606.01540},
+year = {2016}
+}
+
+@article{matiisen2017teacher,
+title = {Teacher-Student Curriculum Learning},
+author = {Matiisen, Tambet and Oliver, Avital and Cohen, Taco and Schulman, John},
+journal = {arXiv preprint arXiv:1707.00183},
+year = {2017}
+}
+
+@article{silver2016mastering,
+title = {Mastering the game of Go with deep neural networks and tree search},
+author = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
+journal = {nature},
+volume = {529},
+number = {7587},
+pages = {484--489},
+year = {2016},
+publisher = {Nature Research}
+}
+
+@misc{baselines,
+author = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai},
+title = {OpenAI Baselines},
+year = {2017},
+publisher = {GitHub},
+journal = {GitHub repository},
+howpublished = {\url{https://github.com/openai/baselines}},
+}
+
+@misc{plappert2016kerasrl,
+author = {Matthias Plappert},
+title = {keras-rl},
+year = {2016},
+publisher = {GitHub},
+journal = {GitHub repository},
+howpublished = {\url{https://github.com/keras-rl/keras-rl}},
+}
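The new entries are referenced elsewhere in this commit through natbib-style macros (\citet and \citep appear in artificialintelligence.tex below); a minimal citation sketch, assuming natbib or a compatible package is loaded in the preamble:

% sketch: citing the newly added entries, assuming natbib-style \citet/\citep
\citet{arulkumaran2017brief} survey deep reinforcement learning.          % textual: Arulkumaran et al. (2017)
Gym provides standard benchmark environments \citep{brockman2016openai}.  % parenthetical: (Brockman et al., 2016)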
src/chaps/artificialintelligence.tex

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+\section{Artificial Intelligence}%
+\label{sec:artificial_intelligence}
+
+The field of \ac {AI} is, in comparison to Information Technology, both old and yet quite contemporary. In the middle of
+the 20th century, Alan Turing introduced the \emph{Turing Test} which, in essence, tests the ability of a human to tell
+whether answers to their questions are given by a machine or by another human. With the advent of computers around the same time,
+research started to aim for artificial intelligence. Generally though, clearly defining \ac {AI} in a single sentence is hard.
+\citet{russell2016artificial} structures historical definitions along two dimensions: the degree to which a system \emph{thinks} or
+\emph{behaves} like a \emph{human}, and how \emph{rationally} it thinks or behaves. These four directions are all pursued by researchers. In
+this thesis, the goal of \emph{acting rationally} is most appropriate. Table~\ref{tab:ai_definitions} summarizes these
+sub fields of research in the larger field of \ac {AI}.
+
+%TODO prettify
+\begin{table}[]
+\renewcommand{\arraystretch}{2.5}
+\centering
+\begin{tabular}{p{0.45\textwidth}|p{0.45\textwidth}}
+\textbf{Thinking Humanly}: The goal of creating machines with \emph{minds}
+&
+\textbf{Thinking Rationally}: Computation that can perceive, reason and act [rationally]
+\\
+\textbf{Acting Humanly}: "Machines that perform functions that require intelligence when performed by people"
+&
+\textbf{Acting Rationally}: design of intelligent agents
+\end{tabular}
+\caption{Various definitions of \ac {AI} \citep{russell2016artificial} }
+\label{tab:ai_definitions}
+\end{table}
+
+Today, some 70 years later, \ac {AI} is again extensively discussed by both researchers and public media
+\citep[p.24ff.]{russell2016artificial, arulkumaran2017brief}. The reasons for this are diverse, but it can be argued that
+the combination of readily available computing power through cloud computing and advances in the mathematical
+underpinnings has allowed for fast-paced advances in recent years. Also, the currently very popular \acf {NN}
+architectures often require large amounts of data to learn, which have lately become readily available to companies and
+researchers through the adoption of online technologies by the majority of the population
+\citep[p.27]{russell2016artificial}.
src/chaps/body.tex

Lines changed: 18 additions & 18 deletions
@@ -1,48 +1,48 @@
 \chapter{Introduction}
 \input{chaps/introduction.tex}
-\subsection{Methodology}
 \input{chaps/methodology.tex}

-\chapter{Artificial Intelligence}
+\chapter{Background}
+The thesis relies on work in three fields of research: Artificial Intelligence, Reinforcement Learning and (Animal)
+Cognition. These fields are applied in the context of competitive simulations, specifically an energy
+market simulation.

-TODO intro from russel book summarized
-% TODO intro from russel book summarized
-\section{Learning}
+\input{chaps/artificialintelligence.tex}
+\subsection{Learning}
 \input{chaps/learning.tex}
-\subsection{Supervised Learning}
+\subsubsection{Supervised Learning}
 \input{chaps/supervisedlearning.tex}
-\subsection{Unsupervised Learning}
+\subsubsection{Unsupervised Learning}
 \input{chaps/unsupervisedlearning.tex}
-\section{Neural Networks}%
+\subsection{Neural Networks}%
 \label{sec:neural_networks}
 \input{chaps/neuralnetworks.tex}
-\subsection{Learning Neural Networks and Backpropagation}
+\subsubsection{Learning Neural Networks and Backpropagation}
 \label{sec:Backpropagation}
 \input{chaps/backpropagation.tex}

-\section{Recurrent Neural Networks}%
+\subsection{Recurrent Neural Networks}%
 \label{sec:recurrent_neural_networks}
 \input{chaps/recurrentnn.tex}

 %TODO is this part of AI?
-\chapter{Reinforcement Learning}
+\section{Reinforcement Learning}
 \input{chaps/reinforcement.tex}

 %TODO still needed after paper by DeepMind? --> showed that learning from teacher helps
-\chapter{Animal Cognition}
-\section{Recognition}
-\section{Memory}
-\section{Social Cognition}
+%\section{Animal Cognition}
+%\subsection{Recognition}
+%\subsection{Memory}
+%\subsection{Social Cognition}

-\chapter{Competitive Simulations}%as a tool of experimental research into AI
+%\section{Competitive Simulations}%as a tool of experimental research into AI

-\chapter{Power Trading Agent Competition}
+\section{\ac {PowerTAC}: A Competitive Simulation}
 \input{chaps/powertac.tex}

 \chapter{Implementation}
 \input{chaps/implementation.tex}

 \chapter{Results}
 \input{chaps/results.tex}
-\chapter{Conclusion}

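body.tex is pulled in by the top-level main.tex that the @default_files entry in .latexmkrc points at; main.tex itself is not part of this diff, so the following is only a hypothetical sketch of how the pieces touched by this commit could be wired together:

% hypothetical main.tex sketch -- main.tex is not part of this commit
\documentclass{report}
\usepackage{acronym}          % provides \acro and \ac as used in acronyms.tex
\usepackage{natbib}           % provides \citet and \citep as used in the chapters
\usepackage{minted}           % assumed reason for -shell-escape in .latexmkrc
\begin{document}
\input{chaps/body.tex}        % the chapter structure restructured above
\input{acronyms.tex}          % acronym definitions
\bibliographystyle{plainnat}
\bibliography{bibliography}   % bibliography.bib extended above
\end{document}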