% Bibliography database for the garage user documentation (docs/user/references.bib).

% Ho, Gupta and Ermon (2016): conference paper, with a link to the arXiv version.
@inproceedings{ho2016model,
  author    = {Ho, Jonathan and Gupta, Jayesh and Ermon, Stefano},
  title     = {Model-free imitation learning with policy optimization},
  booktitle = {International Conference on Machine Learning},
  year      = {2016},
  pages     = {2760--2769},
  url       = {https://arxiv.org/abs/1605.08478},
}

% Williams (1992): journal article; volume 8, double issue 3--4 (ranges use a double hyphen).
@article{williams1992simple,
  author    = {Williams, Ronald J.},
  title     = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {229--256},
  year      = {1992},
  publisher = {Springer},
}

% Rubinstein and Kroese (2004): a book, so series, publisher and city each get their own field
% instead of being packed into a journal name.
@book{rubinstein2004cross,
  author    = {Rubinstein, Reuven Y. and Kroese, Dirk P.},
  title     = {The Cross-Entropy Method: A Unified Approach to {Monte Carlo} Simulation, Randomized Optimization and Machine Learning},
  series    = {Information Science and Statistics},
  publisher = {Springer-Verlag},
  address   = {New York, NY},
  year      = {2004},
}

% Duan et al. (2016), arXiv:1611.02779.
% The superscript is plain math (not the text accent command) and is braced so styles do not recase it.
@article{duan2016rl,
  author  = {Duan, Yan and Schulman, John and Chen, Xi and Bartlett, Peter L. and Sutskever, Ilya and Abbeel, Pieter},
  title   = {{RL$^2$}: Fast Reinforcement Learning via Slow Reinforcement Learning},
  journal = {arXiv preprint arXiv:1611.02779},
  year    = {2016},
}

% Haarnoja et al. (2018), arXiv:1812.05905; the author list is truncated with "and others".
@article{haarnoja2018soft,
  author  = {Haarnoja, Tuomas and Zhou, Aurick and Hartikainen, Kristian and Tucker, George and Ha, Sehoon and Tan, Jie and Kumar, Vikash and Zhu, Henry and Gupta, Abhishek and Abbeel, Pieter and others},
  title   = {Soft actor-critic algorithms and applications},
  journal = {arXiv preprint arXiv:1812.05905},
  year    = {2018},
}

% Rakelly et al. (2019), arXiv:1903.08254.
@article{rakelly2019efficient,
  author  = {Kate Rakelly and Aurick Zhou and Deirdre Quillen and Chelsea Finn and Sergey Levine},
  title   = {Efficient Off-Policy Meta-Reinforcement Learning via Probabilistic Context Variables},
  journal = {arXiv preprint arXiv:1903.08254},
  year    = {2019},
}

% Schulman et al. (2017), arXiv:1707.06347.
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017},
}

% Levine (2018), arXiv:1805.00909; single-author tutorial and review.
@article{levine2018reinforcement,
  author  = {Levine, Sergey},
  title   = {Reinforcement learning and control as probabilistic inference: Tutorial and review},
  journal = {arXiv preprint arXiv:1805.00909},
  year    = {2018},
}

% Schulman et al. (2015), arXiv:1502.05477.
% archivePrefix tells eprint-aware styles which archive the bare identifier belongs to.
@article{schulman2015trust,
  author        = {Schulman, John and Levine, Sergey and Moritz, Philipp and Jordan, Michael I. and Abbeel, Pieter},
  title         = {Trust region policy optimization},
  journal       = {arXiv},
  year          = {2015},
  eprint        = {1502.05477},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}

% Fujimoto, van Hoof and Meger (2018), arXiv:1802.09477.
% Names use the "von Last, First" form so the "van" particle is parsed unambiguously.
@article{Fujimoto2018AddressingFA,
  author  = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  title   = {Addressing Function Approximation Error in Actor-Critic Methods},
  journal = {arXiv preprint arXiv:1802.09477},
  year    = {2018},
  url     = {https://arxiv.org/abs/1802.09477},
}

% Yu et al. (2019), arXiv:1910.10897.
% The benchmark name is braced so sentence-casing styles keep its capitals.
@article{yu2019metaworld,
  author  = {Yu, Tianhe and Quillen, Deirdre and He, Zhanpeng and Julian, Ryan and Hausman, Karol and Finn, Chelsea and Levine, Sergey},
  title   = {{Meta-World}: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning},
  journal = {arXiv preprint arXiv:1910.10897},
  year    = {2019},
}

% Hausman et al. (2018): conference paper with an OpenReview link.
% No journal field: it does not apply to this entry type and an empty value only triggers warnings.
@inproceedings{hausman2018learning,
  author    = {Hausman, Karol and Springenberg, Jost Tobias and Wang, Ziyu and Heess, Nicolas and Riedmiller, Martin},
  title     = {Learning an Embedding Space for Transferable Robot Skills},
  booktitle = {International Conference on Learning Representations},
  year      = {2018},
  url       = {https://openreview.net/forum?id=rk07ZXZRb},
}

% Lillicrap et al. (2015), arXiv:1509.02971.
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title   = {Continuous control with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1509.02971},
  year    = {2015},
}

% Peters and Schaal (2007): IEEE symposium paper, pages 262--267.
% Empty volume/number fields are dropped; the acronym is braced against recasing.
@inproceedings{peters2007reward,
  author    = {Peters, Jan and Schaal, Stefan},
  title     = {Using Reward-weighted Regression for Reinforcement Learning of Task Space Control},
  booktitle = {2007 {IEEE} International Symposium on Approximate Dynamic Programming and Reinforcement Learning},
  year      = {2007},
  pages     = {262--267},
}

% Kober and Peters (2009): paper in an edited proceedings volume, so it is typed as a
% proceedings paper with booktitle and editor rather than as a journal article.
% Editor names are separated by "and", as BibTeX name lists require.
@inproceedings{2009koberpolicy,
  author    = {Kober, Jens and Peters, Jan},
  title     = {Policy Search for Motor Primitives in Robotics},
  booktitle = {Advances in Neural Information Processing Systems 21},
  editor    = {Koller, D. and Schuurmans, D. and Bengio, Y. and Bottou, L.},
  pages     = {849--856},
  publisher = {Curran},
  address   = {Red Hook, NY, USA},
  month     = jun,
  year      = {2009},
}

% Finn, Abbeel and Levine (2017): arXiv e-print 1703.03400, class cs.LG.
@misc{finn2017modelagnostic,
  author        = {Chelsea Finn and Pieter Abbeel and Sergey Levine},
  title         = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
  year          = {2017},
  eprint        = {1703.03400},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}