@@ -256,229 +256,27 @@ Citations
256256
257257`AdaHessian <https://scholar.googleusercontent.com/scholar.bib?q=info:NVTf2oQp6YoJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YeqDj8:AAGBfm0AAAAAYxCsFj89NAaxz72Tc2BaFva6FGFHuzjO&scisig=AAGBfm0AAAAAYxCsFm7SeFVY6NaIy5w0BOLAVGM4oy-z&scisf=4&ct=citation&cd=-1&hl=en >`__
258258
259- ` < >`__
259+ `AdaBound <https://scholar.googleusercontent.com/scholar.bib?q=info:CsrDHbimhWgJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YeqXZQ:AAGBfm0AAAAAYxCsRZR-WfagzOhOzHZ3ARAlehesAaQL&scisig=AAGBfm0AAAAAYxCsRSRkCJhTl9QisH1o5k8cbHBOOaQ0&scisf=4&ct=citation&cd=-1&hl=en>`__
260260
261- ` < >`__
261+ `AdaBelief <https://scholar.googleusercontent.com/scholar.bib?q=info:cf1gkNMQCAsJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YeqcPk:AAGBfm0AAAAAYxCsaPn6O2pgnuIZmWlssnrLY7Zug1ab&scisig=AAGBfm0AAAAAYxCsaPiac1Ktzqa7-8wabbO3pQzq2ezC&scisf=4&ct=citation&cd=-1&hl=en>`__
262262
263+ `Sharpness-aware minimization <https://scholar.googleusercontent.com/scholar.bib?q=info:621rS0TnyooJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YeqkcY:AAGBfm0AAAAAYxCsicYP7tw5aRNOjjXwkA4Vow-7jzWX&scisig=AAGBfm0AAAAAYxCsibGf462P1_gsWErL-yeGdIeNHywO&scisf=4&ct=citation&cd=-1&hl=en>`__
263264
264- .. raw :: html
265+ `Adaptive Sharpness-aware minimization <https://scholar.googleusercontent.com/scholar.bib?q=info:ta4j_XtLqXYJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YeqhhE:AAGBfm0AAAAAYxCsnhEGLjlU7PCikAYnM6LYuACuKcfu&scisig=AAGBfm0AAAAAYxCsno-VG_RWK0tOtqZdWxel6qTKtNyC&scisf=4&ct=citation&cd=-1&hl=en>`__
265266
266- </ details >
267+ `diffGrad <https://scholar.googleusercontent.com/scholar.bib?q=info:yGmD33AMjN4J:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0Yeqp7I:AAGBfm0AAAAAYxCsv7IYbE3ozFQrbhjAxbBdhbcNrNaT&scisig=AAGBfm0AAAAAYxCsv2mDmsNyW0R1koLK3vG04K7HEyRW&scisf=4&ct=citation&cd=-1&hl=en>`__
267268
268- .. raw :: html
269+ `On the Convergence of Adam and Beyond <https://scholar.googleusercontent.com/scholar.bib?q=info:B0s07Z6wFWkJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0Yeq5VI:AAGBfm0AAAAAYxCs_VIET-w3Fc6Bx3B7pbnercaue84a&scisig=AAGBfm0AAAAAYxCs_Rzcu3G4tmMrxOdaeXsfN9RSp3aA&scisf=4&ct=citation&cd=-1&hl=en>`__
269270
270- <details >
271- <summary ><a >AdaBound: Adaptive Gradient Methods with Dynamic Bound of Learning Rate</a ></summary >
271+ `Gradient surgery for multi-task learning <https://scholar.googleusercontent.com/scholar.bib?q=info:ae9CdgI_CtkJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YerBWY:AAGBfm0AAAAAYxCtHWZzzktUQ2GRhrSx_LWh7AiWbeUV&scisig=AAGBfm0AAAAAYxCtHaXMBqe9K0CCS9McXDPM8BRHHrTD&scisf=4&ct=citation&cd=-1&hl=en>`__
272272
273- ::
274-
275- @inproceedings{Luo2019AdaBound,
276- author = {Luo, Liangchen and Xiong, Yuanhao and Liu, Yan and Sun, Xu},
277- title = {Adaptive Gradient Methods with Dynamic Bound of Learning Rate},
278- booktitle = {Proceedings of the 7th International Conference on Learning Representations},
279- month = {May},
280- year = {2019},
281- address = {New Orleans, Louisiana}
282- }
283-
284- .. raw :: html
285-
286- </details >
287-
288- .. raw :: html
289-
290- <details >
291- <summary ><a >AdaBelief: Adapting stepsizes by the belief in observed gradients</a ></summary >
292-
293- ::
294-
295- @article{zhuang2020adabelief,
296- title={Adabelief optimizer: Adapting stepsizes by the belief in observed gradients},
297- author={Zhuang, Juntang and Tang, Tommy and Ding, Yifan and Tatikonda, Sekhar and Dvornek, Nicha and Papademetris, Xenophon and Duncan, James S},
298- journal={arXiv preprint arXiv:2010.07468},
299- year={2020}
300- }
301-
302- .. raw :: html
303-
304- </details >
305-
306- .. raw :: html
307-
308- <details >
309- <summary ><a >Sharpness-Aware Minimization</a ></summary >
310-
311- ::
312-
313- @article{foret2020sharpness,
314- title={Sharpness-aware minimization for efficiently improving generalization},
315- author={Foret, Pierre and Kleiner, Ariel and Mobahi, Hossein and Neyshabur, Behnam},
316- journal={arXiv preprint arXiv:2010.01412},
317- year={2020}
318- }
319-
320- .. raw :: html
321-
322- </details >
273+ `AdamD <https://scholar.googleusercontent.com/scholar.bib?q=info:XimgvO50x1AJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YerIAo:AAGBfm0AAAAAYxCtOAq69M6dSH0RQEVyiQYk-5ToDCvA&scisig=AAGBfm0AAAAAYxCtOJRDGw1cq6WRv2NODkLE5sgxAPz-&scisf=4&ct=citation&cd=-1&hl=en>`__
323274
324- .. raw :: html
325-
326- <details >
327- <summary ><a >Adaptive Sharpness-Aware Minimization</a ></summary >
328-
329- ::
330-
331- @article{kwon2021asam,
332- title={ASAM: Adaptive Sharpness-Aware Minimization for Scale-Invariant Learning of Deep Neural Networks},
333- author={Kwon, Jungmin and Kim, Jeongseop and Park, Hyunseo and Choi, In Kwon},
334- journal={arXiv preprint arXiv:2102.11600},
335- year={2021}
336- }
337-
338- .. raw :: html
339-
340- </details >
341-
342- .. raw :: html
343-
344- <details >
345- <summary ><a >diffGrad: An optimization method for convolutional neural networks</a ></summary >
346-
347- ::
348-
349- @article{dubey2019diffgrad,
350- title={diffgrad: An optimization method for convolutional neural networks},
351- author={Dubey, Shiv Ram and Chakraborty, Soumendu and Roy, Swalpa Kumar and Mukherjee, Snehasis and Singh, Satish Kumar and Chaudhuri, Bidyut Baran},
352- journal={IEEE transactions on neural networks and learning systems},
353- volume={31},
354- number={11},
355- pages={4500--4511},
356- year={2019},
357- publisher={IEEE}
358- }
359-
360- .. raw :: html
361-
362- </details >
363-
364- .. raw :: html
365-
366- <details >
367- <summary ><a >On the Convergence of Adam and Beyond</a ></summary >
368-
369- ::
275+ `Shampoo <https://scholar.googleusercontent.com/scholar.bib?q=info:GQn55DATO9sJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YerS64:AAGBfm0AAAAAYxCtU65eO2d2kyAf36X-vcVbovISPAY9&scisig=AAGBfm0AAAAAYxCtUwoUqdIzjjuqat7lPKZylm3bO6io&scisf=4&ct=citation&cd=-1&hl=en>`__
370276
371- @article{reddi2019convergence,
372- title={On the convergence of adam and beyond},
373- author={Reddi, Sashank J and Kale, Satyen and Kumar, Sanjiv},
374- journal={arXiv preprint arXiv:1904.09237},
375- year={2019}
376- }
377-
378- .. raw :: html
379-
380- </details >
381-
382- .. raw :: html
383-
384- <details >
385- <summary ><a >Gradient Surgery for Multi-Task Learning</a ></summary >
386-
387- ::
388-
389- @article{yu2020gradient,
390- title={Gradient surgery for multi-task learning},
391- author={Yu, Tianhe and Kumar, Saurabh and Gupta, Abhishek and Levine, Sergey and Hausman, Karol and Finn, Chelsea},
392- journal={arXiv preprint arXiv:2001.06782},
393- year={2020}
394- }
395-
396- .. raw :: html
397-
398- </details >
399-
400- .. raw :: html
401-
402- <details >
403- <summary ><a >AdamD: Improved bias-correction in Adam</a ></summary >
404-
405- ::
406-
407- @article{john2021adamd,
408- title={AdamD: Improved bias-correction in Adam},
409- author={John, John St},
410- journal={arXiv preprint arXiv:2110.10828},
411- year={2021}
412- }
413-
414- .. raw :: html
415-
416- </details >
417-
418- .. raw :: html
419-
420- <details >
421- <summary ><a >Shampoo: Preconditioned Stochastic Tensor Optimization</a ></summary >
422-
423- ::
424-
425- @inproceedings{gupta2018shampoo,
426- title={Shampoo: Preconditioned stochastic tensor optimization},
427- author={Gupta, Vineet and Koren, Tomer and Singer, Yoram},
428- booktitle={International Conference on Machine Learning},
429- pages={1842--1850},
430- year={2018},
431- organization={PMLR}
432- }
433-
434- .. raw :: html
435-
436- </details >
437-
438- .. raw :: html
439-
440- <details >
441- <summary ><a >Nero: Learning by Turning: Neural Architecture Aware Optimisation</a ></summary >
442-
443- ::
444-
445- @misc{nero2021,
446- title={Learning by Turning: Neural Architecture Aware Optimisation},
447- author={Yang Liu and Jeremy Bernstein and Markus Meister and Yisong Yue},
448- year={2021},
449- eprint={arXiv:2102.07227}
450- }
451-
452- .. raw :: html
453-
454- </details >
455-
456- .. raw :: html
457-
458- <details >
459- <summary ><a >Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models</a ></summary >
460-
461- ::
277+ `Nero <https://scholar.googleusercontent.com/scholar.bib?q=info:X7-f1Z-47X8J:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0Yercz8:AAGBfm0AAAAAYxCtaz9tFLHi82julKp6XCCGPZLRN2Qt&scisig=AAGBfm0AAAAAYxCta7MAiMjXj8qzcM8XBLi2AxsgVHIB&scisf=4&ct=citation&cd=-1&hl=en>`__
462278
463- @ARTICLE{2022arXiv220806677X,
464- author = {{Xie}, Xingyu and {Zhou}, Pan and {Li}, Huan and {Lin}, Zhouchen and {Yan}, Shuicheng},
465- title = "{Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models}",
466- journal = {arXiv e-prints},
467- keywords = {Computer Science - Machine Learning, Mathematics - Optimization and Control},
468- year = 2022,
469- month = aug,
470- eid = {arXiv:2208.06677},
471- pages = {arXiv:2208.06677},
472- archivePrefix = {arXiv},
473- eprint = {2208.06677},
474- primaryClass = {cs.LG},
475- adsurl = {https://ui.adsabs.harvard.edu/abs/2022arXiv220806677X},
476- adsnote = {Provided by the SAO/NASA Astrophysics Data System}
477- }
478-
479- .. raw :: html
480-
481- </details >
279+ `Adan <https://scholar.googleusercontent.com/scholar.bib?q=info:rMUXKCk35EAJ:scholar.google.com/&output=citation&scisdr=CgX1Wk9EELXN0YerkVs:AAGBfm0AAAAAYxCtiVs7M7Oh9VkEVan-wY3IXOKyQtx1&scisig=AAGBfm0AAAAAYxCtiYyoEigNiau7MNmGcvqAEC8nSm-L&scisf=4&ct=citation&cd=-1&hl=en>`__
482280
483281Author
484282------