@misc{price2024gencast,
title={GenCast: Diffusion-based ensemble forecasting for medium-range weather},
author={Ilan Price and Alvaro Sanchez-Gonzalez and Ferran Alet and Tom R. Andersson and Andrew El-Kadi and Dominic Masters and Timo Ewalds and Jacklynn Stott and Shakir Mohamed and Peter Battaglia and Remi Lam and Matthew Willson},
year={2024},
eprint={2312.15796},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2312.15796},
}
@misc{allen2025aurora,
title={Aurora: Architecting Argonne's First Exascale Supercomputer for Accelerated Scientific Discovery},
author={Benjamin S. Allen and James Anchell and Victor Anisimov and Thomas Applencourt and Abhishek Bagusetty and Ramesh Balakrishnan and Riccardo Balin and Solomon Bekele and Colleen Bertoni and Cyrus Blackworth and Renzo Bustamante and Kevin Canada and John Carrier and Christopher Chan-nui and Lance C. Cheney and Taylor Childers and Paul Coffman and Susan Coghlan and Michael D'Mello and Murali Emani and Kyle G. Felker and Sam Foreman and Olivier Franza and Longfei Gao and Marta García and María Garzarán and Balazs Gerofi and Yasaman Ghadar and Neha Gupta and Kevin Harms and Väinö Hatanpää and Brian Holland and Carissa Holohan and Brian Homerding and Khalid Hossain and Louise Huot and Huda Ibeid and Joseph A. Insley and Sai Jayanthi and Hong Jiang and Wei Jiang and Xiao-Yong Jin and Jeongnim Kim and Christopher Knight and Kalyan Kumaran and JaeHyuk Kwack and Ti Leggett and Ben Lenard and Chris Lewis and Nevin Liber and Johann Lombardi and Raymond M. Loy and Ye Luo and Bethany Lusch and Nilakantan Mahadevan and Victor A. Mateevitsi and Gordon McPheeters and Ryan Milner and Vitali A. Morozov and Servesh Muralidharan and Tom Musta and Mrigendra Nagar and Vikram Narayana and Marieme Ngom and Anthony-Trung Nguyen and Nathan Nichols and Aditya Nishtala and James C. Osborn and Michael E. Papka and Scott Parker and Saumil S. Patel and Adrian C. Pope and Sucheta Raghunanda and Esteban Rangel and Paul M. Rich and Silvio Rizzi and Kris Rowe and Varuni Sastry and Adam Scovel and Filippo Simini and Haritha Siddabathuni Som and Patrick Steinbrecher and Rick Stevens and Xinmin Tian and Peter Upton and Thomas Uram and Archit K. Vasan and Álvaro Vázquez-Mayagoitia and Kaushik Velusamy and Brice Videau and Venkatram Vishwanath and Brian Whitney and Timothy J. Williams and Michael Woodacre and Sam Zeltner and Gengbin Zheng and Huihuo Zheng},
year={2025},
eprint={2509.08207},
archivePrefix={arXiv},
primaryClass={cs.DC},
url={https://arxiv.org/abs/2509.08207},
}
@conference{torsiello2025automated,
author = {Torsiello, J. and Fleming, G. T. and Foreman, S. and Jin, X.-Y. and Osborn, J. C.},
title = {Automated tuning for HMC mass ratios},
annote = {We extended previous work on tuning HMC parameters using gradient information to include Hasenbusch mass ratios. The inclusion of mass ratios adds many more parameters that need to be tuned, and also allows for lots of variations in the choice of integrator pattern. We investigate the effectiveness of automatically tuning a large number of HMC parameters and compare the optimally tuned versions over a range of integrator variants.},
doi = {10.22323/1.466.0052},
url = {https://www.osti.gov/biblio/2551828},
journal = {PoS},
place = {United States},
organization = {Argonne, ALCF; Argonne National Laboratory (ANL), Argonne, IL (United States); Temple U.; Fermi National Accelerator Laboratory (FNAL), Batavia, IL (United States)},
year = {2025},
month = {02}
}
@misc{stock2025aeris,
title = {AERIS: Argonne Earth Systems Model for Reliable and Skillful
Predictions},
author = {Väinö Hatanpää and Eugene Ku and Jason Stock and Murali Emani and
Sam Foreman and Chunyong Jung and Sandeep Madireddy and Tung Nguyen
and Varuni Sastry and Ray A. O. Sinurat and Sam Wheeler and Huihuo
Zheng and Troy Arcomano and Venkatram Vishwanath and Rao Kotamarthi
},
year = {2025},
eprint = {2509.13523},
archivePrefix = {arXiv},
primaryClass = {cs.LG},
url = {https://arxiv.org/abs/2509.13523},
}
@misc{mccandlish2018empiricalmodellargebatchtraining,
title = {An Empirical Model of Large-Batch Training},
author = {Sam McCandlish and Jared Kaplan and Dario Amodei and OpenAI Dota
Team},
year = {2018},
eprint = {1812.06162},
archivePrefix = {arXiv},
primaryClass = {cs.LG},
url = {https://arxiv.org/abs/1812.06162},
}
@misc{gokdemir2025hiperrag,
title = {HiPerRAG: High-Performance Retrieval Augmented Generation for
Scientific Insights},
author = {Ozan Gokdemir and Carlo Siebenschuh and Alexander Brace and Azton
Wells and Brian Hsu and Kyle Hippe and Priyanka V. Setty and
Aswathy Ajith and J. Gregory Pauloski and Varuni Sastry and Sam
Foreman and Huihuo Zheng and Heng Ma and Bharat Kale and Nicholas
Chia and Thomas Gibbs and Michael E. Papka and Thomas Brettin and
Francis J. Alexander and Anima Anandkumar and Ian Foster and Rick
Stevens and Venkatram Vishwanath and Arvind Ramanathan},
year = {2025},
eprint = {2505.04846},
archivePrefix = {arXiv},
primaryClass = {cs.IR},
}
@misc{yan2025mofa,
title = {MOFA: Discovering Materials for Carbon Capture with a GenAI- and
Simulation-Based Workflow},
author = {Xiaoli Yan and Nathaniel Hudson and Hyun Park and Daniel Grzenda
and J. Gregory Pauloski and Marcus Schwarting and Haochen Pan and
Hassan Harb and Samuel Foreman and Chris Knight and Tom Gibbs and
Kyle Chard and Santanu Chaudhuri and Emad Tajkhorshid and Ian
Foster and Mohamad Moosavi and Logan Ward and E. A. Huerta},
year = {2025},
eprint = {2501.10651},
archivePrefix = {arXiv},
primaryClass = {cs.DC},
}
@article{leung2024intro,
author = {Leung, Mary Ann and Cahill, Katharine and Hartman-Baker, Rebecca
and Kinsley, Paige and Curfman McInnes, Lois and Parete-Koon,
Suzanne and Ramprakash, Sreeranjani and Abraham, Subil and Beach
Barrier, Lacy and Chen, Gladys and others},
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
annote = {The U.S. Department of Energy (DOE) is a long-standing leader in
research and development of high-performance computing (HPC) in the
pursuit of science. However, we face daunting challenges in
fostering a robust and diverse HPC workforce. Basic HPC is not
typically taught at early stages of students' academic careers, and
the capacity and knowledge of HPC at many institutions are limited.
Even so, such topics are prerequisites for advanced training
programs, internships, graduate school, and ultimately for careers
in HPC. To help address this challenge, as part of the DOE Exascale
Computing Project's Broadening Participation Initiative, we
recently launched the Introduction to HPC Training and Workforce
Pipeline Program to provide accessible introductory material on HPC,
scalable AI, and analytics. We describe the Intro to HPC Bootcamp,
an immersive program designed to engage students from
underrepresented groups as they learn foundational HPC skills. Here,
the program takes a novel approach to HPC training by turning the
traditional curriculum upside down. Instead of focusing on
technology and its applications, the bootcamp focuses on energy
justice to motivate the training of HPC skills through
project-based pedagogy and real-life science stories. Additionally,
the bootcamp prepares students for internships and future careers
at DOE labs. The first bootcamp, hosted by the advanced computing
facilities at Argonne, Lawrence Berkeley, and Oak Ridge National
Labs and organized by Sustainable Horizons Institute, took place in
August 2023.},
doi = {10.22369/issn.2153-4136/15/1/10},
url = {https://www.osti.gov/biblio/2447313},
journal = {Journal of Computational Science Education},
issn = {2153-4136},
number = {1},
volume = {15},
place = {United States},
publisher = {Shodor Education Foundation, Inc.},
year = {2024},
month = {02},
}
@inproceedings{mprot-dpo2024,
author = {Dharuman, Gautham and Hippe, Kyle and Brace, Alexander and
Foreman, Sam and Hatanp\"{a}\"{a}, V\"{a}in\"{o} and Sastry, Varuni K. and
Zheng, Huihuo and Ward, Logan and Muralidharan, Servesh and Vasan,
Archit and Kale, Bharat and Mann, Carla M. and Ma, Heng and Cheng,
Yun-Hsuan and Zamora, Yuliana and Liu, Shengchao and Xiao, Chaowei
and Emani, Murali and Gibbs, Tom and Tatineni, Mahidhar and Canchi,
Deepak and Mitchell, Jerome and Yamada, Koichi and Garzaran, Maria
and Papka, Michael E. and Foster, Ian and Stevens, Rick and
Anandkumar, Anima and Vishwanath, Venkatram and Ramanathan, Arvind},
title = {MProt-DPO: Breaking the ExaFLOPS Barrier for Multimodal Protein
Design Workflows with Direct Preference Optimization},
year = {2024},
isbn = {9798350352917},
publisher = {IEEE Press},
url = {https://doi.org/10.1109/SC41406.2024.00013},
doi = {10.1109/SC41406.2024.00013},
abstract = {We present a scalable, end-to-end workflow for protein design.
By augmenting protein sequences with natural language
descriptions of their biochemical properties, we train generative
models that can be preferentially aligned with protein fitness
landscapes. Through complex experimental- and simulation-based
observations, we integrate these measures as preferred parameters
for generating new protein variants and demonstrate our workflow
on five diverse supercomputers. We achieve >1 ExaFLOPS sustained
performance in mixed precision on each supercomputer and a
maximum sustained performance of 4.11 ExaFLOPS and peak
performance of 5.57 ExaFLOPS. We establish the scientific
performance of our model on two tasks: (1) across a predetermined
benchmark dataset of deep mutational scanning experiments to
optimize the fitness-determining mutations in the yeast protein
HIS7, and (2) in optimizing the design of the enzyme malate
dehydrogenase to achieve lower activation barriers (and therefore
increased catalytic rates) using simulation data. Our
implementation thus sets high watermarks for multimodal protein
design workflows.},
booktitle = {Proceedings of the International Conference for High
Performance Computing, Networking, Storage, and Analysis},
articleno = {7},
numpages = {13},
keywords = {AI, HPC, Large language models, protein design},
location = {Atlanta, GA, USA},
series = {SC '24},
}
@inproceedings{hosseini2025quality,
title = {Quality Measures for Dynamic Graph Generative Models},
author = {Ryien Hosseini and Filippo Simini and Venkatram Vishwanath and
Rebecca Willett and Henry Hoffmann},
booktitle = {The Thirteenth International Conference on Learning
Representations},
year = {2025},
url = {https://openreview.net/forum?id=8bjspmAMBk},
}
@article{deamont2014superconductivity,
title = {Superconductivity of In and Sn Samples},
author = {Deamont, George and Foreman, Sam},
year = {2014},
}
@INPROCEEDINGS{foreman2018rg,
author = {{Foreman}, Sam and {Giedt}, Joel and {Meurice}, Yannick and {Unmuth-Yockey}, Judah},
title = "{RG-inspired machine learning for lattice field theory}",
keywords = {High Energy Physics - Lattice, Condensed Matter - Statistical Mechanics},
booktitle = {European Physical Journal Web of Conferences},
year = 2018,
series = {European Physical Journal Web of Conferences},
volume = {175},
month = mar,
eid = {11025},
pages = {11025},
doi = {10.1051/epjconf/201817511025},
archivePrefix = {arXiv},
eprint = {1710.02079},
primaryClass = {hep-lat},
adsurl = {https://ui.adsabs.harvard.edu/abs/2018EPJWC.17511025F},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
@article{hubler2018large,
title = {Large energy density in three-plate nanocapacitors due to Coulomb
blockade},
author = {Hubler, A and Foreman, S and Liu, J and Wortsmann, L},
journal = {Journal of Applied Physics},
volume = {123},
number = {10},
year = {2018},
publisher = {AIP Publishing},
}
@article{foreman2018examples,
title = {Examples of renormalization group transformations for image sets},
author = {Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah},
journal = {Physical Review E},
volume = {98},
number = {5},
pages = {052129},
year = {2018},
publisher = {American Physical Society},
}
@article{Foreman:2018qei,
author = "Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah",
title = "{Machine learning inspired analysis of the Ising model transition}",
doi = "10.22323/1.334.0245",
journal = "PoS",
volume = "LATTICE2018",
pages = "245",
year = "2018",
}
@inproceedings{foreman2018machine,
title = {Machine learning inspired analysis of the Ising model transition},
author = {Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah},
booktitle = {Lattice 2018},
year = {2018},
}
@phdthesis{foreman2019learning,
title = {Learning Better Physics: A Machine Learning Approach to Lattice
Gauge Theory},
author = {Foreman, Samuel Alfred},
year = {2019},
school = {University of Iowa},
}
@article{foreman2020machine,
title = {Machine Learning and Neural Networks for Field Theory},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
year = {2020},
}
@article{foreman2021hmc,
title = {HMC with normalizing flows},
author = {Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin,
Xiao-Yong and Osborn, James C and Tomiya, Akio},
journal = {arXiv preprint arXiv:2112.01586},
archivePrefix = {arXiv},
eprint = {2112.01586},
year = {2021},
}
@article{foreman2021leapfroglayers,
title = {LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal = {arXiv preprint arXiv:2112.01582},
archivePrefix = {arXiv},
eprint = {2112.01582},
year = {2021},
}
@article{liu2017energy,
title = {Energy storage in quantum resonators},
author = {Liu, Jiaqi and Hubler, Alfred W and Foreman, Samuel Alfred and
Ott, Katharina},
year = {2017},
}
@article{boyda2022applications,
title = {Applications of machine learning to lattice quantum field theory},
author = {Boyda, Denis and Cal{\`\i}, Salvatore and Foreman, Sam and Funcke,
Lena and Hackett, Daniel C and Lin, Yin and Aarts, Gert and
Alexandru, Andrei and Jin, Xiao-Yong and Lucini, Biagio and others},
journal = {arXiv preprint arXiv:2202.05838},
archivePrefix = {arXiv},
eprint = {2202.05838},
year = {2022},
}
@article{kronfeld2022lattice,
title = {Lattice QCD and particle physics},
author = {Kronfeld, Andreas S and Bhattacharya, Tanmoy and Blum, Thomas and
Christ, Norman H and DeTar, Carleton and Detmold, William and
Edwards, Robert and Hasenfratz, Anna and Lin, Huey-Wen and
Mukherjee, Swagato and others},
journal = {arXiv preprint arXiv:2207.07641},
archivePrefix = {arXiv},
eprint = {2207.07641},
year = {2022},
}
@article{zvyagin2023genslms,
title = {GenSLMs: Genome-scale language models reveal SARS-CoV-2
evolutionary dynamics},
author = {Zvyagin, Maxim and Brace, Alexander and Hippe, Kyle and Deng,
Yuntian and Zhang, Bin and Bohorquez, Cindy Orozco and Clyde,
Austin and Kale, Bharat and Perez-Rivera, Danilo and Ma, Heng and
others},
journal = {The International Journal of High Performance Computing
Applications},
volume = {37},
number = {6},
pages = {683--705},
year = {2023},
publisher = {SAGE Publications Sage UK: London, England},
}
@article{emani2023comprehensive,
title = {A Comprehensive Performance Study of Large Language Models on Novel
AI Accelerators},
author = {Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen
and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and
Vishwanath, Venkatram and Papka, Michael E},
journal = {arXiv preprint arXiv:2310.04607},
eprint = {2310.04607},
archivePrefix = {arXiv},
year = {2023},
}
@article{song2023deepspeed4science,
title = {DeepSpeed4Science Initiative: Enabling Large-Scale Scientific
Discovery through Sophisticated AI System Technologies},
author = {Song, Shuaiwen Leon and Kruft, Bonnie and Zhang, Minjia and Li,
Conglong and Chen, Shiyang and Zhang, Chengming and Tanaka,
Masahiro and Wu, Xiaoxia and Rasley, Jeff and Awan, Ammar Ahmad and
others},
journal = {arXiv preprint arXiv:2310.04610},
eprint = {2310.04610},
archivePrefix = {arXiv},
year = {2023},
}
@inproceedings{dharuman2023protein,
title = {Protein Generation via Genome-scale Language Models with
Bio-physical Scoring},
author = {Dharuman, Gautham and Ward, Logan and Ma, Heng and Setty, Priyanka
V and Gokdemir, Ozan and Foreman, Sam and Emani, Murali and Hippe,
Kyle and Brace, Alexander and Keipert, Kristopher and others},
booktitle = {Proceedings of the SC'23 Workshops of The International
Conference on High Performance Computing, Network, Storage, and
Analysis},
pages = {95--101},
year = {2023},
}
@online{foreman2023climrr,
author = {Foreman, Sam},
title = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
date = {2023-08-07},
url = {https://saforem2.github.io/climate-analysis},
langid = {en},
}
@online{foreman2024long,
author = {Foreman, Sam},
title = {🚂 {Loooooooong} {Sequence} {Lengths}},
date = {2024-02-12},
url = {https://samforeman.me/posts/AuroraGPT/long-sequences/},
langid = {en},
}
@misc{foreman2023mlmc,
title = {MLMC: Machine Learning Monte Carlo for Lattice Gauge Theory},
author = {Sam Foreman and Xiao-Yong Jin and James C. Osborn},
year = {2023},
eprint = {2312.08936},
archivePrefix = {arXiv},
primaryClass = {hep-lat},
url = {https://arxiv.org/abs/2312.08936},
}
@article{shanahan2022snowmass,
title = {Snowmass 2021 computational frontier CompF03 topical group report:
Machine learning},
author = {Shanahan, Phiala and Terao, Kazuhiro and Whiteson, Daniel},
journal = {arXiv preprint arXiv:2209.07559},
year = {2022},
archivePrefix = {arXiv},
eprint = {2209.07559},
}
@article{cheng2024thorough,
author = {Cheng, Scott and Lin, Jun-Liang and Emani, Murali and Raskar,
Siddhisanket and Foreman, Sam and Xie, Zhen and Vishwanath,
Venkatram and Kandemir, Mahmut Taylan},
title = {Thorough Characterization and Analysis of Large Transformer Model
Training At-Scale},
year = {2024},
issue_date = {March 2024},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {8},
number = {1},
url = {https://doi.org/10.1145/3639034},
doi = {10.1145/3639034},
abstract = {Large transformer models have recently achieved great success
across various domains. With a growing number of model parameters,
a large transformer model training today typically involves
model sharding, data parallelism, and model parallelism. Thus,
the throughput of large-scale model training depends heavily on
the network bandwidth since a combination of model sharding and
multiple parallelism strategies incurs various costs. However,
prior characterizations of transformer models on high-bandwidth
DGX machines that use TFLOPS as a metric may not reflect the
performance of a system with lower bandwidth. Furthermore, data
and model parallelism reveal significantly distinct training
profiles on different system bandwidths at scale and, thus, need
a thorough study. In this paper, we provide a bottom-up breakdown
of training throughput into compute and communication time, and
quantitatively analyze their respective influences on overall
end-to-end training scaling. Our evaluation involves an in-depth
exploration of data parallelism, scaling up to 512 GPUs with
limited bandwidth, and examines three model sharding strategies
among six model sizes. We also evaluate three combinations of
model parallelism on both high and low bandwidth supercomputing
systems. Overall, our work provides a broader perspective on
large-scale transformer model training, and our analysis and
evaluation yield practical insights for predicting training
scaling, shaping the future development of supercomputing system
design.},
journal = {Proc. ACM Meas. Anal. Comput. Syst.},
month = feb,
articleno = {8},
numpages = {25},
}
@article{leung2024communities,
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
author = {Leung, Mary Ann and Cahill, Katharine and Hartman-Baker, Rebecca
and Kinsley, Paige and McInnes, Lois Curfman and Parete-Koon,
Suzanne and Abraham, Subil and Barrier, Lacy Beach and Chen, Gladys
and DeStefano, Lizanne and others},
journal = {Journal of Computational Science Education},
volume = {15},
number = {1},
year = {2024},
}
@inproceedings{arcomano2023applications,
title = {Applications of a Foundation Model Approach for Weather and Climate},
author = {Arcomano, Troy and Wikner, Alexander and Maulik, Romit and
Kotamarthi, Veerabhadra Rao and Foreman, Sam},
booktitle = {AGU Fall Meeting Abstracts},
volume = {2023},
pages = {GC22C--06},
year = {2023},
}
@inproceedings{emani2024toward,
title = {Toward a holistic performance evaluation of large language models
across diverse AI accelerators},
author = {Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen
and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and
Vishwanath, Venkatram and Papka, Michael E and Shanmugavelu, Sanjif
and others},
booktitle = {2024 IEEE International Parallel and Distributed Processing
Symposium Workshops (IPDPSW)},
pages = {1--10},
year = {2024},
organization = {IEEE},
}
@article{parete2024intro,
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
author = {Parete-Koon, Suzanne and Sandoval, Michael and Leland, Kellen and
Abraham, Subil and Leung, Mary Ann and Hartman-Baker, Rebecca and
Kinsley, Paige and McInnes, Lois and Ramprakash, Sreeranjani and
Beach Barrier, Lacy and others},
journal = {Journal of Computational Science Education},
volume = {15},
number = {1},
year = {2024},
publisher = {Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United
States)},
}
@inproceedings{dharuman2024mprot,
title = {MProt-DPO: Breaking the ExaFLOPS Barrier for Multimodal Protein
Design Workflows with Direct Preference Optimization},
author = {Dharuman, Gautham and Hippe, Kyle and Brace, Alexander and
Foreman, Sam and Hatanp{\"a}{\"a}, V{\"a}in{\"o} and Sastry, Varuni K. and
Zheng, Huihuo and Ward, Logan and Muralidharan, Servesh and Vasan,
Archit and others},
booktitle = {SC24: International Conference for High Performance
Computing, Networking, Storage and Analysis},
pages = {74--86},
year = {2024},
organization = {IEEE Computer Society},
}
@misc{song2023ds4sci,
title = {DeepSpeed4Science Initiative: Enabling Large-Scale Scientific
Discovery through Sophisticated AI System Technologies},
author = {Shuaiwen Leon Song and Bonnie Kruft and Minjia Zhang and Conglong
Li and Shiyang Chen and Chengming Zhang and Masahiro Tanaka and
Xiaoxia Wu and Jeff Rasley and Ammar Ahmad Awan and Connor Holmes
and Martin Cai and Adam Ghanem and Zhongzhu Zhou and Yuxiong He and
Pete Luferenko and Divya Kumar and Jonathan Weyn and Ruixiong Zhang
and Sylwester Klocek and Volodymyr Vragov and Mohammed AlQuraishi
and Gustaf Ahdritz and Christina Floristean and Cristina Negri and
Rao Kotamarthi and Venkatram Vishwanath and Arvind Ramanathan and
Sam Foreman and Kyle Hippe and Troy Arcomano and Romit Maulik and
Maxim Zvyagin and Alexander Brace and Bin Zhang and Cindy Orozco
Bohorquez and Austin Clyde and Bharat Kale and Danilo Perez-Rivera
and Heng Ma and Carla M. Mann and Michael Irvin and J. Gregory
Pauloski and Logan Ward and Valerie Hayot and Murali Emani and Zhen
Xie and Diangen Lin and Maulik Shukla and Ian Foster and James J.
Davis and Michael E. Papka and Thomas Brettin and Prasanna
Balaprakash and Gina Tourassi and John Gounley and Heidi Hanson and
Thomas E Potok and Massimiliano Lupo Pasini and Kate Evans and Dan
Lu and Dalton Lunga and Junqi Yin and Sajal Dash and Feiyi Wang and
Mallikarjun Shankar and Isaac Lyngaas and Xiao Wang and Guojing
Cong and Pei Zhang and Ming Fan and Siyan Liu and Adolfy Hoisie and
Shinjae Yoo and Yihui Ren and William Tang and Kyle Felker and
Alexey Svyatkovskiy and Hang Liu and Ashwin Aji and Angela Dalton
and Michael Schulte and Karl Schulz and Yuntian Deng and Weili Nie
and Josh Romero and Christian Dallago and Arash Vahdat and Chaowei
Xiao and Thomas Gibbs and Anima Anandkumar and Rick Stevens},
year = {2023},
eprint = {2310.04610},
archivePrefix = {arXiv},
primaryClass = {cs.AI},
url = {https://arxiv.org/abs/2310.04610},
}
@misc{wei2022emergentabilitieslargelanguage,
title = {Emergent Abilities of Large Language Models},
author = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and
Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten
Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori
Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and
William Fedus},
year = {2022},
eprint = {2206.07682},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
url = {https://arxiv.org/abs/2206.07682},
}
@misc{Burdi:2023climrr,
title = {The Climate Risk \& Resilience Portal (ClimRR) Metadata and Data
Dictionary},
author = {Burdi, C. and Branham, J. and Wall, T.},
year = {2023},
note = {Available at \url{https://anl.app.box.com/s/hmkkgkrkzxxocfe9kpgrzk2gfc4gizp8/file/1055145398460}},
url = {https://dub.sh/ClimRR-Metadata},
}
@misc{wittig2023progress,
title = {Progress on $(g-2)_\mu$ from Lattice QCD},
author = {Hartmut Wittig},
year = {2023},
eprint = {2306.04165},
archivePrefix = {arXiv},
primaryClass = {hep-ph},
}
@article{Duane:1987de,
author = "Duane, S. and Kennedy, A. D. and Pendleton, B. J. and Roweth, D.",
title = "{Hybrid Monte Carlo}",
doi = "10.1016/0370-2693(87)91197-X",
journal = "Phys. Lett. B",
volume = "195",
pages = "216--222",
year = "1987",
}
@article{Shanahan:2022ifi,
author = "Shanahan, Phiala and others",
title = "{Snowmass 2021 Computational Frontier CompF03 Topical Group Report:
Machine Learning}",
eprint = "2209.07559",
archivePrefix = "arXiv",
primaryClass = "physics.comp-ph",
reportNumber = "FERMILAB-CONF-22-719-ND-PPD-QIS-SCD",
month = "9",
year = "2022",
}
@inproceedings{Boyda:2022nmh,
author = "Boyda, Denis and others",
title = "{Applications of Machine Learning to Lattice Quantum Field Theory}",
booktitle = "{Snowmass 2021}",
eprint = "2202.05838",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
reportNumber = "MIT-CTP/5405",
month = "2",
year = "2022",
}
@article{Foreman:2021ljl,
author = "Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin,
Xiao-Yong and Osborn, James C. and Tomiya, Akio",
title = "{HMC with Normalizing Flows}",
eprint = "2112.01586",
archivePrefix = "arXiv",
primaryClass = "cs.LG",
doi = "10.22323/1.396.0073",
journal = "PoS",
volume = "LATTICE2021",
pages = "073",
year = "2022",
}
@article{Foreman:2021rhs,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling}",
eprint = "2112.01582",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
doi = "10.22323/1.396.0508",
journal = "PoS",
volume = "LATTICE2021",
pages = "508",
year = "2022",
}
@inproceedings{Foreman:2021ixr,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{Deep Learning Hamiltonian Monte Carlo}",
booktitle = "{9th International Conference on Learning Representations}",
eprint = "2105.03418",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
month = "5",
year = "2021",
}
@misc{foreman2021deep,
title = {Deep Learning Hamiltonian Monte Carlo},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.},
year = {2021},
eprint = {2105.03418},
archivePrefix = {arXiv},
primaryClass = {hep-lat},
}
@article{foreman2021leapfrog,
title = {LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal = {arXiv preprint arXiv:2112.01582},
year = {2021},
}
@online{foreman2023climate,
author = {Foreman, Sam},
title = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
date = {2023-08-07},
url = {https://saforem2.github.io/climate-analysis},
langid = {en},
}
@misc{foreman2023-l2hmcqcd,
author = {Foreman, Sam},
title = {l2hmc-qcd},
date = {2023-08-19},
url = {https://saforem2.github.io/l2hmc-qcd},
langid = {en},
}
@inproceedings{2022slft.confE.508F,
author = {{Foreman}, S. and {Jin}, X. y. and {Osborn}, J.},
title = "{LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling}",
keywords = {High Energy Physics - Lattice, Computer Science - Machine
Learning},
booktitle = {The 38th International Symposium on Lattice Field Theory},
year = 2022,
month = jul,
eid = {508},
pages = {508},
doi = {10.22323/1.396.0508},
archivePrefix = {arXiv},
eprint = {2112.01582},
primaryClass = {hep-lat},
adsurl = {https://ui.adsabs.harvard.edu/abs/2022slft.confE.508F},
adsnote = {Provided by the SAO/NASA Astrophysics Data System},
}
@misc{Montgomery_2023,
title = {Mastering language models},
url = {https://towardsdatascience.com/mastering-language-models-32e1d891511a},
journal = {Medium},
publisher = {Towards Data Science},
author = {Montgomery, Samuel},
year = {2023},
month = {Oct},
}
@misc{yang2023harnessing,
title = {Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and
Beyond},
author = {Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han
and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
year = {2023},
eprint = {2304.13712},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Popel_2018,
doi = {10.2478/pralin-2018-0002},
url = {https://doi.org/10.2478/pralin-2018-0002},
year = 2018,
month = {apr},
publisher = {Charles University in Prague, Karolinum Press},
volume = {110},
number = {1},
pages = {43--70},
author = {Martin Popel and Ond{\v{r}}ej Bojar},
title = {Training Tips for the Transformer Model},
journal = {The Prague Bulletin of Mathematical Linguistics},
}
@misc{vaswani2017attention,
title = {Attention Is All You Need},
author = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob
Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and
Illia Polosukhin},
year = {2017},
eprint = {1706.03762},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@misc{yao2023tree,
title = {Tree of Thoughts: Deliberate Problem Solving with Large Language
Models},
author = {Shunyu Yao and Dian Yu and Jeffrey Zhao and Izhak Shafran and
Thomas L. Griffiths and Yuan Cao and Karthik Narasimhan},
year = {2023},
eprint = {2305.10601},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Zvyagin2022.10.10.511571,
author = {Maxim Zvyagin and Alexander Brace and Kyle Hippe and Yuntian Deng
and Bin Zhang and Cindy Orozco Bohorquez and Austin Clyde and
Bharat Kale and Danilo Perez-Rivera and Heng Ma and Carla M. Mann
and Michael Irvin and J. Gregory Pauloski and Logan Ward and
Valerie Hayot-Sasson and Murali Emani and Sam Foreman and Zhen Xie
and Diangen Lin and Maulik Shukla and Weili Nie and Josh Romero and
Christian Dallago and Arash Vahdat and Chaowei Xiao and Thomas
Gibbs and Ian Foster and James J. Davis and Michael E. Papka and
Thomas Brettin and Rick Stevens and Anima Anandkumar and Venkatram
Vishwanath and Arvind Ramanathan},
title = {GenSLMs: Genome-scale language models reveal SARS-CoV-2
evolutionary dynamics},
elocation-id = {2022.10.10.511571},
year = {2022},
doi = {10.1101/2022.10.10.511571},
publisher = {Cold Spring Harbor Laboratory},
abstract = {We seek to transform how new and emergent variants of
pandemic-causing viruses, specifically SARS-CoV-2, are identified
and classified. By adapting large language models (LLMs) for
genomic data, we build genome-scale language models (GenSLMs)
which can learn the evolutionary landscape of SARS-CoV-2 genomes.
By pretraining on over 110 million prokaryotic gene sequences and
finetuning a SARS-CoV-2-specific model on 1.5 million genomes, we
show that GenSLMs can accurately and rapidly identify variants of
concern. Thus, to our knowledge, GenSLMs represents one of the
first whole genome scale foundation models which can generalize
to other prediction tasks. We demonstrate scaling of GenSLMs on
GPU-based supercomputers and AI-hardware accelerators utilizing
1.63 Zettaflops in training runs with a sustained performance of
121 PFLOPS in mixed precision and peak of 850 PFLOPS. We present
initial scientific insights from examining GenSLMs in tracking
evolutionary dynamics of SARS-CoV-2, paving the path to realizing
this on large biological data.},
url = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571},
eprint = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571.full.pdf},
journal = {bioRxiv},
}
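% A minimal sketch of how this file is typically consumed from LaTeX,
% assuming a hypothetical main.tex next to it. biblatex with the biber
% backend is an assumption, not something the file itself specifies; it is
% the safer pairing here because the @online entries and date fields above
% follow biblatex conventions that classic BibTeX styles may not render.
%
%   \documentclass{article}
%   \usepackage[backend=biber, style=numeric]{biblatex}
%   \addbibresource{references.bib}  % this file
%
%   \begin{document}
%   Diffusion-based weather models~\cite{price2024gencast, stock2025aeris}
%   and multimodal protein design workflows~\cite{mprot-dpo2024} are cited
%   here purely to exercise the bibliography.
%   \printbibliography
%   \end{document}
%
% Build with: pdflatex main && biber main && pdflatex main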