@misc{price2024gencast,
title={GenCast: Diffusion-based ensemble forecasting for medium-range weather},
author={Ilan Price and Alvaro Sanchez-Gonzalez and Ferran Alet and Tom R. Andersson and Andrew El-Kadi and Dominic Masters and Timo Ewalds and Jacklynn Stott and Shakir Mohamed and Peter Battaglia and Remi Lam and Matthew Willson},
year={2024},
eprint={2312.15796},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2312.15796},
}
@misc{allen2025aurora,
title={Aurora: Architecting Argonne's First Exascale Supercomputer for Accelerated Scientific Discovery},
author={Benjamin S. Allen and James Anchell and Victor Anisimov and Thomas Applencourt and Abhishek Bagusetty and Ramesh Balakrishnan and Riccardo Balin and Solomon Bekele and Colleen Bertoni and Cyrus Blackworth and Renzo Bustamante and Kevin Canada and John Carrier and Christopher Chan-nui and Lance C. Cheney and Taylor Childers and Paul Coffman and Susan Coghlan and Michael D'Mello and Murali Emani and Kyle G. Felker and Sam Foreman and Olivier Franza and Longfei Gao and Marta García and María Garzarán and Balazs Gerofi and Yasaman Ghadar and Neha Gupta and Kevin Harms and Väinö Hatanpää and Brian Holland and Carissa Holohan and Brian Homerding and Khalid Hossain and Louise Huot and Huda Ibeid and Joseph A. Insley and Sai Jayanthi and Hong Jiang and Wei Jiang and Xiao-Yong Jin and Jeongnim Kim and Christopher Knight and Kalyan Kumaran and JaeHyuk Kwack and Ti Leggett and Ben Lenard and Chris Lewis and Nevin Liber and Johann Lombardi and Raymond M. Loy and Ye Luo and Bethany Lusch and Nilakantan Mahadevan and Victor A. Mateevitsi and Gordon McPheeters and Ryan Milner and Vitali A. Morozov and Servesh Muralidharan and Tom Musta and Mrigendra Nagar and Vikram Narayana and Marieme Ngom and Anthony-Trung Nguyen and Nathan Nichols and Aditya Nishtala and James C. Osborn and Michael E. Papka and Scott Parker and Saumil S. Patel and Adrian C. Pope and Sucheta Raghunanda and Esteban Rangel and Paul M. Rich and Silvio Rizzi and Kris Rowe and Varuni Sastry and Adam Scovel and Filippo Simini and Haritha Siddabathuni Som and Patrick Steinbrecher and Rick Stevens and Xinmin Tian and Peter Upton and Thomas Uram and Archit K. Vasan and Álvaro Vázquez-Mayagoitia and Kaushik Velusamy and Brice Videau and Venkatram Vishwanath and Brian Whitney and Timothy J. Williams and Michael Woodacre and Sam Zeltner and Gengbin Zheng and Huihuo Zheng},
year={2025},
eprint={2509.08207},
archivePrefix={arXiv},
primaryClass={cs.DC},
url={https://arxiv.org/abs/2509.08207},
}
@conference{torsiello2025automated,
author = {Torsiello, J. and Fleming, G. T. and Foreman, S. and Jin, X.-Y. and Osborn, J. C.},
title = {Automated tuning for HMC mass ratios},
annote = {We extended previous work on tuning HMC parameters using gradient information to include Hasenbusch mass ratios. The inclusion of mass ratios adds many more parameters that need to be tuned, and also allows for lots of variations in the choice of integrator pattern. We investigate the effectiveness of automatically tuning a large number of HMC parameters and compare the optimally tuned versions over a range of integrator variants.},
doi = {10.22323/1.466.0052},
url = {https://www.osti.gov/biblio/2551828},
journal = {PoS},
place = {United States},
organization = {Argonne, ALCF; Argonne National Laboratory (ANL), Argonne, IL (United States); Temple U.; Fermi National Accelerator Laboratory (FNAL), Batavia, IL (United States)},
year = {2025},
month = {02}
}
@misc{stock2025aeris,
title = {AERIS: Argonne Earth Systems Model for Reliable and Skillful
Predictions},
author = {Väinö Hatanpää and Eugene Ku and Jason Stock and Murali Emani and
Sam Foreman and Chunyong Jung and Sandeep Madireddy and Tung Nguyen
and Varuni Sastry and Ray A. O. Sinurat and Sam Wheeler and Huihuo
Zheng and Troy Arcomano and Venkatram Vishwanath and Rao Kotamarthi
},
year = {2025},
eprint = {2509.13523},
archivePrefix = {arXiv},
primaryClass = {cs.LG},
url = {https://arxiv.org/abs/2509.13523},
}
@misc{mccandlish2018empiricalmodellargebatchtraining,
title = {An Empirical Model of Large-Batch Training},
author = {Sam McCandlish and Jared Kaplan and Dario Amodei and OpenAI Dota
Team},
year = {2018},
eprint = {1812.06162},
archivePrefix = {arXiv},
primaryClass = {cs.LG},
url = {https://arxiv.org/abs/1812.06162},
}
@misc{gokdemir2025hiperrag,
title = {HiPerRAG: High-Performance Retrieval Augmented Generation for
Scientific Insights},
author = {Ozan Gokdemir and Carlo Siebenschuh and Alexander Brace and Azton
Wells and Brian Hsu and Kyle Hippe and Priyanka V. Setty and
Aswathy Ajith and J. Gregory Pauloski and Varuni Sastry and Sam
Foreman and Huihuo Zheng and Heng Ma and Bharat Kale and Nicholas
Chia and Thomas Gibbs and Michael E. Papka and Thomas Brettin and
Francis J. Alexander and Anima Anandkumar and Ian Foster and Rick
Stevens and Venkatram Vishwanath and Arvind Ramanathan},
year = {2025},
eprint = {2505.04846},
archivePrefix = {arXiv},
primaryClass = {cs.IR},
}
@misc{yan2025mofa,
title = {MOFA: Discovering Materials for Carbon Capture with a GenAI- and
Simulation-Based Workflow},
author = {Xiaoli Yan and Nathaniel Hudson and Hyun Park and Daniel Grzenda
and J. Gregory Pauloski and Marcus Schwarting and Haochen Pan and
Hassan Harb and Samuel Foreman and Chris Knight and Tom Gibbs and
Kyle Chard and Santanu Chaudhuri and Emad Tajkhorshid and Ian
Foster and Mohamad Moosavi and Logan Ward and E. A. Huerta},
year = {2025},
eprint = {2501.10651},
archivePrefix = {arXiv},
primaryClass = {cs.DC},
}
@article{leung2024intro,
author = {Leung, Mary Ann and Cahill, Katharine and Hartman-Baker, Rebecca
and Kinsley, Paige and Curfman McInnes, Lois and Parete-Koon,
Suzanne and Ramprakash, Sreeranjani and Abraham, Subil and Beach
Barrier, Lacy and Chen, Gladys and others},
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
annote = {The U.S. Department of Energy (DOE) is a long-standing leader in
research and development of high-performance computing (HPC) in the
pursuit of science. However, we face daunting challenges in
fostering a robust and diverse HPC workforce. Basic HPC is not
typically taught at early stages of students' academic careers, and
the capacity and knowledge of HPC at many institutions are limited.
Even so, such topics are prerequisites for advanced training
programs, internships, graduate school, and ultimately for careers
in HPC. To help address this challenge, as part of the DOE Exascale
Computing Project's Broadening Participation Initiative, we
recently launched the Introduction to HPC Training and Workforce
Pipeline Program to provide accessible introductory material on HPC,
scalable AI, and analytics. We describe the Intro to HPC Bootcamp,
an immersive program designed to engage students from
underrepresented groups as they learn foundational HPC skills. Here,
the program takes a novel approach to HPC training by turning the
traditional curriculum upside down. Instead of focusing on
technology and its applications, the bootcamp focuses on energy
justice to motivate the training of HPC skills through
project-based pedagogy and real-life science stories. Additionally,
the bootcamp prepares students for internships and future careers
at DOE labs. The first bootcamp, hosted by the advanced computing
facilities at Argonne, Lawrence Berkeley, and Oak Ridge National
Labs and organized by Sustainable Horizons Institute, took place in
August 2023.},
doi = {10.22369/issn.2153-4136/15/1/10},
url = {https://www.osti.gov/biblio/2447313},
journal = {Journal of Computational Science Education},
issn = {2153-4136},
number = {1},
volume = {15},
place = {United States},
publisher = {Shodor Education Foundation, Inc.},
year = {2024},
month = {02},
}
@inproceedings{mprot-dpo2024,
author = {Dharuman, Gautham and Hippe, Kyle and Brace, Alexander and
Foreman, Sam and Hatanp\"{a}\"{a}, V\"{a}in\"{o} and Sastry, Varuni K. and
Zheng, Huihuo and Ward, Logan and Muralidharan, Servesh and Vasan,
Archit and Kale, Bharat and Mann, Carla M. and Ma, Heng and Cheng,
Yun-Hsuan and Zamora, Yuliana and Liu, Shengchao and Xiao, Chaowei
and Emani, Murali and Gibbs, Tom and Tatineni, Mahidhar and Canchi,
Deepak and Mitchell, Jerome and Yamada, Koichi and Garzaran, Maria
and Papka, Michael E. and Foster, Ian and Stevens, Rick and
Anandkumar, Anima and Vishwanath, Venkatram and Ramanathan, Arvind},
title = {MProt-DPO: Breaking the ExaFLOPS Barrier for Multimodal Protein
Design Workflows with Direct Preference Optimization},
year = {2024},
isbn = {9798350352917},
publisher = {IEEE Press},
url = {https://doi.org/10.1109/SC41406.2024.00013},
doi = {10.1109/SC41406.2024.00013},
abstract = {We present a scalable, end-to-end workflow for protein design.
By augmenting protein sequences with natural language
descriptions of their biochemical properties, we train generative
models that can be preferentially aligned with protein fitness
landscapes. Through complex experimental- and simulation-based
observations, we integrate these measures as preferred parameters
for generating new protein variants and demonstrate our workflow
on five diverse supercomputers. We achieve >1 ExaFLOPS sustained
performance in mixed precision on each supercomputer and a
maximum sustained performance of 4.11 ExaFLOPS and peak
performance of 5.57 ExaFLOPS. We establish the scientific
performance of our model on two tasks: (1) across a predetermined
benchmark dataset of deep mutational scanning experiments to
optimize the fitness-determining mutations in the yeast protein
HIS7, and (2) in optimizing the design of the enzyme malate
dehydrogenase to achieve lower activation barriers (and therefore
increased catalytic rates) using simulation data. Our
implementation thus sets high watermarks for multimodal protein
design workflows.},
booktitle = {Proceedings of the International Conference for High
Performance Computing, Networking, Storage, and Analysis},
articleno = {7},
numpages = {13},
keywords = {AI, HPC, Large language models, protein design},
location = {Atlanta, GA, USA},
series = {SC '24},
}
@inproceedings{hosseini2025quality,
title = {Quality Measures for Dynamic Graph Generative Models},
author = {Ryien Hosseini and Filippo Simini and Venkatram Vishwanath and
Rebecca Willett and Henry Hoffmann},
booktitle = {The Thirteenth International Conference on Learning
Representations},
year = {2025},
url = {https://openreview.net/forum?id=8bjspmAMBk},
}
@article{deamont2014superconductivity,
title = {Superconductivity of In and Sn Samples},
author = {Deamont, George and Foreman, Sam},
year = {2014},
}
@INPROCEEDINGS{foreman2018rg,
author = {{Foreman}, Sam and {Giedt}, Joel and {Meurice}, Yannick and {Unmuth-Yockey}, Judah},
title = "{RG-inspired machine learning for lattice field theory}",
keywords = {High Energy Physics - Lattice, Condensed Matter - Statistical Mechanics},
booktitle = {European Physical Journal Web of Conferences},
year = 2018,
series = {European Physical Journal Web of Conferences},
volume = {175},
month = mar,
eid = {11025},
pages = {11025},
doi = {10.1051/epjconf/201817511025},
archivePrefix = {arXiv},
eprint = {1710.02079},
primaryClass = {hep-lat},
adsurl = {https://ui.adsabs.harvard.edu/abs/2018EPJWC.17511025F},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
@article{hubler2018large,
title = {Large energy density in three-plate nanocapacitors due to Coulomb
blockade},
author = {Hubler, A and Foreman, S and Liu, J and Wortsmann, L},
journal = {Journal of Applied Physics},
volume = {123},
number = {10},
year = {2018},
publisher = {AIP Publishing},
}
@article{foreman2018examples,
title = {Examples of renormalization group transformations for image sets},
author = {Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah},
journal = {Physical Review E},
volume = {98},
number = {5},
pages = {052129},
year = {2018},
publisher = {American Physical Society},
}
@article{Foreman:2018qei,
author = "Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah",
title = "{Machine learning inspired analysis of the Ising model transition}",
doi = "10.22323/1.334.0245",
journal = "PoS",
volume = "LATTICE2018",
pages = "245",
year = "2018",
}
@inproceedings{foreman2018machine,
title = {Machine learning inspired analysis of the Ising model transition},
author = {Foreman, Samuel and Giedt, Joel and Meurice, Yannick and
Unmuth-Yockey, Judah},
booktitle = {Lattice 2018},
year = {2018},
}
@phdthesis{foreman2019learning,
title = {Learning Better Physics: A Machine Learning Approach to Lattice
Gauge Theory},
author = {Foreman, Samuel Alfred},
year = {2019},
school = {University of Iowa},
}
@article{foreman2020machine,
title = {Machine Learning and Neural Networks for Field Theory},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
year = {2020},
}
@article{foreman2021hmc,
title = {HMC with normalizing flows},
author = {Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin,
Xiao-Yong and Osborn, James C and Tomiya, Akio},
journal = {arXiv preprint arXiv:2112.01586},
archivePrefix = {arXiv},
eprint = {2112.01586},
year = {2021},
}
@article{foreman2021leapfroglayers,
title = {LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal = {arXiv preprint arXiv:2112.01582},
archivePrefix = {arXiv},
eprint = {2112.01582},
year = {2021},
}
@article{liu2017energy,
title = {Energy storage in quantum resonators},
author = {Liu, Jiaqi and Hubler, Alfred W and Foreman, Samuel Alfred and
Ott, Katharina},
year = {2017},
}
@article{boyda2022applications,
title = {Applications of machine learning to lattice quantum field theory},
author = {Boyda, Denis and Cal{\`\i}, Salvatore and Foreman, Sam and Funcke,
Lena and Hackett, Daniel C and Lin, Yin and Aarts, Gert and
Alexandru, Andrei and Jin, Xiao-Yong and Lucini, Biagio and others},
journal = {arXiv preprint arXiv:2202.05838},
archivePrefix = {arXiv},
eprint = {2202.05838},
year = {2022},
}
@article{kronfeld2022lattice,
title = {Lattice QCD and particle physics},
author = {Kronfeld, Andreas S and Bhattacharya, Tanmoy and Blum, Thomas and
Christ, Norman H and DeTar, Carleton and Detmold, William and
Edwards, Robert and Hasenfratz, Anna and Lin, Huey-Wen and
Mukherjee, Swagato and others},
journal = {arXiv preprint arXiv:2207.07641},
archivePrefix = {arXiv},
eprint = {2207.07641},
year = {2022},
}
@article{zvyagin2023genslms,
title = {GenSLMs: Genome-scale language models reveal SARS-CoV-2
evolutionary dynamics},
author = {Zvyagin, Maxim and Brace, Alexander and Hippe, Kyle and Deng,
Yuntian and Zhang, Bin and Bohorquez, Cindy Orozco and Clyde,
Austin and Kale, Bharat and Perez-Rivera, Danilo and Ma, Heng and
others},
journal = {The International Journal of High Performance Computing
Applications},
volume = {37},
number = {6},
pages = {683--705},
year = {2023},
publisher = {SAGE Publications Sage UK: London, England},
}
@article{emani2023comprehensive,
title = {A Comprehensive Performance Study of Large Language Models on Novel
AI Accelerators},
author = {Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen
and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and
Vishwanath, Venkatram and Papka, Michael E},
journal = {arXiv preprint arXiv:2310.04607},
eprint = {2310.04607},
archivePrefix = {arXiv},
year = {2023},
}
@article{song2023deepspeed4science,
title = {DeepSpeed4Science Initiative: Enabling Large-Scale Scientific
Discovery through Sophisticated AI System Technologies},
author = {Song, Shuaiwen Leon and Kruft, Bonnie and Zhang, Minjia and Li,
Conglong and Chen, Shiyang and Zhang, Chengming and Tanaka,
Masahiro and Wu, Xiaoxia and Rasley, Jeff and Awan, Ammar Ahmad and
others},
journal = {arXiv preprint arXiv:2310.04610},
eprint = {2310.04610},
archivePrefix = {arXiv},
year = {2023},
}
@inproceedings{dharuman2023protein,
title = {Protein Generation via Genome-scale Language Models with
Bio-physical Scoring},
author = {Dharuman, Gautham and Ward, Logan and Ma, Heng and Setty, Priyanka
V and Gokdemir, Ozan and Foreman, Sam and Emani, Murali and Hippe,
Kyle and Brace, Alexander and Keipert, Kristopher and others},
booktitle = {Proceedings of the SC'23 Workshops of The International
Conference on High Performance Computing, Network, Storage, and
Analysis},
pages = {95--101},
year = {2023},
}
@online{foreman2023climrr,
author = {Foreman, Sam},
title = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
date = {2023-08-07},
url = {https://saforem2.github.io/climate-analysis},
langid = {en},
}
@online{foreman2024long,
author = {Foreman, Sam},
title = {🚂 {Loooooooong} {Sequence} {Lengths}},
date = {2024-02-12},
url = {https://samforeman.me/posts/AuroraGPT/long-sequences/},
langid = {en},
}
@misc{foreman2023mlmc,
title = {MLMC: Machine Learning Monte Carlo for Lattice Gauge Theory},
author = {Sam Foreman and Xiao-Yong Jin and James C. Osborn},
year = {2023},
eprint = {2312.08936},
archivePrefix = {arXiv},
primaryClass = {hep-lat},
url = {https://arxiv.org/abs/2312.08936},
}
@article{shanahan2022snowmass,
title = {Snowmass 2021 computational frontier CompF03 topical group report:
Machine learning},
author = {Shanahan, Phiala and Terao, Kazuhiro and Whiteson, Daniel},
journal = {arXiv preprint arXiv:2209.07559},
year = {2022},
archivePrefix = {arXiv},
eprint = {2209.07559},
}
@article{cheng2024thorough,
author = {Cheng, Scott and Lin, Jun-Liang and Emani, Murali and Raskar,
Siddhisanket and Foreman, Sam and Xie, Zhen and Vishwanath,
Venkatram and Kandemir, Mahmut Taylan},
title = {Thorough Characterization and Analysis of Large Transformer Model
Training At-Scale},
year = {2024},
issue_date = {March 2024},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {8},
number = {1},
url = {https://doi.org/10.1145/3639034},
doi = {10.1145/3639034},
abstract = {Large transformer models have recently achieved great success
across various domains. With a growing number of model parameters,
a large transformer model training today typically involves
model sharding, data parallelism, and model parallelism. Thus,
the throughput of large-scale model training depends heavily on
the network bandwidth since a combination of model sharding and
multiple parallelism strategies incurs various costs. However,
prior characterizations of transformer models on high-bandwidth
DGX machines that use TFLOPS as a metric may not reflect the
performance of a system with lower bandwidth. Furthermore, data
and model parallelism reveal significantly distinct training
profiles on different system bandwidths at scale and, thus, need
a thorough study. In this paper, we provide a bottom-up breakdown
of training throughput into compute and communication time, and
quantitatively analyze their respective influences on overall
end-to-end training scaling. Our evaluation involves an in-depth
exploration of data parallelism, scaling up to 512 GPUs with
limited bandwidth, and examines three model sharding strategies
among six model sizes. We also evaluate three combinations of
model parallelism on both high and low bandwidth supercomputing
systems. Overall, our work provides a broader perspective on
large-scale transformer model training, and our analysis and
evaluation yield practical insights for predicting training
scaling, shaping the future development of supercomputing system
design.},
journal = {Proc. ACM Meas. Anal. Comput. Syst.},
month = feb,
articleno = {8},
numpages = {25},
}
@article{leung2024communities,
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
author = {Leung, Mary Ann and Cahill, Katharine and Hartman-Baker, Rebecca
and Kinsley, Paige and McInnes, Lois Curfman and Parete-Koon,
Suzanne and Abraham, Subil and Barrier, Lacy Beach and Chen, Gladys
and DeStefano, Lizanne and others},
journal = {Journal of Computational Science Education},
volume = {15},
number = {1},
year = {2024},
}
@inproceedings{arcomano2023applications,
title = {Applications of a Foundation Model Approach for Weather and Climate},
author = {Arcomano, Troy and Wikner, Alexander and Maulik, Romit and
Kotamarthi, Veerabhadra Rao and Foreman, Sam},
booktitle = {AGU Fall Meeting Abstracts},
volume = {2023},
pages = {GC22C--06},
year = {2023},
}
@inproceedings{emani2024toward,
title = {Toward a holistic performance evaluation of large language models
across diverse AI accelerators},
author = {Emani, Murali and Foreman, Sam and Sastry, Varuni and Xie, Zhen
and Raskar, Siddhisanket and Arnold, William and Thakur, Rajeev and
Vishwanath, Venkatram and Papka, Michael E and Shanmugavelu, Sanjif
and others},
booktitle = {2024 IEEE International Parallel and Distributed Processing
Symposium Workshops (IPDPSW)},
pages = {1--10},
year = {2024},
organization = {IEEE},
}
@article{parete2024intro,
title = {Intro to HPC Bootcamp: Engaging New Communities Through Energy
Justice Projects},
author = {Parete-Koon, Suzanne and Sandoval, Michael and Leland, Kellen and
Abraham, Subil and Leung, Mary Ann and Hartman-Baker, Rebecca and
Kinsley, Paige and McInnes, Lois and Ramprakash, Sreeranjani and
Beach Barrier, Lacy and others},
journal = {Journal of Computational Science Education},
volume = {15},
number = {1},
year = {2024},
publisher = {Oak Ridge National Laboratory (ORNL), Oak Ridge, TN (United
States)},
}
@inproceedings{dharuman2024mprot,
title = {MProt-DPO: Breaking the ExaFLOPS Barrier for Multimodal Protein
Design Workflows with Direct Preference Optimization},
author = {Dharuman, Gautham and Hippe, Kyle and Brace, Alexander and
Foreman, Sam and Hatanp{\"a}{\"a}, V{\"a}in{\"o} and Sastry, Varuni K. and
Zheng, Huihuo and Ward, Logan and Muralidharan, Servesh and Vasan,
Archit and others},
booktitle = {SC24: International Conference for High Performance
Computing, Networking, Storage and Analysis},
pages = {74--86},
year = {2024},
organization = {IEEE Computer Society},
}
@misc{song2023ds4sci,
title = {DeepSpeed4Science Initiative: Enabling Large-Scale Scientific
Discovery through Sophisticated AI System Technologies},
author = {Shuaiwen Leon Song and Bonnie Kruft and Minjia Zhang and Conglong
Li and Shiyang Chen and Chengming Zhang and Masahiro Tanaka and
Xiaoxia Wu and Jeff Rasley and Ammar Ahmad Awan and Connor Holmes
and Martin Cai and Adam Ghanem and Zhongzhu Zhou and Yuxiong He and
Pete Luferenko and Divya Kumar and Jonathan Weyn and Ruixiong Zhang
and Sylwester Klocek and Volodymyr Vragov and Mohammed AlQuraishi
and Gustaf Ahdritz and Christina Floristean and Cristina Negri and
Rao Kotamarthi and Venkatram Vishwanath and Arvind Ramanathan and
Sam Foreman and Kyle Hippe and Troy Arcomano and Romit Maulik and
Maxim Zvyagin and Alexander Brace and Bin Zhang and Cindy Orozco
Bohorquez and Austin Clyde and Bharat Kale and Danilo Perez-Rivera
and Heng Ma and Carla M. Mann and Michael Irvin and J. Gregory
Pauloski and Logan Ward and Valerie Hayot and Murali Emani and Zhen
Xie and Diangen Lin and Maulik Shukla and Ian Foster and James J.
Davis and Michael E. Papka and Thomas Brettin and Prasanna
Balaprakash and Gina Tourassi and John Gounley and Heidi Hanson and
Thomas E Potok and Massimiliano Lupo Pasini and Kate Evans and Dan
Lu and Dalton Lunga and Junqi Yin and Sajal Dash and Feiyi Wang and
Mallikarjun Shankar and Isaac Lyngaas and Xiao Wang and Guojing
Cong and Pei Zhang and Ming Fan and Siyan Liu and Adolfy Hoisie and
Shinjae Yoo and Yihui Ren and William Tang and Kyle Felker and
Alexey Svyatkovskiy and Hang Liu and Ashwin Aji and Angela Dalton
and Michael Schulte and Karl Schulz and Yuntian Deng and Weili Nie
and Josh Romero and Christian Dallago and Arash Vahdat and Chaowei
Xiao and Thomas Gibbs and Anima Anandkumar and Rick Stevens},
year = {2023},
eprint = {2310.04610},
archivePrefix = {arXiv},
primaryClass = {cs.AI},
url = {https://arxiv.org/abs/2310.04610},
}
@misc{wei2022emergentabilitieslargelanguage,
title = {Emergent Abilities of Large Language Models},
author = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and
Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten
Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori
Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and
William Fedus},
year = {2022},
eprint = {2206.07682},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
url = {https://arxiv.org/abs/2206.07682},
}
@misc{Burdi:2023climrr,
title = {The Climate Risk \& Resilience Portal (ClimRR) Metadata and Data
Dictionary},
author = {Burdi, C. and Branham, J. and Wall, T.},
year = {2023},
note = {Available at \url{https://anl.app.box.com/s/hmkkgkrkzxxocfe9kpgrzk2gfc4gizp8/file/1055145398460}},
url = {https://dub.sh/ClimRR-Metadata},
}
@misc{wittig2023progress,
title = {Progress on $(g-2)_\mu$ from Lattice QCD},
author = {Hartmut Wittig},
year = {2023},
eprint = {2306.04165},
archivePrefix = {arXiv},
primaryClass = {hep-ph},
}
@article{Duane:1987de,
author = "Duane, S. and Kennedy, A. D. and Pendleton, B. J. and Roweth, D.",
title = "{Hybrid Monte Carlo}",
doi = "10.1016/0370-2693(87)91197-X",
journal = "Phys. Lett. B",
volume = "195",
pages = "216--222",
year = "1987",
}
@article{Shanahan:2022ifi,
author = "Shanahan, Phiala and others",
title = "{Snowmass 2021 Computational Frontier CompF03 Topical Group Report:
Machine Learning}",
eprint = "2209.07559",
archivePrefix = "arXiv",
primaryClass = "physics.comp-ph",
reportNumber = "FERMILAB-CONF-22-719-ND-PPD-QIS-SCD",
month = "9",
year = "2022",
}
@inproceedings{Boyda:2022nmh,
author = "Boyda, Denis and others",
title = "{Applications of Machine Learning to Lattice Quantum Field Theory}",
booktitle = "{Snowmass 2021}",
eprint = "2202.05838",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
reportNumber = "MIT-CTP/5405",
month = "2",
year = "2022",
}
@article{Foreman:2021ljl,
author = "Foreman, Sam and Izubuchi, Taku and Jin, Luchang and Jin,
Xiao-Yong and Osborn, James C. and Tomiya, Akio",
title = "{HMC with Normalizing Flows}",
eprint = "2112.01586",
archivePrefix = "arXiv",
primaryClass = "cs.LG",
doi = "10.22323/1.396.0073",
journal = "PoS",
volume = "LATTICE2021",
pages = "073",
year = "2022",
}
@article{Foreman:2021rhs,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling}",
eprint = "2112.01582",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
doi = "10.22323/1.396.0508",
journal = "PoS",
volume = "LATTICE2021",
pages = "508",
year = "2022",
}
@inproceedings{Foreman:2021ixr,
author = "Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.",
title = "{Deep Learning Hamiltonian Monte Carlo}",
booktitle = "{9th International Conference on Learning Representations}",
eprint = "2105.03418",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
month = "5",
year = "2021",
}
@misc{foreman2021deep,
title = {Deep Learning Hamiltonian Monte Carlo},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.},
year = {2021},
eprint = {2105.03418},
archivePrefix = {arXiv},
primaryClass = {hep-lat},
}
@article{foreman2021leapfrog,
title = {LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling},
author = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C},
journal = {arXiv preprint arXiv:2112.01582},
year = {2021},
}
@online{foreman2023climate,
author = {Foreman, Sam},
title = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
date = {2023-08-07},
url = {https://saforem2.github.io/climate-analysis},
langid = {en},
}
@misc{foreman2023-l2hmcqcd,
author = {Foreman, Sam},
title = {l2hmc-qcd},
date = {2023-08-19},
url = {https://saforem2.github.io/l2hmc-qcd},
langid = {en},
}
@inproceedings{2022slft.confE.508F,
author = {{Foreman}, S. and {Jin}, X. y. and {Osborn}, J.},
title = "{LeapfrogLayers: A Trainable Framework for Effective Topological
Sampling}",
keywords = {High Energy Physics - Lattice, Computer Science - Machine
Learning},
booktitle = {The 38th International Symposium on Lattice Field Theory},
year = 2022,
month = jul,
eid = {508},
pages = {508},
doi = {10.22323/1.396.0508},
archivePrefix = {arXiv},
eprint = {2112.01582},
primaryClass = {hep-lat},
adsurl = {https://ui.adsabs.harvard.edu/abs/2022slft.confE.508F},
adsnote = {Provided by the SAO/NASA Astrophysics Data System},
}
@misc{Montgomery_2023,
title = {Mastering language models},
url = {https://towardsdatascience.com/mastering-language-models-32e1d891511a},
journal = {Medium},
publisher = {Towards Data Science},
author = {Montgomery, Samuel},
year = {2023},
month = {Oct},
}
@misc{yang2023harnessing,
title = {Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and
Beyond},
author = {Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han
and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
year = {2023},
eprint = {2304.13712},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Popel_2018,
doi = {10.2478/pralin-2018-0002},
url = {https://doi.org/10.2478/pralin-2018-0002},
year = 2018,
month = {apr},
publisher = {Charles University in Prague, Karolinum Press},
volume = {110},
number = {1},
pages = {43--70},
author = {Martin Popel and Ond{\v{r}}ej Bojar},
title = {Training Tips for the Transformer Model},
journal = {The Prague Bulletin of Mathematical Linguistics},
}
@misc{vaswani2017attention,
title = {Attention Is All You Need},
author = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob
Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and
Illia Polosukhin},
year = {2017},
eprint = {1706.03762},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@misc{yao2023tree,
title = {Tree of Thoughts: Deliberate Problem Solving with Large Language
Models},
author = {Shunyu Yao and Dian Yu and Jeffrey Zhao and Izhak Shafran and
Thomas L. Griffiths and Yuan Cao and Karthik Narasimhan},
year = {2023},
eprint = {2305.10601},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
}
@article{Zvyagin2022.10.10.511571,
author = {Maxim Zvyagin and Alexander Brace and Kyle Hippe and Yuntian Deng
and Bin Zhang and Cindy Orozco Bohorquez and Austin Clyde and
Bharat Kale and Danilo Perez-Rivera and Heng Ma and Carla M. Mann
and Michael Irvin and J. Gregory Pauloski and Logan Ward and
Valerie Hayot-Sasson and Murali Emani and Sam Foreman and Zhen Xie
and Diangen Lin and Maulik Shukla and Weili Nie and Josh Romero and
Christian Dallago and Arash Vahdat and Chaowei Xiao and Thomas
Gibbs and Ian Foster and James J. Davis and Michael E. Papka and
Thomas Brettin and Rick Stevens and Anima Anandkumar and Venkatram
Vishwanath and Arvind Ramanathan},
title = {GenSLMs: Genome-scale language models reveal SARS-CoV-2
evolutionary dynamics},
elocation-id = {2022.10.10.511571},
year = {2022},
doi = {10.1101/2022.10.10.511571},
publisher = {Cold Spring Harbor Laboratory},
abstract = {We seek to transform how new and emergent variants of
pandemic-causing viruses, specifically SARS-CoV-2, are identified
and classified. By adapting large language models (LLMs) for
genomic data, we build genome-scale language models (GenSLMs)
which can learn the evolutionary landscape of SARS-CoV-2 genomes.
By pretraining on over 110 million prokaryotic gene sequences and
finetuning a SARS-CoV-2-specific model on 1.5 million genomes, we
show that GenSLMs can accurately and rapidly identify variants of
concern. Thus, to our knowledge, GenSLMs represents one of the
first whole genome scale foundation models which can generalize
to other prediction tasks. We demonstrate scaling of GenSLMs on
GPU-based supercomputers and AI-hardware accelerators utilizing
1.63 Zettaflops in training runs with a sustained performance of
121 PFLOPS in mixed precision and peak of 850 PFLOPS. We present
initial scientific insights from examining GenSLMs in tracking
evolutionary dynamics of SARS-CoV-2, paving the path to realizing
this on large biological data.},
url = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571},
eprint = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571.full.pdf},
journal = {bioRxiv},
}
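% A minimal sketch of how this file is typically consumed from LaTeX,
% assuming a hypothetical main.tex next to it. biblatex with the biber
% backend is an assumption, not something the file itself specifies; it is
% the safer pairing here because the @online entries and date fields above
% follow biblatex conventions that classic BibTeX styles may not render.
%
%   \documentclass{article}
%   \usepackage[backend=biber, style=numeric]{biblatex}
%   \addbibresource{references.bib}  % this file
%
%   \begin{document}
%   Diffusion-based weather models~\cite{price2024gencast, stock2025aeris}
%   and multimodal protein design workflows~\cite{mprot-dpo2024} are cited
%   here purely to exercise the bibliography.
%   \printbibliography
%   \end{document}
%
% Build with: pdflatex main && biber main && pdflatex main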