Commit e819e5a

Merge branch 'main' into YOURBRANCH

2 parents: 6ec90b3 + 592ba20

File tree: 4 files changed (+240, -33 lines)

Lines changed: 40 additions & 0 deletions

```yaml
name: Export All Commits

on:
  workflow_dispatch: # Allows you to run this manually from the Actions tab

jobs:
  get-history:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Crucial: 0 fetches all history for all branches and tags

      - name: Extract Log for Each File
        run: |
          # Find all Markdown files (skipping hidden directories) and run git log on each
          find . -type f -name "*.md" -not -path '*/.*' | while read -r file; do
            git log --stat --pretty=format:"__|__$file||%H||%s||%an||%ae||%ar||%ad||%B" -- "$file" >> commit_history.txt
            echo "_/^\_" >> commit_history.txt
          done && [ -s commit_history.txt ] && sed -i '$d' commit_history.txt

      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install pandas

      - name: Process with Python
        run: |
          # Set PYTHONPATH to the current directory so imports work correctly
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          python3 scripts/summarize_commits.py

      - name: Upload Results
        uses: actions/upload-artifact@v4
        with:
          name: repository-commit-log
          path: |
            commit_history.txt
            commits_summary.csv
```
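The `--pretty` format above serializes each commit with sentinel delimiters that `scripts/summarize_commits.py` later splits on: records start with `__|__`, fields are separated by `||`, and files are separated by a `_/^\_` line. A minimal sketch with a fabricated record shows the intended structure:

```python
# Fabricated sample record in the workflow's serialization format
# (the hash, author, and dates here are illustrative, not real data)
record = ('__|__./docs/aurora/system-updates.md'
          '||e819e5a0deadbeef||Merge branch main||Jane Doe'
          '||jdoe@example.com||2 days ago||Mon Feb 23 10:00:00 2026 +0000'
          '||Merge branch main\n')

# Strip the record sentinel, then split the eight "||"-delimited fields
fields = record.split('__|__')[1].split('||')
file_path, commit_hash, subject = fields[0], fields[1], fields[2]
print(file_path)        # ./docs/aurora/system-updates.md
print(commit_hash[:7])  # e819e5a
```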

docs/CODEOWNERS

Lines changed: 33 additions & 33 deletions

```diff
@@ -20,8 +20,8 @@
 
 
 # Machine-specific documentation
-ai-testbed/ @vksastry @wcarnold1010
-ai-testbed/cerebras/ @sraskar
+**/ai-testbed/ @vksastry @wcarnold1010
+#**/ai-testbed/cerebras/ @sraskar
 # aurora/
 # polaris/
 # sophia/
@@ -49,37 +49,35 @@ aurora/aurora-pe.md @koysean
 **/example-program-makefile.md @cjknight
 **/example-program*.md @cjknight
 
-# Data Science frameworks across all machines
-**/frameworks/pytorch.md @khossain4337 @FilippoSimini @saforem2
-**/frameworks/tensorflow.md @zhenghh04 @khossain4337
-**/frameworks/deepspeed.md @saforem2 @hatanp
-**/frameworks/megatron-deepspeed.md @saforem2 @hatanp
-**/frameworks/jax.md @khossain4337
-**/frameworks/scikit-learn.md @BethanyL
-**onedal.md @BethanyL
-**/frameworks/dask.md @okaforn @FilippoSimini
-**/frameworks/pyg.md @FilippoSimini
-**/frameworks/gpytorch.md @mngom2
-**/frameworks/oneCCL.md @kaushikvelusamy @khossain4337 @hatanp
-**/frameworks/libtorch.md @rickybalin
-
-# Applications across all machines
-**/applications/gpt-neox.md @saforem2 @hatanp
-**/applications/megatron-deepspeed.md @saforem2 @hatanp
+# Data Science frameworks and applications across all machines
+**/data-science/**/pytorch.md @khossain4337 @FilippoSimini @saforem2
+**/data-science/**/tensorflow.md @zhenghh04 @khossain4337
+**/data-science/**/deepspeed.md @saforem2 @hatanp
+**/data-science/**/megatron-deepspeed.md @saforem2 @hatanp
+**/data-science/**/gpt-neox.md @saforem2 @hatanp
+**/data-science/**/jax.md @khossain4337
+**/data-science/**/scikit-learn.md @BethanyL
+**onedal.md @BethanyL
+**/data-science/**/dask.md @okaforn @FilippoSimini
+**/data-science/**/pyg.md @FilippoSimini
+**/data-science/**/gpytorch.md @FilippoSimini
+**/data-science/**/oneCCL.md @kaushikvelusamy @khossain4337 @hatanp
+**/data-science/**/libtorch.md @rickybalin
+**/data-science/**/fine-tune-LLM-with-Autotrain.md @saforem2
 
 # Python documentation across all machines
-**/python.md @felker @khossain4337
-**/jupyter* @keceli
+**/data-science/**/python.md @felker @khossain4337
+**/data-science/**/jupyter* @keceli
 
 # Julia documentation
 #**/julia.md @michel2323
 
 # Inference documentation
-**/inference/ @rickybalin
-**/inference/vllm.md @sraskar
+**/data-science/**/inference/ @rickybalin
+**/data-science/**/inference/vllm.md @FilippoSimini @khossain4337
 
 # Profiling documentation
-**/profiling_dl.md @zhenghh04 @khossain4337
+**/data-science/**/profiling_dl.md @zhenghh04 @khossain4337
 **/performance-tools/vtune.md @jkwack
 
 # Libraries documentation
@@ -91,13 +89,13 @@ aurora/aurora-pe.md @koysean
 **/visualization/** @srizzi88
 
 # Data management across all machines
-**/data-management/daos/ @kaushikvelusamy @zhenghh04 @kevin-harms
-**/data-management/lustre/ @kaushikvelusamy @zhenghh04 @kevin-harms
-**/data-management/copper/ @kaushikvelusamy @kevin-harms
+**/daos*.md @kaushikvelusamy @zhenghh04 @kevin-harms
+**/data-management/**/lustre/ @kaushikvelusamy @zhenghh04 @kevin-harms
+**/data-management/**/copper/ @kaushikvelusamy @kevin-harms
 **/filesystem-and-storage/ @kevin-harms
 
 # All container documentation
-###**/containers/
+#### **/containers/ @bcote-anl # ?
 
 # All debugger documentation
 **/debugging*/* @jkwack
@@ -116,14 +114,16 @@ aurora/aurora-pe.md @koysean
 # Services documentation
 ###**/services/ @???
 **/services/gitlab-ci.md @peterupton @thilinarmtb
+**/services/inference-endpoints.md @bcote-anl @vksastry
 
 # Workflows
 ####**/workflows/ @???
-**/smartsim.md @rickybalin
-###**/adios.md @rickybalin
-*adios* @rickybalin # TOOD: how fleixlbe is single-glob pattern matching?
-**/workflows/parsl.md @cms21
-#**/workflows/libensemble.md @shuds13
+**/workflows/**/smartsim.md @rickybalin
+**/workflows/**/adios.md @rickybalin
+**/workflows/**/parsl.md @cms21
+**/workflows/**/balsam.md @cms21
+**/workflows/**/dragon.md @cms21
+**/workflows/**/libensemble.md @shuds13
 
 # User Support documentation
 ###**/support/ @alcf-haritha @jfrancis-anl
```
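CODEOWNERS patterns follow gitignore-style globbing. The sketch below is a simplified matcher (not Git's exact implementation) that illustrates why the new `**/data-science/**/` prefixes match files at any directory depth:

```python
import re

def codeowners_match(pattern, path):
    # Rough approximation of gitignore-style matching: '**/' spans any number
    # of directories, '*' stays within one path component. Not the exact Git
    # semantics, just enough to illustrate the patterns above.
    regex = ''
    i = 0
    while i < len(pattern):
        if pattern[i:i + 3] == '**/':
            regex += '(?:.*/)?'
            i += 3
        elif pattern[i] == '*':
            regex += '[^/]*'
            i += 1
        else:
            regex += re.escape(pattern[i])
            i += 1
    return re.fullmatch(regex, path) is not None

# The new pattern matches pytorch.md anywhere under a data-science/ tree
print(codeowners_match('**/data-science/**/pytorch.md',
                       'aurora/data-science/frameworks/pytorch.md'))  # True
```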

docs/aurora/system-updates.md

Lines changed: 69 additions & 0 deletions

```diff
@@ -1,4 +1,73 @@
 # Aurora System Updates
+## 2026-02-23
+We have a **temporary** test queue `next-eval` (open to all users) with up to 2,600 nodes that has a new compute image. **UANs aurora-uan-0007 and aurora-uan-008 have the new software image and can be used for compiling.** Please prioritize use of the `next-eval` queue for testing and evaluation. See [Running jobs on Aurora](running-jobs-aurora.md) for queue policies. The new image includes updates to Intel's User Mode (UMD) and Kernel Mode (KMD) drivers (Agama 1146.40 / LTS release 2523.40), and OneAPI 2025.3.1.
+
+The full change log is below (**next-eval test queue only**):
+
+### OS Image
+- Intel KMD/UMD 1146.40 / LTS 2523.40
+- Intel SEPDK KMDs from OneAPI 2025.3.0
+- Lustre Client cray-2.15.B23
+- Geopm 3.2.2
+- DAOS Client 2.6.4-11
+  - /daos is now a symlink to /tmp for use with DAOS dfuse mounts
+- Legacy AuroraSDK / PE versions dropped:
+  - 24.347.0 (OneAPI 2025.0.5)
+  - 24.180.3 (OneAPI 2024.2.1)
+
+### PE 26.26.0
+- OneAPI 2025.3.1
+  - [oneAPI Base Toolkit 2025.3.1](running-jobs-aurora.md)
+  - [oneAPI HPC Toolkit 2025.3.1](running-jobs-aurora.md)
+  - [Intel Deep Learning Essentials 2025.3.2](running-jobs-aurora.md)
+  - [Intel Compiler 2025.3.2](running-jobs-aurora.md)
+  - See [Known Issues](https://docs.alcf.anl.gov/aurora/bugs-table/)
+- Spack
+  - Spack 1.1 update with backported patches for externals and OneAPI
+  - Base Python updated to 3.12.12
+- Spack packages (limited to oneapi dependencies)
+  - amrex - 26.02
+  - ginkgo - 1.11
+  - blaspp, lapackpp - 2025.05.28
+  - hdf5 - 2.0.0, 1.14.6
+  - kokkos - 5.0.1, 4.7.02
+  - umpire - 2025.12.0 +sycl
+  - raja - 2025.12.0
+  - petsc - 3.24.3 +sycl
+  - hypre - 3.0.0 +sycl
+  - geopm - 3.2.2
+  - boost - 1.88
+  - py-torch - 2.10.0 and deps
+  - xpu-smi - 1.2.42, 1.3.5
+  - warpx - 26.02
+- Forge
+  - 25.1.1
+- MPICH
+  - aurora_test branch @ [3c70a61](https://github.com/pmodels/mpich/compare/6037a7a..3c70a61)
+  - Libfabric optimization variables set by default according to HPE's SHS guide
+    - Can be checked with `ml show mpich`
+  - New pipeline algorithm disabled by default
+- Frameworks/2025.3.1
+  - Major packages:
+    - torch 2.10.0a0+git449b176
+    - torchao 0.15.0+git9338966da
+    - torchdata 0.11.0+377e64c
+    - torchvision 0.25.0+8ac84ee
+    - torchcomms 0.1.0
+    - intel-extension-for-pytorch 2.10.10+gitd0f992f
+    - pytorch-triton-xpu 3.6.0+git225cdbde
+    - vllm 0.15.0+xpu
+    - scikit_learn_intelex-20260205.124755 (tag: 2025.10.1)
+    - dpnp 0.19.1
+    - dpctl 0.21.1
+  - Major change:
+    - `ONEAPI_DEVICE_SELECTOR="opencl:gpu;level_zero:gpu"`
+      - Exposing both backends ensures functionality of `torch`, `triton-xpu`, `vLLM`, `ray`, and `dpctl`
+      - We warn users about this change upon loading the module
+      - Please try switching to `ONEAPI_DEVICE_SELECTOR="level_zero:gpu"` and report unusual behavior
+      - **Temporary**: once the proposed fixes land in a `triton-xpu` release, we will switch back to `ONEAPI_DEVICE_SELECTOR="level_zero:gpu"`
+
 ## 2026-02-02
 Flare is scheduled to be upgraded Feb 2 - Feb 5, 2026 resulting in Aurora being unavailable during this time.
```
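The `ONEAPI_DEVICE_SELECTOR` change in the entry above can be tried out per shell session; a minimal sketch (the `sycl-ls` listing assumes a oneAPI environment is loaded, and is skipped otherwise):

```shell
# Module default on next-eval (both backends exposed so torch, triton-xpu,
# vLLM, ray, and dpctl all function):
export ONEAPI_DEVICE_SELECTOR="opencl:gpu;level_zero:gpu"

# Requested test setting: Level Zero only; report any unusual behavior
export ONEAPI_DEVICE_SELECTOR="level_zero:gpu"

# List the SYCL devices now visible (sycl-ls ships with oneAPI)
command -v sycl-ls >/dev/null && sycl-ls || true
```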

scripts/summarize_commits.py

Lines changed: 98 additions & 0 deletions

```python
import datetime
import re

import pandas as pd

base_github_url = 'https://github.com/argonne-lcf/user-guides/blob/main/docs/'
base_doc_url = 'https://docs.alcf.anl.gov/'


def split_git_log(l, delim='||'):
    # Split one commit record into its delimited fields. The last field holds
    # the full message followed by the --stat output, whose summary line
    # supplies the edit count.
    splits = l.split(delim)
    fullmsg_stat = re.split(r'\n [^\n]* \| ', splits[-1])
    try:
        fullmsg, stat = fullmsg_stat[0].strip(), int(fullmsg_stat[-1].split(' ')[0])
    except (IndexError, ValueError):
        # No parsable --stat block: keep the message and count zero edits
        fullmsg, stat = fullmsg_stat[0].strip(), 0
    return splits[:-1] + [fullmsg, stat]


def create_df_commits(commits_output, d='||', s='__|__'):
    # Parse every "__|__"-prefixed commit record into one DataFrame row
    commits_parsed = [split_git_log(l, delim=d) for l in commits_output.split(s)[1:]]
    columns = ['file_path', 'commit_hash', 'message_title', 'author_name',
               'author_email', 'relative_date', 'commit_date', 'full_message',
               'num_edits']
    df_comm = pd.DataFrame(commits_parsed, columns=columns)
    df_comm['commit_hash'] = df_comm['commit_hash'].str[:7]
    df_comm['commit_date'] = pd.to_datetime(
        df_comm['commit_date'], format="%a %b %d %H:%M:%S %Y %z", utc=True
    ).dt.tz_localize(None)
    return df_comm


def process_file_commits(df_comm):
    df = pd.DataFrame([[]])

    # Edits and activity
    edits_total, edits_this_year = agg_col_ever_and_this_year(df_comm, col='num_edits', func='sum')
    df['edits_total'] = edits_total
    df['edits_this_year'] = edits_this_year
    commits_total, commits_this_year = agg_col_ever_and_this_year(df_comm, col='num_edits', func='count')
    df['commits_total'] = commits_total
    df['commits_this_year'] = commits_this_year
    df['date_last_commit'] = df_comm['commit_date'].max()

    # Authors
    user_col = 'author_email'
    top4_authors_w_most_edits = sort_authors_by_number_of_edits(df_comm, in_the_last_year=False, user_col=user_col)[:4].tolist()
    df["top4_authors_w_most_edits"] = [top4_authors_w_most_edits]
    df["author_w_most_edits"] = top4_authors_w_most_edits[0]
    try:
        author_w_most_edits = sort_authors_by_number_of_edits(df_comm, in_the_last_year=True, user_col=user_col)[0]
    except IndexError:
        # No commits in the last year
        author_w_most_edits = None
    df["author_w_most_edits_this_year"] = author_w_most_edits

    # System and page name
    file_path = df_comm['file_path'].values[0].removeprefix('./docs/')
    df['system'] = file_path.split('/')[0]
    df['name'] = file_path.split('/')[-1]
    # GitHub URL and the corresponding rendered-docs URL
    df['github_url'] = base_github_url + file_path
    df['url'] = df['github_url'].str.replace(base_github_url, base_doc_url)
    df['url'] = df['url'].str.replace(".md$", "/", regex=True).values
    return df


def sort_authors_by_number_of_edits(df_comm, in_the_last_year=True, user_col='author_name'):
    c = df_comm
    if in_the_last_year:
        one_year_ago = datetime.datetime.today() - datetime.timedelta(days=365)
        authors_by_num_edits = c[c['commit_date'] > one_year_ago].groupby(user_col)['num_edits'].sum()
    else:
        authors_by_num_edits = c.groupby(user_col)['num_edits'].sum()
    return authors_by_num_edits.sort_values(ascending=False).index.values


def agg_col_ever_and_this_year(df_comm, col, func='sum'):
    c = df_comm
    one_year_ago = datetime.datetime.today() - datetime.timedelta(days=365)
    tot_this_year = c[c['commit_date'] > one_year_ago][col].agg(func)
    tot_ever = c[col].agg(func)
    return tot_ever, tot_this_year


def main(commit_history_path, d='||', s='__|__', file_delim=r'_/^\_'):
    with open(commit_history_path, 'r', encoding='utf-8') as file:
        commit_history = file.read().split(file_delim)

    log_entries = []
    for commits_output in commit_history:
        df_comm = create_df_commits(commits_output, d=d, s=s)
        df = process_file_commits(df_comm)
        log_entries.append(df)

    _df = pd.concat(log_entries)
    _df = _df.sort_values(['system', 'date_last_commit'], ascending=[True, False]).reset_index(drop=True)
    _df.to_csv('commits_summary.csv', index=False)


if __name__ == '__main__':
    main('commit_history.txt')
```
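The `commit_date` column is parsed from git's default `%ad` date format; a stdlib-only sketch of the same parse, using a fabricated sample timestamp:

```python
from datetime import datetime

# git log's default %ad format, matching the pandas format string used in
# summarize_commits.py ("%a %b %d %H:%M:%S %Y %z")
raw = 'Mon Feb 23 10:00:00 2026 +0000'  # fabricated sample timestamp
parsed = datetime.strptime(raw, '%a %b %d %H:%M:%S %Y %z')
print(parsed.isoformat())  # 2026-02-23T10:00:00+00:00
```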
