Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 19 additions & 32 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This is a basic workflow to help you get started with Actions

#Turing commands are commented out , we regress QV100,RTX3070,A100
#Turing commands are commented out , we regress QV100,A100

name: CI Runs

Expand All @@ -15,6 +15,7 @@ on:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
PR_HEAD_REPO_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name || github.repository }}

jobs:
check-format:
Expand Down Expand Up @@ -64,7 +65,7 @@ jobs:

# Try to checkout the same branch from the same owner's fork first
if [[ ${{ github.event_name }} == 'pull_request' ]]; then
current_owner=$(echo ${{ github.event.pull_request.head.repo.full_name }} | cut -d'/' -f1)
current_owner=$(echo $PR_HEAD_REPO_FULL_NAME | cut -d'/' -f1)
else
current_owner=$(echo ${{ github.repository }} | cut -d'/' -f1)
fi
Expand Down Expand Up @@ -102,25 +103,19 @@ jobs:
./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100//hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}

./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}

./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}

./util/job_launching/run_simulations.py -B mlperf_inference -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/mlperf_rtx3070/traces/device-0/12.8/ -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}

./util/job_launching/monitor_func_test.py -v --sleep_time 300 -s stats-per-app-sass.csv -N sass-short-${{ github.run_number }}"_"${{ github.run_attempt}}
- name: Archive Stats
run: |
source ./env-setup/12.8_env_setup.sh
rm -rf ./statistics-archive
git clone --quiet git@github.com:accel-sim/statistics-archive.git
BRANCH_NAME=${{ github.repository }}/$BRANCH_NAME
BRANCH_NAME=$PR_HEAD_REPO_FULL_NAME/$BRANCH_NAME
# either create a new branch or check it out if it already exists
git -C ./statistics-archive checkout $BRANCH_NAME 2>/dev/null || git -C ./statistics-archive checkout -b $BRANCH_NAME
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C QV100-SASS -A | tee v100-ubench-sass-local.csv
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX3070-SASS -A | tee ampere-ubench-sass-local.csv
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C A100-SASS -A | tee ampere-a100-ubench-sass-local.csv

mkdir -p statistics-archive/ubench/
Expand All @@ -129,25 +124,19 @@ jobs:
| tee v100-ubench-sass.csv && mv v100-ubench-sass.csv ./statistics-archive/ubench/
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-local.csv \
# | tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass.csv,ampere-ubench-sass-local.csv \
| tee ampere-ubench-sass.csv && mv ampere-ubench-sass.csv ./statistics-archive/ubench/
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass.csv,ampere-ubench-sass-local.csv \
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass.csv,ampere-a100-ubench-sass-local.csv \
| tee ampere-a100-ubench-sass.csv && mv ampere-a100-ubench-sass.csv ./statistics-archive/ubench/
# Next we merge the latest run with the current run (used for correlation plots) then archive the current run as the new latest for the next time this action occurs
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass-latest.csv,v100-ubench-sass-local.csv \
| tee v100-ubench-sass-latest2.csv && mv v100-ubench-sass-local.csv ./statistics-archive/ubench/v100-ubench-sass-latest.csv
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-local.csv \
# | tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-local.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass-latest.csv,ampere-ubench-sass-local.csv \
| tee ampere-ubench-sass-latest2.csv && mv ampere-ubench-sass-local.csv ./statistics-archive/ubench/ampere-ubench-sass-latest.csv
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass-latest.csv,ampere-a100-ubench-sass-local.csv \
| tee ampere-a100-ubench-sass-latest2.csv && mv ampere-a100-ubench-sass-local.csv ./statistics-archive/ubench/ampere-a100-ubench-sass-latest.csv
if [[ $GITHUB_EVENT_NAME == 'push' ]]; then
git -C ./statistics-archive add --all
git -C ./statistics-archive commit \
-m "CI automated checkin $BRANCH_NAME Build:${{ github.run_number }}"_"${{ github.run_attempt}}" || echo "No Changes."
git -C ./statistics-archive push -u origin $BRANCH_NAME
fi
git -C ./statistics-archive add --all
git -C ./statistics-archive commit \
-m "CI automated checkin $BRANCH_NAME Build:${{ github.run_number }}"_"${{ github.run_attempt}}" || echo "No Changes."
git -C ./statistics-archive push -u origin $BRANCH_NAME
- name: Correlate Ubench
run: |
source ./env-setup/12.8_env_setup.sh
Expand All @@ -157,20 +146,18 @@ jobs:
mv ./util/plotting/correl-html/combined_per_kernel.html ./statistics-archive/ubench/v100-combined_per_kernel.html
mv ./util/plotting/correl-html/combined_per_app.html ./statistics-archive/ubench/v100-combined_per_app.html

./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/device-0/12.8/ | tee ampere-ubench-correl.txt

./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/device-0/12.8/ | tee ampere-a100-ubench-correl.txt
mv ./util/plotting/correl-html/combined_per_kernel.html ./statistics-archive/ubench/ampere-a100-combined_per_kernel.html
mv ./util/plotting/correl-html/combined_per_app.html ./statistics-archive/ubench/ampere-a100-combined_per_app.html

ssh ghci@tgrogers-pc01 mkdir -p /home/ghci/accel-sim/correl/git_$BRANCH_NAME"_"${{ github.run_number }}"_"${{ github.run_attempt}}/
rsync --delete -r ./util/plotting/correl-html/ ghci@tgrogers-pc01:/home/ghci/accel-sim/correl/git_$BRANCH_NAME"_"${{ github.run_number }}"_"${{ github.run_attempt}}/
git -C ./statistics-archive reset --soft HEAD~1
git -C ./statistics-archive add --all
git -C ./statistics-archive commit \
-m "CI automated checkin $PR_HEAD_REPO_FULL_NAME/$BRANCH_NAME Build:${{ github.run_number }}"_"${{ github.run_attempt}}" || echo "No Changes."
git -C ./statistics-archive push -f -u origin $PR_HEAD_REPO_FULL_NAME/$BRANCH_NAME
if [[ ${{ github.event_name }} == 'push' ]]; then
git -C ./statistics-archive reset --soft HEAD~1
git -C ./statistics-archive add --all
git -C ./statistics-archive commit \
-m "CI automated checkin ${{ github.repository }}/$BRANCH_NAME Build:${{ github.run_number }}"_"${{ github.run_attempt}}" || echo "No Changes."
git -C ./statistics-archive push -f -u origin ${{ github.repository }}/$BRANCH_NAME

rm -rf /scratch/tgrogers-disk01/a/tgrghci/ci/lastSuccess/${{ github.repository }}/$BRANCH_NAME
mkdir -p /scratch/tgrogers-disk01/a/tgrghci/ci/lastSuccess/${{ github.repository }}/$BRANCH_NAME
Expand Down Expand Up @@ -202,7 +189,7 @@ jobs:

# Try to checkout the same branch from the same owner's fork first
if [[ ${{ github.event_name }} == 'pull_request' ]]; then
current_owner=$(echo ${{ github.event.pull_request.head.repo.full_name }} | cut -d'/' -f1)
current_owner=$(echo $PR_HEAD_REPO_FULL_NAME | cut -d'/' -f1)
else
current_owner=$(echo ${{ github.repository }} | cut -d'/' -f1)
fi
Expand Down Expand Up @@ -245,8 +232,8 @@ jobs:
srun --time=8:00:00 -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src
./gpu-app-collection/get_regression_data.sh

#./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-${{ github.run_number }}_${{ github.run_attempt}}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX3070-PTX,A100-PTX -N short-ptx-${{ github.run_number }}_${{ github.run_attempt}}
#./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,A100-PTX -N short-ptx-${{ github.run_number }}_${{ github.run_attempt}}
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,A100-PTX -N short-ptx-${{ github.run_number }}_${{ github.run_attempt}}

./util/job_launching/monitor_func_test.py -v --sleep_time 300 -s stats-per-app-ptx.csv -N short-ptx-${{ github.run_number }}_${{ github.run_attempt}}
Tracer-Tool:
Expand Down Expand Up @@ -274,7 +261,7 @@ jobs:

# Try to checkout the same branch from the same owner's fork first
if [[ ${{ github.event_name }} == 'pull_request' ]]; then
current_owner=$(echo ${{ github.event.pull_request.head.repo.full_name }} | cut -d'/' -f1)
current_owner=$(echo $PR_HEAD_REPO_FULL_NAME | cut -d'/' -f1)
else
current_owner=$(echo ${{ github.repository }} | cut -d'/' -f1)
fi
Expand Down Expand Up @@ -380,7 +367,7 @@ jobs:
# Stats are only archived on pushes, so the repo in the report URL needs to be the head repo of the PR.
# The stats HTML files are generated by CI runs triggered by pushes to the head repo.
# Not so clean, but it works for now. The HTML files are not that important anyway.
export REPORT_URL="https://rawcdn.githack.com/accel-sim/statistics-archive/${{ github.event.pull_request.head.repo.full_name }}/$BRANCH_NAME/ubench/"
export REPORT_URL="https://rawcdn.githack.com/accel-sim/statistics-archive/$PR_HEAD_REPO_FULL_NAME/$BRANCH_NAME/ubench/"
python3 .github/scripts/send_ci_email.py -t success
fi

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/weekly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ jobs:
source ./gpu-app-collection/src/setup_environment
rm -rf ./hw_run/
./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
./util/tracer_nvbit/run_hw.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run
Expand Down
4 changes: 2 additions & 2 deletions util/hw_stats/run_hw.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@
"l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_miss.sum,idc__requests.sum,idc__requests_lookup_hit.sum,"
"sm__sass_inst_executed_op_shared_ld.sum,sm__sass_inst_executed_op_shared_st.sum,lts__t_sectors_srcunit_tex_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_lookup_miss.sum,sm__pipe_alu_cycles_active.sum,sm__pipe_fma_cycles_active.sum,sm__pipe_fp64_cycles_active.sum,sm__pipe_shared_cycles_active.sum,sm__pipe_tensor_cycles_active.sum,sm__pipe_tensor_op_hmma_cycles_active.sum,sm__cycles_active.sum,sm__cycles_active.avg,sm__cycles_elapsed.avg,sm__sass_thread_inst_executed_op_integer_pred_on.sum,sm__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.sum,sm__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.sum,sm__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.sum,sm__inst_executed_pipe_alu.sum,sm__inst_executed_pipe_fma.sum,sm__inst_executed_pipe_fp16.sum,sm__inst_executed_pipe_fp64.sum,sm__inst_executed_pipe_tensor.sum,sm__inst_executed_pipe_tex.sum,sm__inst_executed_pipe_xu.sum,sm__inst_executed_pipe_lsu.sum,"
"sm__sass_thread_inst_executed_op_fp16_pred_on.sum,sm__sass_thread_inst_executed_op_fp32_pred_on.sum,sm__sass_thread_inst_executed_op_fp64_pred_on.sum,sm__sass_thread_inst_executed_op_dmul_pred_on.sum,sm__sass_thread_inst_executed_op_dfma_pred_on.sum,sm__sass_inst_executed_op_memory_128b.sum,sm__sass_inst_executed_op_memory_64b.sum,sm__sass_inst_executed_op_memory_32b.sum,sm__sass_inst_executed_op_memory_16b.sum,sm__sass_inst_executed_op_memory_8b.sum,smsp__thread_inst_executed_per_inst_executed.ratio,sm__sass_thread_inst_executed.sum"
" --csv --page raw --target-processes all "
" --csv --page raw --target-processes all -f "
+ kernel_number
+ " -o "
+ os.path.join(this_run_dir, "ncu_stats")
Expand Down Expand Up @@ -306,7 +306,7 @@
)
elif options.nsight_profiler:
profile_command = (
"ncu --target-processes all --metrics gpc__cycles_elapsed.avg --csv "
"ncu --target-processes all --metrics gpc__cycles_elapsed.avg --csv -f "
+ kernel_number
+ " -o "
+ os.path.join(this_run_dir, "ncu_cycles.{0}".format(i))
Expand Down
55 changes: 38 additions & 17 deletions util/job_launching/apps/define-all-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,40 +54,40 @@ GPU_Microbenchmark:
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- l1_bw_32f:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
- args: --tpb 1024 --tpsm 1024 --blocks 1
accel-sim-mem: 1G
- l1_bw_64f:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
- args: --tpb 1024 --tpsm 1024 --blocks 1
accel-sim-mem: 1G
- l1_bw_128:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
- args: --tpb 1024 --tpsm 1024 --blocks 1
accel-sim-mem: 2G
- l1_lat:
- args: --blocks 1 --ws 32
- args: --blocks 1 --fast
accel-sim-mem: 1G
- l2_bw_32f:
- args: --tpb 1024 --tpsm 1024 --blocks 160 --ws 32
- args: --tpb 1024 --tpsm 1024 --fast
accel-sim-mem: 6G
- l2_bw_64f:
- args: --tpb 1024 --tpsm 1024 --blocks 160 --l2 786432 --ws 32
- args: --tpb 1024 --tpsm 1024 --fast
accel-sim-mem: 6G
# - l2_bw_128:
# - args:
# accel-sim-mem: 1G
- l2_lat:
- args: --tpb 1 --tpsm 1 --blocks 1 --l2 786432 --ws 32
- args: --fast
accel-sim-mem: 1G
- mem_bw:
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
- args: --tpb 1024 --tpsm 1024
accel-sim-mem: 2G
- mem_lat:
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
- args: --fast
accel-sim-mem: 1G
- shared_bw:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
- args: --tpb 1024 --tpsm 1024 --blocks 1
accel-sim-mem: 2G
- shared_lat:
- args: --blocks 1 --ws 32
- args: --blocks 1
accel-sim-mem: 1G
- shared_bank_conflicts:
## argument 1 kernel has conflicts
Expand All @@ -97,27 +97,27 @@ GPU_Microbenchmark:
- args: 2
accel-sim-mem: 1G
- MaxIops_int32:
- args: --tpb 1024 --blocks 1 --ws 32
- args: --tpb 1024 --blocks 1
accel-sim-mem: 1G
- l1_shared_bw:
- args: --tpb 1024 --blocks 1 --ws 32
- args: --tpb 1024 --blocks 1
accel-sim-mem: 1G
- l1_bw_32f_unroll:
- args: --tpb 1024 --blocks 1 --ws 32
- args: --tpb 1024 --blocks 1
accel-sim-mem: 1G
- l1_bw_32f_unroll_large:
- args:
- args: --tpb 1024 --blocks 1
accel-sim-mem: 1G

GPU_Atomic:
exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/"
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- atomic_add_bw:
- args: --tpb 1 --tpsm 1 --blocks 1 --ws 32
- args: --tpb 1 --tpsm 1 --blocks 1 --fast
accel-sim-mem: 1G
- atomic_add_bw_conflict:
- args: --tpb 1024 --tpsm 2048 --blocks 160 --ws 32
- args: --tpb 1024 --tpsm 2048 --fast
accel-sim-mem: 1G
- atomic_add_bw_profile:
- args: 16
Expand Down Expand Up @@ -1067,6 +1067,27 @@ huggingface:
- args:
accel-sim-mem: 10G

tma:
exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/tma"
data_dirs: ""
execs:
- tma_tensor_test:
- args: -w 1024 -h 1024 -o UTMAPF
- args: -w 1024 -h 1024 -o UTMALDG
- args: -w 1024 -h 1024 -o UTMASTG
- args: -w 1024 -h 1024 -o UTMAREDG
- args: -w 1024 -h 1024 -o REGULAR_LOAD
accel-sim-mem: 10G
- tma_bulk_test:
- args: -n 1024 -o UBLKPF
- args: -n 1024 -o UBLKCP_S_G
- args: -n 1024 -o UBLKCP_G_S
- args: -n 1024 -o UBLKRED_G_S
- args: -n 262144 -o UBLKPF
- args: -n 262144 -o UBLKCP_S_G
- args: -n 262144 -o UBLKCP_G_S
- args: -n 262144 -o UBLKRED_G_S
accel-sim-mem: 10G

vllm:
exec_dir: "$GPUAPPS_ROOT/bin/$CUDA_VERSION/release/vllm"
Expand Down
Loading
Loading