|
| 1 | +#Note: This configuration file is sourced into the bash environment for Chimbuko startup scripts, thus the user must follow correct shell conventions |
| 2 | +#Please do not remove any of the variables! |
| 3 | + |
| 4 | +#IMPORTANT NOTE: Variables that cannot be left as default are marked as <------------ ***SET ME*** |
| 5 | + |
| 6 | +#################################### |
| 7 | +#Options for visualization module |
| 8 | +#################################### |
| 9 | +use_viz=1 #enable or disable the visualization |
| 10 | +viz_root=${CHIMBUKO_VIZ_ROOT} #the root directory of the visualization module; taken from the CHIMBUKO_VIZ_ROOT environment variable by default <------------ ***SET ME (if using viz)*** |
| 11 | +viz_worker_port=6379 #the port on which to run the redis server for the visualization backend |
| 12 | +viz_port=5002 #the port on which to run the visualization webserver |
| 13 | +export C_FORCE_ROOT=1 #required only for docker runs, allows celery to execute properly as root user <------------ ***SET ME (if using Docker)*** |
| 14 | + |
| 15 | +############################################################ |
| 16 | +#General options for Chimbuko backend (pserver, ad, provdb) |
| 17 | +############################################################ |
| 18 | +backend_root="infer" #The root install directory of the PerformanceAnalysis libraries. If set to "infer" it will be inferred from the location of the provdb_admin executable in PATH (as the parent of the directory containing it) |
| 19 | +chimbuko_services="infer" #The location of the Chimbuko service script. If set to "infer" it will be set to ${backend_root}/scripts/launch/run_services.sh |
| 20 | + |
| 21 | +#################################### |
| 22 | +#Options for the provenance database |
| 23 | +#################################### |
| 24 | +use_provdb=1 #enable or disable the provDB. If disabled the provenance data will be written as JSON ASCII into the ${provdb_writedir} set below |
| 25 | +provdb_extra_args="" #any extra command line arguments to pass |
| 26 | +provdb_nshards=4 #number of database shards |
| 27 | +provdb_ninstances=1 #number of database server instances. Shards are distributed over instances |
| 28 | +provdb_engine="ofi+tcp;ofi_rxm" #the OFI libfabric provider used for the Mochi stack |
| 29 | +provdb_port=5000 #the port of the provenance database. For >1 instance the port of instance i will be provdb_port+i |
| 30 | +provdb_writedir=chimbuko/provdb #the directory in which the provenance database is written. Chimbuko creates chimbuko/provdb which can be used as a default |
| 31 | +provdb_commit_freq=10000 #interval (in ms) at which the provenance database is committed to disk. If set to 0 it will commit only at the end |
| 32 | + |
| 33 | +#provdb_interface : network interface upon which communication to the provdb is performed. <------------ ***SET ME*** |
| 34 | +# This variable has several options: |
| 35 | +# auto - let Mercury automatically choose an interface for all instances |
| 36 | +# <iface> - a single interface used for all instances |
| 37 | +# <iface1>:<iface2>:<iface3> .... - a colon-separated list of interfaces, one per instance |
| 38 | +# Obtain a list of interfaces from, e.g. "ip link show" (cf https://www.cyberciti.biz/faq/linux-list-network-interfaces-names-command/). |
| 39 | +provdb_interface=auto |
| 40 | + |
| 41 | +#provdb_domain : With "verbs" provider (used for infiniband, iWarp, etc) we need to also specify the domain, which can be found by running fi_info (on a compute node) |
| 42 | +# If left blank it will be chosen automatically. <------------ ***SET ME (if using verbs)*** |
| 43 | +provdb_domain= |
| 44 | + |
| 45 | +#provdb_numa_bind : specify NUMA domain binding for the provdb instances (requires numactl) |
| 46 | +# This variable has several options: |
| 47 | +# <blank> - if left blank, no binding will be performed |
| 48 | +# <index> - a single NUMA domain for all instances |
| 49 | +# <idx1>:<idx2>:<idx3> ... - a colon-separated list of NUMA domains, one per instance |
| 50 | +provdb_numa_bind= |
| 51 | + |
| 52 | +commit_extra_args="" #extra arguments for the committer |
| 53 | + |
| 54 | +export FI_UNIVERSE_SIZE=1600 # Defines the expected number of provenance DB clients per instance <------------ ***SET ME (should be larger than the number of clients/instance)*** |
| 55 | +export FI_MR_CACHE_MAX_COUNT=0 # disable MR cache in libfabric; still problematic as of libfabric 1.10.1 |
| 56 | +export FI_OFI_RXM_USE_SRX=1 # use shared recv context in RXM; should improve scalability |
| 57 | + |
| 58 | +#################################### |
| 59 | +#Options for the parameter server |
| 60 | +#################################### |
| 61 | +use_pserver=1 #enable or disable the pserver |
| 62 | +pserver_extra_args="" #any extra command line arguments to pass |
| 63 | +pserver_interface=eth0 #network interface upon which communication to the pserver is performed. Obtain from, e.g. "ip link show" (cf https://www.cyberciti.biz/faq/linux-list-network-interfaces-names-command/). <------------ ***SET ME*** |
| 64 | +pserver_port=5559 #port for parameter server |
| 65 | +pserver_nt=2 #number of worker threads |
| 66 | +pserver_numa_bind= #specify NUMA domain binding for the pserver (requires numactl). If left blank, no binding will be performed |
| 67 | +#################################### |
| 68 | +#Options for the AD module |
| 69 | +#################################### |
| 70 | +ad_extra_args="-perf_outputpath chimbuko/logs -perf_step 1" #any extra command line arguments to pass. Note: chimbuko/logs is automatically created by services script |
| 71 | +ad_win_size=5 #number of events around an anomaly to store; provDB entry size is proportional to this so keep it small! |
| 72 | +ad_alg="hbos" #the anomaly detection algorithm. Valid values are "hbos" and "sstd" |
| 73 | +ad_outlier_hbos_threshold=0.99 #the percentile threshold outside of which events are considered anomalies by the HBOS algorithm |
| 74 | +ad_outlier_sstd_sigma=12 #number of standard deviations that defines an outlier in the SSTD algorithm |
| 75 | +#################################### |
| 76 | +#Options for TAU |
| 77 | +#Note: Only the TAU_ADIOS2_PATH, TAU_ADIOS2_FILE_PREFIX, EXE_NAME, TAU_ADIOS2_ENGINE and tau_monitoring_conf variables are used by the Chimbuko services script, and only in order to generate the suggested |
| 78 | +# command to launch the AD (output to chimbuko/vars/chimbuko_ad_cmdline.var); they can be overridden by the run script if desired, provided that the appropriate modifications |
| 79 | +# are made to the AD launch command. The remainder of the variables are used only by TAU and can be freely overridden. |
| 80 | +#################################### |
| 81 | +export TAU_ADIOS2_ENGINE=SST #online communication engine (alternative BP4 although this goes through the disk system and may be slower unless the BPfiles are stored on a burst disk) |
| 82 | +export TAU_ADIOS2_ONE_FILE=FALSE #a different connection file for each rank |
| 83 | +export TAU_ADIOS2_PERIODIC=1 #enable/disable ADIOS2 periodic output |
| 84 | +export TAU_ADIOS2_PERIOD=1000000 #period in us between ADIOS2 io steps |
| 85 | +export TAU_THREAD_PER_GPU_STREAM=1 #force GPU streams to appear as different TAU virtual threads |
| 86 | +export TAU_THROTTLE=0 #enable/disable throttling of short-running functions |
| 87 | + |
| 88 | +#export TAU_MAKEFILE=/opt/tau2/x86_64/lib/Makefile.tau-papi-mpi-pthread-pdt-adios2 #The TAU makefile to use. If using a TAU installation built by Spack, this variable is already set in the environment and can be commented out here <------------ ***SET ME*** |
| 89 | + |
| 90 | +tau_monitoring_conf="default" #Provide a configuration file for the TAU monitoring plugin. It will be copied to the work directory as "tau_monitoring.json" (unless it is already there!). If set to default, Chimbuko will generate one automatically |
| 91 | + |
| 92 | +#Note: the following 2 variables are not used by the service script but are included here for use from the user's run script allowing the application to be launched with either "${TAU_EXEC} <app>" or "${TAU_PYTHON} <app>" |
| 93 | +#Note: the "binding" -T ... is used by Tau to find the appropriate configuration. It can typically be inferred from the name of the Makefile. If using a non-MPI job the 'mpi' should be changed to 'serial' and a non-MPI build of |
| 94 | +# ADIOS2/TAU must exist |
| 95 | +#Suggestion: It is useful to test the command without Chimbuko first to ensure TAU picks up the correct binding; this can be done by 'export TAU_ADIOS2_ENGINE=BPFile' and then running the application with Tau but without Chimbuko. |
| 96 | +TAU_EXEC="tau_exec -T papi,mpi,pthread,pdt,adios2 -adios2_trace -monitoring" #how to execute tau_exec; the -T arguments should mirror the makefile name <------------ ***SET ME*** |
| 97 | +TAU_PYTHON="tau_python -T papi,mpi,pthread,pdt,adios2 -tau-python-interpreter=python3 -adios2_trace -tau-python-args=-u" #how to execute tau_python. Note that passing -u to python forces it to not buffer stdout so we can pipe it |
| 98 | + #to tee in realtime <------------ ***SET ME (if not using python3)*** |
| 99 | + |
| 100 | +export EXE_NAME=main #the name of the executable (without path). For multi-component workflows this argument also accepts a list, e.g. (main1 main2) <------------ ***SET ME*** |
| 101 | + |
| 102 | +TAU_ADIOS2_PATH=chimbuko/adios2 #path where the adios2 files are to be stored. Chimbuko services creates the directory chimbuko/adios2 in the working directory and this should be used by default |
| 103 | +TAU_ADIOS2_FILE_PREFIX=tau-metrics #the prefix of tau adios2 files; full filename is ${TAU_ADIOS2_FILE_PREFIX}-${EXE_NAME}-${RANK}.bp |
| 104 | + |
| 105 | + |
| 106 | + |
| 107 | + |
| 108 | + |
| 109 | + |
| 110 | + |
| 111 | + |
| 112 | + |
| 113 | + |
| 114 | + |
| 115 | + |
| 116 | + |
| 117 | +########################################################################### |
| 118 | +# NON-USER VARIABLES BELOW = DON'T MODIFY THESE!! |
| 119 | +########################################################################### |
| 120 | +#Extra processing: build the ADIOS2 filename stem exported to TAU from the path and file prefix set above |
| 121 | +export TAU_ADIOS2_FILENAME="${TAU_ADIOS2_PATH}/${TAU_ADIOS2_FILE_PREFIX}" |
| 122 | + |
| 123 | +if [[ ${backend_root} == "infer" ]]; then #infer the backend root from the location of the provdb_admin executable found in PATH |
| 124 | +  if [[ -z $(type -P provdb_admin) ]]; then #type -P is a bash builtin that searches PATH only, so a non-empty result is always a file path |
| 125 | +    echo "When inferring the backend root directory, could not find provdb_admin in PATH. Please add your Chimbuko bin directory to PATH" >&2 |
| 126 | +    exit 1 #note: this file is sourced, so this terminates the script that sourced it |
| 127 | +  fi |
| 128 | + |
| 129 | +  backend_root=$( readlink -f "$(dirname "$(type -P provdb_admin)")/.." ) #parent of the directory containing provdb_admin; dirname strips only the final path component and quoting keeps paths with spaces intact |
| 130 | +fi |
| 131 | + |
| 132 | +if [[ ${chimbuko_services} == "infer" ]]; then #infer the service script location from backend_root |
| 133 | +  chimbuko_services="${backend_root}/scripts/launch/run_services.sh" |
| 134 | +  if [[ ! -f "${chimbuko_services}" ]]; then #the inferred path must be an existing regular file |
| 135 | +    echo "Could not infer service script location: service script does not exist at ${chimbuko_services}!" >&2 |
| 136 | +    exit 1 #note: this file is sourced, so this terminates the script that sourced it |
| 137 | +  fi |
| 138 | +fi |
| 139 | + |
0 commit comments