|
#!/bin/bash
#
# Bootstrap: locate the directory this script lives in, load the metrics
# collection config that sits next to it, and make sure the PAR file the
# config points at is present before anything else runs.

# Absolute path of this script and the directory containing it.
scripts=$(realpath -- "$0")
folder=$(dirname -- "$scripts")

config_file="${folder}/rdma_metrics_collection_config.conf"
if [[ ! -f "$config_file" ]]; then
  echo "Config file:${config_file} does not exist." >&2
  exit 1
fi

# The config file is expected to define hoursAgoFromNow,
# metricsCollectionIntervalInMinute and parFileName.
# shellcheck source=/dev/null
source "$config_file"
hours="$hoursAgoFromNow"
interval="$metricsCollectionIntervalInMinute"
par_filename="$parFileName"

# Configuration problems are failures: report on stderr, exit non-zero.
if [[ -z "$par_filename" ]]; then
  echo "Please create a PAR and save into a file. Then, in config file, set the path of PAR-file to parFileName" >&2
  exit 1
fi

if [[ ! -f "$par_filename" ]]; then
  echo "PAR file:${par_filename} does not exist. Please create PAR file and update the config file" >&2
  exit 1
fi
| 22 | + |
# Print the command-line usage summary to stdout.
# Takes no arguments. The option letters listed here mirror the ones in the
# usage line (-l, -i, -h).
dis_help() {
  printf '%s\n' \
    "" \
    "Usage:" \
    "" \
    "./upload_rdma_nic_metrics.sh -l <limit:hours ago from now> -i <Metrics Interval>" \
    "" \
    "Options:" \
    "l Hours Ago From Now (optional)" \
    "i Metrics Collection Interval In Minute (optional)" \
    "h Print this help." \
    "" \
    "RDMA metrics are uploaded to Object Storage using PAR" \
    "" \
    "e.g., sh ./upload_rdma_nic_metrics.sh -l 24 -i 5 " \
    "" \
    "Supported releases: 2.10.3+" \
    ""
}
| 42 | + |
# Parse command-line options that override the values loaded from the config.
#
# Options:
#   -l <hours>    overrides the global `hours`
#   -i <minutes>  overrides the global `interval`
#   -h            prints usage via dis_help and exits 0
#
# Both values are later interpolated into an InfluxQL query, so anything that
# is not a positive integer is rejected here. Invalid options, missing option
# arguments and bad values are reported on stderr and exit with status 1.
parse_args() {
  local option OPTARG
  local OPTIND=1

  # Leading ':' selects silent mode so that missing arguments (':') and
  # unknown options ('?') can be reported separately.
  while getopts ":l:i:h" option; do
    case "$option" in
      l | i)
        if [[ ! "$OPTARG" =~ ^[1-9][0-9]*$ ]]; then
          echo "Error: option -${option} expects a positive integer, got '${OPTARG}'" >&2
          exit 1
        fi
        if [[ "$option" == "l" ]]; then
          hours=$OPTARG
        else
          interval=$OPTARG
        fi
        ;;
      h)
        dis_help
        exit 0
        ;;
      :)
        echo "Error: option -${OPTARG} requires an argument" >&2
        exit 1
        ;;
      \?)
        echo "Error: Invalid option -${OPTARG}" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
| 59 | + |
monitoring_folder="${folder}/../monitoring"

# Downsample one measurement into a backup measurement and report how many
# points were written.
# Globals:   hours, interval (read)
# Arguments: $1 - source measurement, $2 - backup measurement
# Outputs:   the written-point count on stdout; 0 when the reply carries no
#            count (no data, query error, unparsable output)
aggregate_metrics() {
  local source_measurement=$1
  local backup_measurement=$2
  local query written

  query="SELECT MEAN(*) INTO ${backup_measurement} FROM ${source_measurement} WHERE time < now() AND time > now() - ${hours}h GROUP BY time(${interval}m)"
  written=$(influx -database 'telegraf' -execute "${query}" -format json \
    | jq -r '.results[0].series[0].values[0][1] // 0') || written=0

  # Anything that is not a plain non-negative integer counts as "nothing
  # written", so callers can compare the result numerically.
  [[ "$written" =~ ^[0-9]+$ ]] || written=0
  printf '%s\n' "$written"
}

# Export a backup measurement as CSV and zip it as ${folder}/<stem>.zip.
# Files are addressed by full path so the result does not depend on the
# caller's working directory. The intermediate CSV is always removed.
# Globals:   folder (read)
# Arguments: $1 - backup measurement, $2 - file name stem (no extension)
# Returns:   0 when the zip exists afterwards, 1 otherwise
export_backup_archive() {
  local backup_measurement=$1
  local stem=$2
  local csv_path="${folder}/${stem}.csv"
  local zip_path="${folder}/${stem}.zip"

  influx -database 'telegraf' -execute "SELECT * FROM ${backup_measurement}" -format csv > "$csv_path"
  # The redirection creates the file even when the export fails, so test
  # for content rather than mere existence.
  if [[ ! -s "$csv_path" ]]; then
    echo "ERROR:${csv_path} was not created." >&2
    rm -f -- "$csv_path"
    return 1
  fi

  # -j stores only the file name inside the archive, not the directory path.
  zip -j "$zip_path" "$csv_path"
  rm -f -- "$csv_path"
  if [[ ! -f "$zip_path" ]]; then
    echo "ERROR:${zip_path} was not created." >&2
    return 1
  fi
}

# PUT ${folder}/<stem>.zip to <PAR URL><stem>.zip.
# Globals:   folder (read)
# Arguments: $1 - PAR URL prefix, $2 - file name stem (no extension)
# Returns:   curl's exit status; --fail makes HTTP errors non-zero
upload_archive() {
  local par_url=$1
  local stem=$2
  curl --fail -X PUT --data-binary "@${folder}/${stem}.zip" "${par_url}${stem}.zip"
}

# Remove every point from a backup measurement so the next run starts clean.
# Arguments: $1 - backup measurement
clear_backup() {
  influx -database 'telegraf' -execute "DELETE FROM $1"
}

# Collect, archive and upload the hardware counters of devices mlx5_0..mlx5_16
# and then the generic "infiniband" measurement.
# Globals:   par_filename, folder, hours, interval (read)
# Returns:   1 when no PAR URL can be read, 0 otherwise
upload_all_metrics() {
  local timestamp par i rows filename
  local measurementname measurementnameBackup

  timestamp=$(date +%s)

  # Read the PAR once up front: it is needed for every upload, including the
  # Infiniband one that runs after the device loop.
  if ! par=$(cat -- "${par_filename}") || [[ -z "$par" ]]; then
    echo "ERROR: could not read a PAR URL from ${par_filename}" >&2
    return 1
  fi

  for i in {0..16}; do
    measurementname="infiniband_mlx5_${i}_hw_counters"
    measurementnameBackup="${measurementname}_backup"

    echo "Checking device mlx5_${i} for RDMA HW metrics...."
    rows=$(aggregate_metrics "$measurementname" "$measurementnameBackup")
    if [ "$rows" -eq 0 ]; then
      echo "Device mlx5_${i} does not have metrics to collect"
      echo "......................................................"
      continue
    fi

    filename="infiniband_mlx5_${i}_${timestamp}"
    echo "Collecting RDMA HW metrics of device mlx5_${i}...."
    if export_backup_archive "$measurementnameBackup" "$filename"; then
      echo "Uploading RDMA HW Metrics to Object Storage for device mlx5_${i}"
      if upload_archive "$par" "$filename"; then
        echo "Uploaded RDMA HW metrics to Object Storage for device mlx5_${i}"
        echo "Object storage URL for device mlx5_${i}: ${par}${filename}.zip"
      else
        echo "ERROR: upload failed for device mlx5_${i}" >&2
      fi
    fi

    # Clear the backup on every path so a failed run cannot leak stale points
    # into the next export.
    clear_backup "$measurementnameBackup"
    echo "......................................................"
  done

  measurementname="infiniband"
  measurementnameBackup="infiniband_backup"

  echo "Checking for Infiniband counter metrics...."
  rows=$(aggregate_metrics "$measurementname" "$measurementnameBackup")
  if [ "$rows" -eq 0 ]; then
    echo "It does not have Infiniband counter metrics to collect"
    return 0
  fi

  filename="infiniband_${timestamp}"
  echo "Collecting Infiniband counter metrics...."
  if export_backup_archive "$measurementnameBackup" "$filename"; then
    echo "Uploading Infiniband counter metrics to Object Storage"
    if upload_archive "$par" "$filename"; then
      echo "Uploaded Infiniband counter metrics to Object Storage"
      echo "Object storage URL for Infiniband counter metrics: ${par}${filename}.zip"
    else
      echo "ERROR: upload failed for Infiniband counter metrics" >&2
    fi
  fi

  clear_backup "$measurementnameBackup"
  return 0
}

# Only collect when monitoring has been activated on this node.
if [[ -f "${monitoring_folder}/activated" ]]; then
  upload_all_metrics
fi
0 commit comments