|
| 1 | +#!/bin/bash |
| 2 | +# |
| 3 | +################################################################################ |
| 4 | +# This script is used to warm an FSx for ONTAP volume. It does that simply |
| 5 | +# by reading every byte of every file in a volume. Reading all the |
| 6 | +# contents of a file forces the data to be read from the capacity tier |
| 7 | +# and stored in the performance tier. The assumption is that the user has |
| 8 | +# changed the tiering policy to something other than "all". |
| 9 | +# |
| 10 | +# To try and speed up the process, it will spawn multiple threads to process |
| 11 | +# the volume. It will spawn a separate thread for each directory |
| 12 | +# in the volume, and then a separate thread for each file it reads. The |
| 13 | +# number of directory threads is controlled by the -t option. The number |
| 14 | +# of reader threads is controlled by the -x option. Note that the script |
| 15 | +# will spawn -x reader threads PER directory thread. So, if you have 4 |
| 16 | +# directory threads and 10 reader threads, you could have up to 40 reader |
| 17 | +# threads running at once. |
| 18 | +# |
| 19 | +# If you provide the -f option, then the script will try to mount the volume |
| 20 | +# for you. It will mount it read-only. If the volume is already mounted, it |
| 21 | +# will not try to mount it again. If you provide the -f option, you must also |
| 22 | +# provide the -v option. |
| 23 | +################################################################################ |
| 24 | + |
################################################################################
# Print the command-line help for this script.
#
# Arguments: none
# Outputs:   the help text, written to standard error (nothing on stdout)
#
# The defaults quoted in the text must match the values assigned to
# maxDirThreads (4) and maxFileThreads (10) in the main logic.
################################################################################
usage () {
  cat 1>&2 <<EOF

Usage: $(basename "$0") [-f filesystem_endpoint] [-v volume_name] [-d directory] [-t max_directory_threads] [-x max_read_threads] [-n nfs_type] [-h] [-V]
where
  -f filesystem_endpoint   - Is the hostname or IP address of the FSx for ONTAP file system.
  -v volume_name           - Is the ID of the volume.
  -n nfs_type              - Is the NFS version to use. Default is nfs4.
  -d directory             - Is the root directory to start the process from.
  -t max_directory_threads - Is the maximum number of threads to use to process directories. Default is 4.
  -x max_read_threads      - Is the maximum number of threads to use to read files. Default is 10.
  -V                       - Enable verbose output. Displays the thread ID, date (in epoch seconds), then the directory or file being processed.
  -h                       - Prints this help information.

Notes:
  * The filesystem_endpoint, volume_name and nfs_type are used to mount the volume
    if it is not already mounted. It will be mounted read-only. It assumes that
    the junction path is the same as the volume name.
  * For each directory thread, there will be a maximum of max_read_threads threads
    reading files.
EOF

}
| 51 | + |
################################################################################
# Report whether a volume is already NFS-mounted on this host.
#
# Arguments: $1 - hostname or IP address of the file system
#            $2 - volume name (compared against the export path with its
#                 leading '/' removed)
# Outputs:   the mount point on stdout when a matching mount is found,
#            otherwise an empty line. Errors go to stderr.
# Returns:   0 when mounted, 1 when not.
#
# NOTE: when a hostname cannot be resolved the function calls 'exit 1'. The
# visible caller runs it inside $(...), so only that subshell exits; calling
# it directly would terminate the whole script.
################################################################################
isMounted () {
  local hostname=$1
  local volumeName=$2
  local hostIP mountHost mountVolume mountPoint mountIP
  #
  # Normalize the requested hostname to its IP address so that it can be
  # compared with whatever form appears in the mount table.
  hostIP=$(getent hosts "$hostname" | awk '{print $1}')
  if [[ -z "$hostIP" ]]; then
    echo "Error, cannot resolve the hostname '$hostname'." 1>&2
    exit 1
  fi

  while read -r mountHost mountVolume mountPoint; do
    #
    # Normalize the mounted host to its IP address; dotted quads are used as is.
    if [[ "$mountHost" =~ ^[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}$ ]]; then
      mountIP=$mountHost
    else
      mountIP=$(getent hosts "$mountHost" | awk '{print $1}')
    fi

    if [[ -z "$mountIP" ]]; then
      echo "Error, when trying to detect if the volume was already mount, could not resolve the hostname '$mountHost'." 1>&2
      exit 1
    fi

    if [[ "$mountIP" == "$hostIP" && "$mountVolume" == "$volumeName" ]]; then
      printf '%s\n' "$mountPoint"
      return 0
    fi
  #
  # A single -t with a comma-separated list is used on purpose: with repeated
  # -t options only the last type would be applied, hiding 'nfs' (v3) mounts.
  # The awk program turns "host:/volume on /mount/point ..." into
  # "host volume /mount/point".
  done < <(mount -t nfs,nfs2,nfs3,nfs4 | awk '{split($1, str, ":"); print str[1], substr(str[2], 2), $3}')

  echo ""
  return 1
}
| 92 | + |
################################################################################
# Read every regular file that sits directly in the given directory (no
# recursion), discarding the data, with a bounded number of concurrent readers.
#
# Globals:   verbose        (read) - "true" enables progress messages
#            maxFileThreads (read) - maximum number of concurrent readers
# Arguments: $1 - directory to process
# Outputs:   progress messages on stdout when verbose; warnings on stderr
# Returns:   1 if the directory cannot be entered, otherwise 0
#
# The function changes the working directory of the shell it runs in; the
# visible caller starts it with '&', so that only affects the background
# subshell.
################################################################################
processDirectory () {

  local -a runningPIDs=()
  local entry name pid

  if [[ "$verbose" == "true" ]]; then
    echo "$BASHPID $(date +%s) - Processing $1"
  fi

  # Without this check a failed cd would silently read the files of whatever
  # directory happened to be current.
  if ! cd "$1"; then
    echo "$BASHPID $(date +%s) - Warning, cannot change to directory '$1'." 1>&2
    return 1
  fi

  # Glob instead of parsing 'ls' so that names containing whitespace,
  # backslashes or newlines are handled intact. './.*' picks up dot files;
  # '.', '..' and unmatched patterns are dropped by the -f test below.
  for entry in ./* ./.*; do
    [[ -f "$entry" ]] || continue
    name=${entry#./}
    #
    # If there are too many readers running already, wait until one finishes.
    if [ "${#runningPIDs[@]}" -ge "$maxFileThreads" ]; then
      wait -n -p pid
      if [ $? -eq 127 ]; then
        echo "$BASHPID $(date +%s) - Warning, 'wait -n' returned 127" 1>&2
      else
        #
        # Check to make sure the PID is one that we recorded.
        if [ "${runningPIDs[$pid]}" != "$pid" ]; then
          echo "$BASHPID $(date +%s) - Warning, expected $pid, got '${runningPIDs[$pid]}'" 1>&2
        fi
        unset "runningPIDs[$pid]"
      fi
    fi
    if [[ "$verbose" == "true" ]]; then
      echo "$BASHPID $(date +%s) - Reading $1/$name and discarding output. Number of threads: $((${#runningPIDs[*]}+1))"
    fi
    # The './' prefix on $entry keeps names starting with '-' from being
    # taken as options by cat. The warning goes to stderr like all others.
    cat "$entry" > /dev/null 2>&1 || echo "$BASHPID $(date +%s) - Warning, cannot read '${name}'." 1>&2 &
    #
    # Record the PID mostly so we can keep up with how many readers are running.
    runningPIDs[$!]=$!
  done
  #
  # Wait for all the readers to finish.
  wait
  if [[ "$verbose" == "true" ]]; then
    echo "$BASHPID $(date +%s) - Done processing $1."
  fi
}
| 138 | + |
################################################################################
# Main logic starts here.
################################################################################
#
# Set some defaults. The usage text quotes these values, so keep both in sync.
maxDirThreads=4
maxFileThreads=10
nfsType=nfs4
verbose=false
#
# Process command line options.
while getopts "f:v:t:d:n:x:hV" option; do
  case "$option" in
    f)
      filesystemHostname=$OPTARG
      ;;
    v)
      volumeName=$OPTARG
      ;;
    d)
      rootDirectory=$OPTARG
      ;;
    t)
      maxDirThreads=$OPTARG
      ;;
    x)
      maxFileThreads=$OPTARG
      ;;
    n)
      nfsType=$OPTARG
      ;;
    V)
      verbose=true
      ;;
    h)
      # An explicit request for help is not an error.
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done
#
# Both limits are later used in integer comparisons, so reject anything that
# is not a positive integer here instead of failing inside the worker loops.
if ! [[ "$maxDirThreads" =~ ^[1-9][0-9]*$ && "$maxFileThreads" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error, the values given to -t and -x must be positive integers." 1>&2
  usage
  exit 1
fi
#
# If the user provided a filesystem endpoint, make sure the volume is mounted:
# reuse an existing mount when isMounted reports one, otherwise mount it
# read-only on a temporary directory under /mnt (requires root) and register
# an EXIT trap that unmounts it and removes the directory.
#
# NOTE(review): whenever -f is given, rootDirectory is overwritten with the
# mount point, so a -d value supplied together with -f is ignored -- confirm
# that this is intended.
if [[ -n "$filesystemHostname" ]]; then
  if [[ -z "$volumeName" ]]; then
    echo "Error, if you provide a filesystem endpoint, you must specify the volume_name to mount." 1>&2
    usage
    exit 1
  fi

  rootDirectory=$(isMounted "$filesystemHostname" "$volumeName")
  if [[ -z "$rootDirectory" ]]; then
    id=$(id -u)
    if [[ "$id" != "0" ]]; then
      echo "Error, volume '${volumeName}' from file system '${filesystemHostname}' is not mounted and this script isn't running as root so it can't mount it." 1>&2
      exit 1
    fi

    # Check both the exit status and the output of mktemp.
    if ! tmpMount=$(mktemp -d "/mnt/XXXXXXX-$volumeName") || [[ -z "$tmpMount" ]]; then
      echo "Error, cannot create a temporary mount point." 1>&2
      exit 1
    fi

    if mount -t "$nfsType" -o hard,ro "$filesystemHostname:/$volumeName" "$tmpMount"; then
      if [[ "$verbose" == "true" ]]; then
        echo "Volume '${volumeName}' from file system '${filesystemHostname}' mounted at '${tmpMount}'."
      fi
      # Single quotes defer expansion until the trap fires; the inner double
      # quotes keep the path intact even if it contains whitespace.
      trap 'umount "$tmpMount"; rmdir "$tmpMount"' EXIT
    else
      echo "Error, cannot mount ${filesystemHostname}:${volumeName}." 1>&2
      rmdir "$tmpMount"
      exit 1
    fi
    rootDirectory=$tmpMount
  else
    if [[ "$verbose" == "true" ]]; then
      echo "Volume '${volumeName}' from file system '${filesystemHostname}' is already mounted at '${rootDirectory}'."
    fi
  fi
fi
#
# A starting directory is required from here on: it comes either from -d or
# from the mount handling that precedes this check.
[[ -n "$rootDirectory" ]] || {
  echo "Error, you must specify the root directory to start the process from." >&2
  usage
  exit 1
}

# The starting point has to be an existing directory.
[[ -d "$rootDirectory" ]] || {
  echo "Error, the directory '$rootDirectory' does not exist." >&2
  exit 1
}
#
# Process all the directories in the volume: start one background
# processDirectory per directory found under rootDirectory, keeping at most
# maxDirThreads of them running at once. Directory names are read
# NUL-delimited so that whitespace, backslashes and newlines in names survive.
runningPIDs=()
while IFS= read -r -d '' directory; do
  #
  # If there are too many threads running already, wait until one finishes.
  if [ "${#runningPIDs[@]}" -ge "$maxDirThreads" ]; then
    wait -n -p pid
    if [ $? -eq 127 ]; then
      echo "$$ $(date +%s) - Warning, 'wait -n' returned 127." 1>&2
    else
      #
      # Check to make sure the PID is one that we recorded.
      if [ "${runningPIDs[$pid]}" != "$pid" ]; then
        echo "$$ $(date +%s) - Warning, expected $pid, got '${runningPIDs[$pid]}'" 1>&2
      else
        unset "runningPIDs[$pid]"
      fi
    fi
  fi
  if [[ "$verbose" == "true" ]]; then
    echo "$$ $(date +%s) - Processing directory $directory. Number of threads: $((${#runningPIDs[*]}+1))."
  fi
  processDirectory "$directory" &
  #
  # Index the array by PID so a finished job can be removed directly.
  runningPIDs[$!]=$!
done < <(find "$rootDirectory" -type d -print0)

if [[ "$verbose" == "true" ]]; then
  echo "$$ $(date +%s) - Waiting for all directory threads to finish."
fi
wait
0 commit comments