|
| 1 | +#!/usr/bin/env bash |
| 2 | + |
| 3 | +################################################################## |
| 4 | +## (C)Copyright 2019-2022 Hewlett Packard Enterprise Development LP |
| 5 | +## Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 6 | +## not use this file except in compliance with the License. You may obtain |
| 7 | +## a copy of the License at |
| 8 | +## |
| 9 | +## http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +## |
| 11 | +## Unless required by applicable law or agreed to in writing, software |
| 12 | +## distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | +## WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | +## License for the specific language governing permissions and limitations |
| 15 | +## under the License. |
| 16 | +################################################################## |
| 17 | + |
| 18 | +################################################################################## |
| 19 | +# Script to start Swarm Learning container and, optionally, the user ML container |
| 20 | +################################################################################## |
| 21 | + |
| 22 | + |
| 23 | +## EXAMPLES |
| 24 | +## -------- |
| 25 | +## 1. SL, with user-provided Identity |
| 26 | +## run-sl \ |
| 27 | +## --name=sl-2 --host-ip=sl-2 \ |
| 28 | +## --sn-ip=$(docker exec sn-2 hostname -I) \ |
| 29 | +## --ml-i --ml-image mnist:1.0.0 \ |
| 30 | +## --key=/tmp/id/sl-2-key.pem \ |
| 31 | +## --cert=/tmp/id/sl-2-certchain.pem \ |
| 32 | +## --capath=/tmp/id/ca/capath |
| 33 | + |
| 34 | +## 2. SL, with SPIFFE ID |
| 35 | +## The value to this variable "swarm_node_type" is the custom name provided |
| 36 | +## by the user while creating a registration entry inside spire-server. |
| 37 | +## |
| 38 | +## run-sl \ |
| 39 | +## --name=sl-1 --host-ip=sl-1 \ |
| 40 | +## --ml-i --ml-image mnist:1.0.0 \ |
| 41 | +## -e swarm_node_type=sl \ |
| 42 | +## --socket-path $( \ |
| 43 | +## docker ps --format '{{.Names}}' \ |
| 44 | +## --filter=ancestor=gcr.io/spiffe-io/spire-agent:1.1.3 |
| 45 | +## ) |
| 46 | + |
# Name of this script and the absolute directory it lives in. The inner
# command substitution is quoted so that an installation path containing
# whitespace or glob characters is passed to realpath as a single word.
progName=$(basename "${0}")
progDir=$(realpath "$(dirname "${0}")")

# Source a bunch of code that is common to all our scripts.
source "${progDir}/common"

# NOTE(review): presumably read by the shared code sourced above to decide
# which groups of option handlers to run, and in what order - confirm there.
funcCallOrder=("Common" "APLS" "UserID" "SpiffeID" "Component")

# Used by printCommonUsage.
defaultImageName="${swarmDockerHub}/${swarmOrg}/${slRepoName}"
defaultImageTag="${swarmVer}"

# Container-side port of the File Server port mapping built in onTrainEnd.
internalFSPort=30305
| 60 | + |
| 61 | + |
# Print the help text for the options that are specific to this script: the
# Swarm Learning node options followed by the "--ml-" options of the user
# Machine Learning container.
# Outputs: writes the help text to stdout.
# Returns: 0 always.
printComponentUsage()
{
    printf -- "--host-ip <IP address or DNS name>\n"
    printf -- "\tThe IP address or DNS name of the host system on which this\n"
    printf -- "\tSwarm Learning node will be created.\n"
    printf -- "\tMandatory Parameter\n\n"

    # The sentence below used to carry a stray period after "must".
    printf -- "--sn-ip <IP address or DNS name>\n"
    printf -- "\tThe IP address or DNS name of the host system on which the\n"
    printf -- "\tSwarm Network (SN) node with which this Swarm Learning node must\n"
    printf -- "\tassociate, is running.\n\n"

    printf -- "--sn-api-port <port number>\n"
    printf -- "\tHost port for the API Server of the associated Swarm Network node.\n"
    printf -- "\tDefault: 30304\n\n"

    printf -- "--sl-fs-port <port number>\n"
    printf -- "\tHost port for this Swarm Learning node's File Server.\n"
    printf -- "\tDefault: 30305\n\n"

    printf -- "--sn-docker-name <container name>\n"
    printf -- "\tDocker container name for the associated Swarm Network node.\n"
    printf -- "\tDefault: None\n\n"


    printf -- "Machine Learning container parameters\n"
    printf -- "-------------------------------------\n\n"
    printf -- "--ml-image <ML image name>\n"
    printf -- "\tName of the User's Machine Learning image \n"
    printf -- "\tOptional parameter.\n\n"

    printf -- "--ml-entrypoint <entrypoint>\n"
    printf -- "\tEntrypoint to the Machine Learning container \n"
    printf -- "\tOptional parameter \n\n"

    printf -- "--ml-cmd <command>\n"
    printf -- "\tCommand to the Machine Learning container \n"
    printf -- "\tOptional parameter \n\n"

    printf -- "--ml-w <directory path>\n"
    printf -- "\tWorking directory of the Machine Learning container \n"
    printf -- "\tOptional parameter \n\n"

    printf -- "--ml-name <container name>\n"
    printf -- "\tName of the Machine Learning container \n"
    printf -- "\tOptional parameter \n\n"

    printf -- "--ml-v <host-path:container-path>\n"
    printf -- "\tBind mount a volume for the Machine Learning container\n"
    printf -- "\tOptional parameter \n\n"

    printf -- "--ml-e <environmental-variable-name=value>\n"
    printf -- "\tTo pass environmental variable to the Machine Learning container\n"
    printf -- "\tOptional parameter \n\n"

    return 0
}
| 119 | + |
| 120 | + |
# Handle one command-line option on behalf of the Swarm Learning component.
# Arguments:
#   $1 - sidecar name; ignored by this handler
#   $2 - the parameter as it was originally given (kept for pass-through)
#   $3 - option name
#   $4 - option argument
# Globals:
#   unprocessedOpts - the original parameter is appended when the option is
#                     not one of ours
#   nShift          - set to 1 when the option is not one of ours
# Returns: 0 always.
processComponentBatchOpt()
{
    local origParam="${2}"
    local opt="${3}"
    local optarg="${4}"

    case "${opt}" in
        # The alternatives are spelled out instead of using an extglob
        # "@(...)" group, so the function parses regardless of whether
        # "shopt -s extglob" is in effect when this file is read.
        --host-ip | --sl-fs-port | --sn-api-port | --sn-docker-name | --sn-ip)
            checkAndAssign "${opt}" "${optarg}"
            ;;
        *)
            unprocessedOpts+=("${origParam}")
            nShift=1
            ;;
    esac

    return 0
}
| 137 | + |
| 138 | + |
# Wire up the request/response channel shared by the main container and the
# given sidecar.
# Arguments:
#   $1 - sidecar name; may be empty
# Globals:
#   mounts  - "-v /tmp/channel" is appended
#   envvar  - the two SL_*_CHANNEL variables are appended
#   the array named by makeVarName "<sidecar>-" envvar - same two variables
# Also calls assignVar to set "volumesFromMain" to "y" for the sidecar prefix.
# makeVarName and assignVar are defined outside this block.
# Returns: 0 always.
onMlComponentBatchEnd()
{
    local sidecar="${1}"

    # Always declared local: a conditional "local" would let an empty sidecar
    # name pick up a sidecarPrefix that happens to exist in an outer scope.
    local sidecarPrefix=""
    [[ -n "${sidecar}" ]] && sidecarPrefix="${sidecar}-"

    local -n sidecarEnvvarVar="$(makeVarName "${sidecarPrefix}" envvar)"
    local -n mainEnvvarVar="envvar"
    local -n mainMountsVar="mounts"

    mainMountsVar+=(-v /tmp/channel)
    mainEnvvarVar+=(-e SL_REQUEST_CHANNEL="/tmp/channel/request.channel")
    mainEnvvarVar+=(-e SL_RESPONSE_CHANNEL="/tmp/channel/response.channel")
    sidecarEnvvarVar+=(-e SL_REQUEST_CHANNEL="/tmp/channel/request.channel")
    sidecarEnvvarVar+=(-e SL_RESPONSE_CHANNEL="/tmp/channel/response.channel")

    assignVar "${sidecarPrefix}" volumesFromMain "y"

    return 0
}
| 158 | + |
| 159 | + |
# Turn the parsed Swarm Learning options into container settings.
# Globals read:
#   hostIp, snIp, snImage, snDockerName, snApiPort, slFsPort, internalFSPort
# Globals written:
#   envvar          - THIS_NODE_IP, SENTINEL_NODE_IP, API_SERVER_PORT and
#                     ML_FS_PORT are appended for the values that are set
#   ports           - "<slFsPort>:<internalFSPort>" mapping, if slFsPort is set
#   snExec, snIp    - filled in when no --sn-ip was given
#   cmd             - receives every entry of unprocessedOpts
#   unprocessedOpts - emptied afterwards
# error and genDockerExec are defined outside this block.
# Returns: 0 always.
onTrainEnd()
{
    if [[ -z "${hostIp}" ]]
    then
        error "mandatory parameter --host-ip not specified"
    else
        envvar+=(-e "THIS_NODE_IP=${hostIp}")
    fi

    if [[ -z "${snIp}" ]]
    then
        # snExec stays global, as before; code outside this block may read it.
        snExec=$(genDockerExec \
            "${snImage}" "${snDockerName}" "Swarm Network node")

        # With an empty prefix the command below would degenerate to a plain
        # "hostname -i" on this host and report the host's own address as
        # the Swarm Network node's. Only query when there is a prefix.
        if [[ -n "${snExec}" ]]
        then
            # snExec is intentionally unquoted: it is a command prefix that
            # has to be split into words.
            snIp="$(${snExec} hostname -i | tr -d '\r\n')"
        fi
    fi

    [[ -n "${snIp}" ]] && envvar+=(-e "SENTINEL_NODE_IP=${snIp}")
    [[ -n "${snApiPort}" ]] && envvar+=(-e "API_SERVER_PORT=${snApiPort}")

    if [[ -n "${slFsPort}" ]]
    then
        envvar+=(-e "ML_FS_PORT=${slFsPort}")
        ports+=(-p "${slFsPort}:${internalFSPort}")
    fi

    cmd+=("${unprocessedOpts[@]}")
    unprocessedOpts=()

    return 0
}
| 190 | + |
# We use "ml" as the default name for the User ML App sidecar. The user can pass
# in its properties (image, env, entrypoint, etc.) by adding a "--ml-" prefix to
# the corresponding parameters. However, the user may or may not want to run an
# ML container as a sidecar. Adding "ml" to the list of sidecars when there are
# no corresponding configuration parameters would produce errors (missing image
# specification, etc.). Therefore, we scan the command line first and check for
# at least one such property. We ignore the corner case of a "--sidecar ml". We
# will do at least two things incorrectly for this case:
#   1. We will add "ml" twice to the list of sidecars - here for the first time
#      and then, again for a second time, when we process the "--sidecar". So,
#      we will error out trying to create the container for a second time.
#   2. We will invoke onMlComponentBatchEnd on the assumption that this is the
#      User ML container when, in fact, it could be a completely unrelated one.

# Succeed when at least one argument is itself a "--ml-<property>" option.
# Each argument is tested on its own, so "--ml-" text embedded in the value of
# some other option does not count, and the argument list is never flattened
# into a single string.
# Arguments: the script's command-line arguments.
# Returns:   0 if such an option is present, 1 otherwise.
hasMlSidecarOpt()
{
    local arg

    for arg in "${@}"
    do
        [[ "${arg}" == --ml-[[:alnum:]]* ]] && return 0
    done

    return 1
}

hasMlSidecarOpt "${@}" && sidecars+=(ml)

main "${@}"
0 commit comments