Skip to content
Merged
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
daf876e
fix race condition
sdickhoven Feb 15, 2025
e391969
address shell linter complaint
sdickhoven Feb 19, 2025
70fa5ae
undo unrelated changes
sdickhoven Feb 20, 2025
f19925b
simplify initial `inotifywait` trigger logic
sdickhoven May 27, 2025
967e463
remove all but one improbable race condition
sdickhoven May 30, 2025
f35a71c
nit
sdickhoven May 31, 2025
0d24d29
revert non-functional edits
sdickhoven Jun 9, 2025
f26afb3
address shellcheck complaints
sdickhoven Jun 11, 2025
af1282f
Bump k8s.io/client-go from 0.32.0 to 0.32.2 (#476)
dependabot[bot] Feb 17, 2025
d0e67bb
Bump docker/setup-qemu-action from 3.2.0 to 3.4.0 (#471)
dependabot[bot] Feb 17, 2025
3ce9c37
Bump sigstore/cosign-installer from 3.7.0 to 3.8.0 (#470)
dependabot[bot] Feb 17, 2025
ca8381f
Bump golang.org/x/net from 0.30.0 to 0.33.0 (#479)
dependabot[bot] Feb 17, 2025
a9dbdda
Bump golang in /proxy-init/integration/iptables (#472)
dependabot[bot] Feb 17, 2025
28e335e
Bump DavidAnson/markdownlint-cli2-action from 18.0.0 to 19.1.0 (#468)
dependabot[bot] Feb 17, 2025
ee6ec4a
Bump softprops/action-gh-release from 2.2.0 to 2.2.1 (#462)
dependabot[bot] Feb 17, 2025
52daf12
Bump linkerd/dev from 44 to 45 (#459)
dependabot[bot] Feb 17, 2025
555336d
build(deps): bump libc to 0.2.169, ring to 0.17.9, openssl to 0.10.71…
alpeb Feb 17, 2025
6fbc0a9
build(deps): bump github.com/spf13/cobra from 1.8.1 to 1.9.1 (#481)
dependabot[bot] Feb 17, 2025
798aab6
build(deps): bump actions/cache from 4.2.0 to 4.2.1 (#485)
dependabot[bot] Feb 20, 2025
5f984e0
test(deps): use setup-rust in cni-plugin-integration/repair-controlle…
alpeb Feb 20, 2025
a97c205
fix(linkerd-cni): improve SA token rotation detection (#478)
alpeb Feb 20, 2025
f94d0e3
build(deps): bump EmbarkStudios/cargo-deny-action from 2.0.4 to 2.0.5…
dependabot[bot] Feb 25, 2025
59b7fd8
build(deps): bump sigstore/cosign-installer from 3.8.0 to 3.8.1 (#487)
dependabot[bot] Feb 25, 2025
1867a94
build(deps): bump dev to v45 (#483)
alpeb Feb 25, 2025
e61fd73
build(deps): bump alpine from 3.21.0 to 3.21.3 (#482)
dependabot[bot] Feb 25, 2025
82b0910
Bump golang from 1.23-alpine to 1.24-alpine (#473)
dependabot[bot] Feb 25, 2025
fe8a1e4
build(deps): bump EmbarkStudios/cargo-deny-action from 2.0.5 to 2.0.6…
dependabot[bot] Feb 26, 2025
2ec5177
build(deps): bump actions/download-artifact from 4.1.8 to 4.1.9 (#491)
dependabot[bot] Feb 26, 2025
7574345
build(deps): bump docker/setup-qemu-action from 3.4.0 to 3.5.0 (#490)
dependabot[bot] Feb 26, 2025
49cd49a
build(deps): bump docker/setup-qemu-action from 3.5.0 to 3.6.0 (#493)
dependabot[bot] Feb 28, 2025
8de3761
build(deps): bump actions/cache from 4.2.1 to 4.2.2 (#492)
dependabot[bot] Feb 28, 2025
7aa3f83
fix(linkerd-cni): fix cleanup logic (#494)
alpeb Mar 3, 2025
8437fc2
fix(ci): remove EmbarkStudios/cargo-deny-action (#495)
olix0r Mar 3, 2025
c9e3218
build(deps): bump tokio from 1.35.1 to 1.38.2 (#507)
dependabot[bot] Apr 8, 2025
cd9a99c
fix(cni-plugin): append inbound skip ports instead of replacing (#518)
adleong May 17, 2025
ca37702
build(deps): bump actions/download-artifact from 4.1.9 to 4.3.0 (#517)
dependabot[bot] May 26, 2025
6e200be
build(deps): bump sigstore/cosign-installer from 3.8.1 to 3.8.2 (#510)
dependabot[bot] May 26, 2025
9119c32
build(deps): bump ring from 0.17.9 to 0.17.14 (#519)
dependabot[bot] May 26, 2025
d9a7077
build(deps): bump golang.org/x/net from 0.33.0 to 0.38.0 (#508)
dependabot[bot] May 26, 2025
10845da
build(deps): bump softprops/action-gh-release from 2.2.1 to 2.2.2 (#509)
dependabot[bot] May 26, 2025
36f44fd
build(deps): bump github.com/containernetworking/cni from 1.2.3 to 1.…
dependabot[bot] May 26, 2025
a5af4b3
build(deps): bump actions/cache from 4.2.2 to 4.2.3 (#504)
dependabot[bot] May 26, 2025
2615f0b
build(deps): bump docker/login-action from 3.3.0 to 3.4.0 (#501)
dependabot[bot] May 26, 2025
6908957
build(deps): bump DavidAnson/markdownlint-cli2-action (#523)
dependabot[bot] May 26, 2025
00db641
build(deps): bump alpine from 3.21.3 to 3.22.0 (#525)
dependabot[bot] Jun 5, 2025
63ea7dc
deps: have dependabot update composite actions (#524)
alpeb Jun 5, 2025
6c19648
build(deps): bump actions/checkout in /.github/actions/version-mode (…
dependabot[bot] Jun 9, 2025
e5cba87
Merge branch 'main' into BUG-fix-race-condition
sdickhoven Jun 11, 2025
834ed72
make script stylistically more consistent
sdickhoven Jun 11, 2025
0c4bb3a
address shellcheck complaints
sdickhoven Jun 11, 2025
6b3b0f3
Merge branch 'main' into CHORE-fix-inconsistencies
sdickhoven Jun 11, 2025
3017863
break comment lines consistently
sdickhoven Jun 11, 2025
97c421f
more efficient and concise find logic
sdickhoven Jun 11, 2025
218a121
Merge branch 'main' into CHORE-fix-inconsistencies
sdickhoven Jun 12, 2025
e43a87d
use braces for all variables
sdickhoven Jun 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 76 additions & 73 deletions cni-plugin/deployment/scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
# 2) https://github.com/istio/cni/blob/c63a509539b5ed165a6617548c31b686f13c2133/deployments/kubernetes/install/scripts/install-cni.sh

# Script to install Linkerd CNI on a Kubernetes host.
# - Expects the host CNI binary path to be mounted at /host/opt/cni/bin.
# - Expects the host CNI network config path to be mounted at /host/etc/cni/net.d.
# - Expects the desired CNI config in the CNI_NETWORK_CONFIG env variable.
# - Expects the host CNI binary path to be mounted at /host/opt/cni/bin
# - Expects the host CNI network config path to be mounted at /host/etc/cni/net.d
# - Expects the desired CNI config in the CNI_NETWORK_CONFIG env variable

# Ensure all variables are defined, and that the script fails when an error is hit.
set -u -e -o pipefail
# Ensure all variables are defined, and that the script fails when an error is
# hit.
set -u -e -o pipefail +o noclobber

# Helper function for raising errors
# Usage:
Expand Down Expand Up @@ -66,36 +67,37 @@ SERVICEACCOUNT_PATH=/var/run/secrets/kubernetes.io/serviceaccount
# *conflist files, then linkerd-cni configuration parameters will be removed
# from them.
cleanup() {
# First, kill both 'inotifywait' processes so we don't process any DELETE/CREATE events
# First, kill both 'inotifywait' processes so we don't process any
# DELETE/CREATE events.
pids=$(pgrep inotifywait)
if [ -n "$pids" ]; then
if [ -n "${pids}" ]; then
while read -r pid; do
log "Sending SIGKILL to inotifywait (PID: $pid)"
kill -s KILL "$pid"
done <<< "$pids"
log "Sending SIGKILL to inotifywait (PID: ${pid})"
kill -s KILL "${pid}"
done <<< "${pids}"
fi

log 'Removing linkerd-cni artifacts.'

# Find all conflist files and print them out using a NULL separator instead of
# writing each file in a new line. We will subsequently read each string and
# attempt to rm linkerd config from it using jq helper.
local cni_data=''
local cni_data
find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' \) -print0 |
while read -r -d $'\0' file; do
log "Removing linkerd-cni config from $file"
cni_data=$(jq 'del( .plugins[]? | select( .type == "linkerd-cni" ))' "$file")
log "Removing linkerd-cni config from ${file}"
cni_data=$(jq 'del( .plugins[]? | select( .type == "linkerd-cni" ))' "${file}")
# TODO (matei): we should write this out to a temp file and then do a `mv`
# to be atomic.
echo "$cni_data" > "$file"
echo "${cni_data}" > "${file}"
done

# Remove binary and kubeconfig file
if [ -e "${HOST_CNI_NET}/${KUBECONFIG_FILE_NAME}" ]; then
log "Removing linkerd-cni kubeconfig: ${HOST_CNI_NET}/${KUBECONFIG_FILE_NAME}"
rm -f "${HOST_CNI_NET}/${KUBECONFIG_FILE_NAME}"
fi
if [ -e "${CONTAINER_MOUNT_PREFIX}${DEST_CNI_BIN_DIR}"/linkerd-cni ]; then
if [ -e "${CONTAINER_MOUNT_PREFIX}${DEST_CNI_BIN_DIR}/linkerd-cni" ]; then
log "Removing linkerd-cni binary: ${CONTAINER_MOUNT_PREFIX}${DEST_CNI_BIN_DIR}/linkerd-cni"
rm -f "${CONTAINER_MOUNT_PREFIX}${DEST_CNI_BIN_DIR}/linkerd-cni"
fi
Expand All @@ -117,7 +119,7 @@ install_cni_bin() {
exit_with_error "${dir} is non-writeable, failure"
fi
for path in "${CONTAINER_CNI_BIN_DIR}"/*; do
cp "${path}" "${dir}"/ || exit_with_error "Failed to copy ${path} to ${dir}."
cp "${path}" "${dir}/" || exit_with_error "Failed to copy ${path} to ${dir}."
done

log "Wrote linkerd CNI binaries to ${dir}"
Expand All @@ -126,7 +128,7 @@ install_cni_bin() {
create_kubeconfig() {
KUBE_CA_FILE=${KUBE_CA_FILE:-${SERVICEACCOUNT_PATH}/ca.crt}
SKIP_TLS_VERIFY=${SKIP_TLS_VERIFY:-false}
SERVICEACCOUNT_TOKEN=$(cat ${SERVICEACCOUNT_PATH}/token)
SERVICEACCOUNT_TOKEN=$(cat "${SERVICEACCOUNT_PATH}/token")

# Check if we're not running as a k8s pod.
if [[ ! -f "${SERVICEACCOUNT_PATH}/token" ]]; then
Expand Down Expand Up @@ -184,40 +186,43 @@ create_cni_conf() {
cp "${CNI_NETWORK_CONFIG_FILE}" "${TMP_CONF}"
elif [ "${CNI_NETWORK_CONFIG}" ]; then
log 'Using CNI config template from CNI_NETWORK_CONFIG environment variable.'
cat >"${TMP_CONF}" <<EOF
cat <<EOF > "${TMP_CONF}"
${CNI_NETWORK_CONFIG}
EOF
fi

# Use alternative command character "~", since these include a "/".
sed -i s~__KUBECONFIG_FILEPATH__~"${DEST_CNI_NET_DIR}/${KUBECONFIG_FILE_NAME}"~g ${TMP_CONF}
sed -i s~__KUBECONFIG_FILEPATH__~"${DEST_CNI_NET_DIR}/${KUBECONFIG_FILE_NAME}"~g "${TMP_CONF}"

log "CNI config: $(cat ${TMP_CONF})"
log "CNI config: $(cat "${TMP_CONF}")"
}

install_cni_conf() {
local cni_conf_path=$1
local cni_conf_path=${1}

# Add the linkerd-cni plugin to the existing list.
local tmp_data
local conf_data
tmp_data=$(cat "$TMP_CONF")
conf_data=$(jq --argjson CNI_TMP_CONF_DATA "$tmp_data" -f /linkerd/filter.jq "$cni_conf_path" || true)
tmp_data=$(cat "${TMP_CONF}")
conf_data=$(jq --argjson CNI_TMP_CONF_DATA "${tmp_data}" -f /linkerd/filter.jq "${cni_conf_path}" || true)

# Ensure that CNI config file did not disappear during processing.
[ -n "$conf_data" ] || return 0
[ -n "${conf_data}" ] || return 0

echo "$conf_data" > "$TMP_CONF"
echo "${conf_data}" > "${TMP_CONF}"

# If the old config filename ends with .conf, rename it to .conflist, because it has changed to be a list
# If the old config filename ends with .conf, rename it to .conflist because
# it has changed to be a list.
local filename
local extension
filename=${cni_conf_path##*/}
extension=${filename##*.}
# When this variable has a file, we must delete it later.
old_file_path=
if [ "${filename}" != '01-linkerd-cni.conf' ] && [ "${extension}" = 'conf' ]; then
old_file_path=${cni_conf_path}
log "Renaming ${cni_conf_path} extension to .conflist"
cni_conf_path="${cni_conf_path}list"
old_file_path=${cni_conf_path}
log "Renaming ${cni_conf_path} extension to .conflist"
cni_conf_path=${cni_conf_path}list
fi

# Store SHA of each patched file in global `CNI_CONF_SHA` variable.
Expand All @@ -234,35 +239,36 @@ install_cni_conf() {
# "/etc/cni/net.d/10-bar.conflist": "7d865e959b2466918c9863afca942d0fb89d7c9ac0c99bafc3749504ded97730"
# }
local new_sha
new_sha=$( (sha256sum "$TMP_CONF" || true) | awk '{print $1}' )
CNI_CONF_SHA=$(jq -c --arg f "$cni_conf_path" --arg sha "$new_sha" '. * {$f: $sha}' <<< "$CNI_CONF_SHA")
new_sha=$( (sha256sum "${TMP_CONF}" || true) | awk '{print $1}' )
CNI_CONF_SHA=$(jq -c --arg f "${cni_conf_path}" --arg sha "${new_sha}" '. * {$f: $sha}' <<< "${CNI_CONF_SHA}")

# Move the temporary CNI config into place.
mv "${TMP_CONF}" "${cni_conf_path}" || exit_with_error 'Failed to mv files.'
[ -n "$old_file_path" ] && rm -f "${old_file_path}" && log "Removing unwanted .conf file"
[ -n "${old_file_path}" ] && rm -f "${old_file_path}" && log "Removing unwanted .conf file"

log "Created CNI config ${cni_conf_path}"
}

# Sync() is responsible for reacting to file system changes. It is used in
# conjunction with inotify events; sync() is called with the event type (which
# can be either 'CREATE', 'MOVED_TO' or 'MODIFY'), and the name of the file that
# `sync()` is responsible for reacting to file system changes. It is used in
# conjunction with inotify events; `sync()` is called with the event type (which
# can be either 'CREATE', 'MOVED_TO', or 'MODIFY') and the name of the file that
# has changed.
#
# Based on the changed file, sync() might re-install the CNI configuration file.
# Based on the changed file, `sync()` might re-install the CNI configuration
# file.
sync() {
local ev=$1
local ev=${1}
local file=${2//\/\//\/} # replace "//" with "/"

[[ "$file" =~ .*.(conflist|conf)$ ]] || return 0
[[ "${file}" =~ .*.(conflist|conf)$ ]] || return 0

log "Detected event: $ev $file"
log "Detected event: ${ev} ${file}"

# Retrieve previous SHA of detected file (if any) and compute current SHA.
local previous_sha
local current_sha
previous_sha=$(jq -r --arg f "$file" '.[$f] | select(.)' <<< "$CNI_CONF_SHA")
current_sha=$( (sha256sum "$file" || true) | awk '{print $1}' )
previous_sha=$(jq -r --arg f "${file}" '.[$f] | select(.)' <<< "${CNI_CONF_SHA}")
current_sha=$( (sha256sum "${file}" || true) | awk '{print $1}' )

# If the SHA hasn't changed or the detected file has disappeared, ignore it.
# When the SHA is the same, we can get into infinite loops whereby a file
Expand All @@ -274,21 +280,21 @@ sync() {
# creates a config file and then _immediately_ removes it again _while_ we are
# in the process of patching it. If this happens, we may create a patched CNI
# config file that should *not* exist.
if [ -n "$current_sha" ] && [ "$current_sha" != "$previous_sha" ]; then
log "New/changed file [$file] detected; re-installing"
if [ -n "${current_sha}" ] && [ "${current_sha}" != "${previous_sha}" ]; then
log "New/changed file [${file}] detected; re-installing"
create_kubeconfig
create_cni_conf
install_cni_conf "$file"
install_cni_conf "${file}"
else
log "Ignoring event: $ev $file; no real changes detected or file disappeared"
log "Ignoring event: ${ev} ${file}; no real changes detected or file disappeared"
fi
}

# monitor_cni_config starts a watch on the host's CNI config directory
monitor_cni_config() {
inotifywait -m "${HOST_CNI_NET}" -e create,moved_to,modify |
while read -r directory action filename; do
sync "$action" "$directory/$filename"
sync "${action}" "${directory}/${filename}"
done
}

Expand All @@ -302,22 +308,23 @@ monitor_cni_config() {
# Indeed, as per atomic writer's Write function docs, in the final steps the
# ..data_tmp symlink points to a new timestamped directory containing the new
# files, which is then atomically renamed to ..data:
# > 8. A symlink to the new timestamped directory ..data_tmp is created that will
# > become the new data directory.
# > 9. The new data directory symlink is renamed to the data directory; rename is atomic.
# > 8. A symlink to the new timestamped directory ..data_tmp is created that
# > will become the new data directory.
# > 9. The new data directory symlink is renamed to the data directory; rename
# > is atomic.
# See https://github.com/kubernetes/kubernetes/blob/release-1.32/pkg/volume/util/atomic_writer.go
monitor_service_account_token() {
inotifywait -m "${SERVICEACCOUNT_PATH}" -e moved_to |
while read -r _ _ filename; do
if [[ "$filename" == "..data" ]]; then
inotifywait -m "${SERVICEACCOUNT_PATH}" -e moved_to |
while read -r _ _ filename; do
if [[ "${filename}" == "..data" ]]; then
log "Detected change in service account files; recreating kubeconfig file"
create_kubeconfig
fi
done
fi
done
}

log() {
printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1}"
}

################################
Expand All @@ -339,24 +346,19 @@ CNI_CONF_SHA='{}'
monitor_cni_config &

# Append our config to any existing config file (*.conflist or *.conf)
config_files=$(find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \))
if [ -z "$config_files" ]; then
log "No active CNI configuration files found"
config_files=$(find "${HOST_CNI_NET}" -maxdepth 1 -type f ! -name '*linkerd*' \( -iname '*conflist' -o -iname '*conf' \))
if [ -z "${config_files}" ]; then
log "No active CNI configuration files found"
else
config_file_count=$(echo "$config_files" | grep -v linkerd | sort | wc -l)
if [ "$config_file_count" -eq 0 ]; then
log "No active CNI configuration files found"
else
find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \) -print0 |
while read -r -d $'\0' file; do
log "Trigger CNI config detection for $file"
tmp_file="$(mktemp -u /tmp/linkerd-cni.patch-candidate.XXXXXX)"
cp -fp "$file" "$tmp_file"
# The following will trigger the `sync()` function via filesystem event.
# This requires `monitor_cni_config()` to be up and running!
mv "$tmp_file" "$file" || exit_with_error 'Failed to mv files.'
done
fi
find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \) -print0 |
while read -r -d $'\0' file; do
log "Trigger CNI config detection for ${file}"
tmp_file="$(mktemp -u /tmp/linkerd-cni.patch-candidate.XXXXXX)"
cp -fp "${file}" "${tmp_file}"
# The following will trigger the `sync()` function via filesystem event.
# This requires `monitor_cni_config()` to be up and running!
mv "${tmp_file}" "${file}" || exit_with_error 'Failed to mv files.'
done
fi

# Watch in bg so we can receive interrupt signals through 'trap'. From 'man
Expand All @@ -368,5 +370,6 @@ fi
# the wait builtin to return immediately with an exit status greater than 128,
# immediately after which the trap is executed."
monitor_service_account_token &
# uses -n so that we exit when the first background job exits (when there's an error)
# uses -n so that we exit when the first background job exits (when there's an
# error)
wait -n