Skip to content
Merged
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
daf876e
fix race condition
sdickhoven Feb 15, 2025
e391969
address shell linter complaint
sdickhoven Feb 19, 2025
70fa5ae
undo unrelated changes
sdickhoven Feb 20, 2025
f19925b
simplify initial `inotifywait` trigger logic
sdickhoven May 27, 2025
967e463
remove all but one improbable race condition
sdickhoven May 30, 2025
f35a71c
nit
sdickhoven May 31, 2025
0d24d29
revert non-functional edits
sdickhoven Jun 9, 2025
f26afb3
address shellcheck complaints
sdickhoven Jun 11, 2025
af1282f
Bump k8s.io/client-go from 0.32.0 to 0.32.2 (#476)
dependabot[bot] Feb 17, 2025
d0e67bb
Bump docker/setup-qemu-action from 3.2.0 to 3.4.0 (#471)
dependabot[bot] Feb 17, 2025
3ce9c37
Bump sigstore/cosign-installer from 3.7.0 to 3.8.0 (#470)
dependabot[bot] Feb 17, 2025
ca8381f
Bump golang.org/x/net from 0.30.0 to 0.33.0 (#479)
dependabot[bot] Feb 17, 2025
a9dbdda
Bump golang in /proxy-init/integration/iptables (#472)
dependabot[bot] Feb 17, 2025
28e335e
Bump DavidAnson/markdownlint-cli2-action from 18.0.0 to 19.1.0 (#468)
dependabot[bot] Feb 17, 2025
ee6ec4a
Bump softprops/action-gh-release from 2.2.0 to 2.2.1 (#462)
dependabot[bot] Feb 17, 2025
52daf12
Bump linkerd/dev from 44 to 45 (#459)
dependabot[bot] Feb 17, 2025
555336d
build(deps): bump libc to 0.2.169, ring to 0.17.9, openssl to 0.10.71…
alpeb Feb 17, 2025
6fbc0a9
build(deps): bump github.com/spf13/cobra from 1.8.1 to 1.9.1 (#481)
dependabot[bot] Feb 17, 2025
798aab6
build(deps): bump actions/cache from 4.2.0 to 4.2.1 (#485)
dependabot[bot] Feb 20, 2025
5f984e0
test(deps): use setup-rust in cni-plugin-integration/repair-controlle…
alpeb Feb 20, 2025
a97c205
fix(linkerd-cni): improve SA token rotation detection (#478)
alpeb Feb 20, 2025
f94d0e3
build(deps): bump EmbarkStudios/cargo-deny-action from 2.0.4 to 2.0.5…
dependabot[bot] Feb 25, 2025
59b7fd8
build(deps): bump sigstore/cosign-installer from 3.8.0 to 3.8.1 (#487)
dependabot[bot] Feb 25, 2025
1867a94
build(deps): bump dev to v45 (#483)
alpeb Feb 25, 2025
e61fd73
build(deps): bump alpine from 3.21.0 to 3.21.3 (#482)
dependabot[bot] Feb 25, 2025
82b0910
Bump golang from 1.23-alpine to 1.24-alpine (#473)
dependabot[bot] Feb 25, 2025
fe8a1e4
build(deps): bump EmbarkStudios/cargo-deny-action from 2.0.5 to 2.0.6…
dependabot[bot] Feb 26, 2025
2ec5177
build(deps): bump actions/download-artifact from 4.1.8 to 4.1.9 (#491)
dependabot[bot] Feb 26, 2025
7574345
build(deps): bump docker/setup-qemu-action from 3.4.0 to 3.5.0 (#490)
dependabot[bot] Feb 26, 2025
49cd49a
build(deps): bump docker/setup-qemu-action from 3.5.0 to 3.6.0 (#493)
dependabot[bot] Feb 28, 2025
8de3761
build(deps): bump actions/cache from 4.2.1 to 4.2.2 (#492)
dependabot[bot] Feb 28, 2025
7aa3f83
fix(linkerd-cni): fix cleanup logic (#494)
alpeb Mar 3, 2025
8437fc2
fix(ci): remove EmbarkStudios/cargo-deny-action (#495)
olix0r Mar 3, 2025
c9e3218
build(deps): bump tokio from 1.35.1 to 1.38.2 (#507)
dependabot[bot] Apr 8, 2025
cd9a99c
fix(cni-plugin): append inbound skip ports instead of replacing (#518)
adleong May 17, 2025
ca37702
build(deps): bump actions/download-artifact from 4.1.9 to 4.3.0 (#517)
dependabot[bot] May 26, 2025
6e200be
build(deps): bump sigstore/cosign-installer from 3.8.1 to 3.8.2 (#510)
dependabot[bot] May 26, 2025
9119c32
build(deps): bump ring from 0.17.9 to 0.17.14 (#519)
dependabot[bot] May 26, 2025
d9a7077
build(deps): bump golang.org/x/net from 0.33.0 to 0.38.0 (#508)
dependabot[bot] May 26, 2025
10845da
build(deps): bump softprops/action-gh-release from 2.2.1 to 2.2.2 (#509)
dependabot[bot] May 26, 2025
36f44fd
build(deps): bump github.com/containernetworking/cni from 1.2.3 to 1.…
dependabot[bot] May 26, 2025
a5af4b3
build(deps): bump actions/cache from 4.2.2 to 4.2.3 (#504)
dependabot[bot] May 26, 2025
2615f0b
build(deps): bump docker/login-action from 3.3.0 to 3.4.0 (#501)
dependabot[bot] May 26, 2025
6908957
build(deps): bump DavidAnson/markdownlint-cli2-action (#523)
dependabot[bot] May 26, 2025
00db641
build(deps): bump alpine from 3.21.3 to 3.22.0 (#525)
dependabot[bot] Jun 5, 2025
63ea7dc
deps: have dependabot update composite actions (#524)
alpeb Jun 5, 2025
6c19648
build(deps): bump actions/checkout in /.github/actions/version-mode (…
dependabot[bot] Jun 9, 2025
e5cba87
Merge branch 'main' into BUG-fix-race-condition
sdickhoven Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 76 additions & 64 deletions cni-plugin/deployment/scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,16 @@ EOF
install_cni_conf() {
local cni_conf_path=$1

local tmp_data=''
local conf_data=''
if [ -e "${cni_conf_path}" ]; then
# Add the linkerd-cni plugin to the existing list
tmp_data=$(cat "${TMP_CONF}")
conf_data=$(jq --argjson CNI_TMP_CONF_DATA "${tmp_data}" -f /linkerd/filter.jq "${cni_conf_path}")
echo "${conf_data}" > ${TMP_CONF}
fi
# Add the linkerd-cni plugin to the existing list.
local tmp_data
local conf_data
tmp_data=$(cat "$TMP_CONF")
conf_data=$(jq --argjson CNI_TMP_CONF_DATA "$tmp_data" -f /linkerd/filter.jq "$cni_conf_path" || true)

# Ensure that CNI config file did not disappear during processing.
[ -n "$conf_data" ] || return 0

echo "$conf_data" > "$TMP_CONF"

# If the old config filename ends with .conf, rename it to .conflist, because it has changed to be a list
filename=${cni_conf_path##*/}
Expand All @@ -218,6 +220,23 @@ install_cni_conf() {
cni_conf_path="${cni_conf_path}list"
fi

# Store SHA of each patched file in global `CNI_CONF_SHA` variable.
#
# This must happen in a non-concurrent access context!
#
# The below logic assumes that the `CNI_CONF_SHA` variable is already a
# valid JSON object. So this variable must be initialized with '{}'!
#
# E.g. (pretty-printed; actual variable stores compact JSON object)
#
# {
# "/etc/cni/net.d/05-foo.conflist": "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c",
# "/etc/cni/net.d/10-bar.conflist": "7d865e959b2466918c9863afca942d0fb89d7c9ac0c99bafc3749504ded97730"
# }
local new_sha
new_sha=$( (sha256sum "$TMP_CONF" || true) | awk '{print $1}' )
CNI_CONF_SHA=$(jq -c --arg f "$cni_conf_path" --arg sha "$new_sha" '. * {$f: $sha}' <<< "$CNI_CONF_SHA")

# Move the temporary CNI config into place.
mv "${TMP_CONF}" "${cni_conf_path}" || exit_with_error 'Failed to mv files.'
[ -n "$old_file_path" ] && rm -f "${old_file_path}" && log "Removing unwanted .conf file"
Expand All @@ -226,57 +245,50 @@ install_cni_conf() {
}

# Sync() is responsible for reacting to file system changes. It is used in
# conjunction with inotify events; sync() is called with the name of the file
# that has changed, the event type (which can be either 'CREATE', 'DELETE',
# 'MOVED_TO' or 'MODIFY', and the previously observed SHA of the configuration
# file.
# conjunction with inotify events; sync() is called with the event type (one of
# 'CREATE', 'MOVED_TO' or 'MODIFY') and the path of the file that has
# changed.
#
# Based on the changed file and event type, sync() might re-install the CNI
# plugin's configuration file.
# Based on the changed file, sync() might re-install the CNI configuration file.
sync() {
local filename=$1
local ev=$2
local filepath="${HOST_CNI_NET}/$filename"

local prev_sha=$3

local config_file_count
local new_sha
if [ "$ev" = 'CREATE' ] || [ "$ev" = 'MOVED_TO' ] || [ "$ev" = 'MODIFY' ]; then
# When the event type is 'CREATE', 'MOVED_TO' or 'MODIFY', we check the
# previously observed SHA (updated with each file watch) and compare it
# against the new file's SHA. If they differ, it means something has
# changed.
new_sha=$(sha256sum "${filepath}" | while read -r s _; do echo "$s"; done)
if [ "$new_sha" != "$prev_sha" ]; then
# Create but don't rm old one since we don't know if this will be configured
# to run as _the_ cni plugin.
log "New/changed file [$filename] detected; re-installing"
create_kubeconfig
create_cni_conf
install_cni_conf "$filepath"
else
# If the SHA hasn't changed or we get an unrecognised event, ignore it.
# When the SHA is the same, we can get into infinite loops whereby a file has
# been created and after re-install the watch keeps triggering CREATE events
# that never end.
log "Ignoring event: $ev $filepath; no real changes detected"
fi
local ev=$1
local file=${2//\/\//\/} # replace "//" with "/"

[[ "$file" =~ .*.(conflist|conf)$ ]] || return 0

log "Detected event: $ev $file"

# Retrieve previous SHA of detected file (if any) and compute current SHA.
local previous_sha
local current_sha
previous_sha=$(jq -r --arg f "$file" '.[$f] | select(.)' <<< "$CNI_CONF_SHA")
current_sha=$( (sha256sum "$file" || true) | awk '{print $1}' )

# If the SHA hasn't changed or the detected file has disappeared, ignore it.
# Without the SHA comparison we can get into an infinite loop: re-installing a
# file moves the patched config into place, which makes the watch report
# another MOVED_TO event for that same file, which would in turn trigger yet
# another re-install.
# If the `current_sha` variable is blank then the detected CNI config file has
# disappeared and no further action is required.
# There exists an unhandled (highly improbable) edge case where a CNI plugin
# creates a config file and then _immediately_ removes it again _while_ we are
# in the process of patching it. If this happens, we may create a patched CNI
# config file that should *not* exist.
if [ -n "$current_sha" ] && [ "$current_sha" != "$previous_sha" ]; then
log "New/changed file [$file] detected; re-installing"
create_kubeconfig
create_cni_conf
install_cni_conf "$file"
else
log "Ignoring event: $ev $file; no real changes detected or file disappeared"
fi
}

# monitor_cni_config starts a watch on the host's CNI config directory
monitor_cni_config() {
inotifywait -m "${HOST_CNI_NET}" -e create,moved_to,modify |
while read -r directory action filename; do
if [[ "$filename" =~ .*.(conflist|conf)$ ]]; then
log "Detected change in $directory: $action $filename"
sync "$filename" "$action" "$cni_conf_sha"
# calculate file SHA to use in the next iteration
if [[ -e "$directory/$filename" ]]; then
cni_conf_sha="$(sha256sum "$directory/$filename" | while read -r s _; do echo "$s"; done)"
fi
fi
sync "$action" "$directory/$filename"
done
}

Expand Down Expand Up @@ -318,6 +330,14 @@ rm -f "${DEFAULT_CNI_CONF_PATH}"

install_cni_bin

# The CNI config monitor must be set up _before_ we start patching existing CNI
# config files!
# Otherwise, a CNI config file created just _after_ the initial round of
# patching but just _before_ the `inotifywait` loop is set up would never be
# detected, and would therefore never be patched.
CNI_CONF_SHA='{}'
monitor_cni_config &

# Append our config to any existing config file (*.conflist or *.conf)
config_files=$(find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \))
if [ -z "$config_files" ]; then
Expand All @@ -329,23 +349,16 @@ else
else
find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \) -print0 |
while read -r -d $'\0' file; do
log "Installing CNI configuration for $file"
create_kubeconfig
create_cni_conf
install_cni_conf "$file"
log "Trigger CNI config detection for $file"
tmp_file="$(mktemp -u /tmp/linkerd-cni.patch-candidate.XXXXXX)"
cp -fp "$file" "$tmp_file"
# The following will trigger the `sync()` function via filesystem event.
# This requires `monitor_cni_config()` to be up and running!
mv "$tmp_file" "$file" || exit_with_error 'Failed to mv files.'
done
fi
fi

# Compute SHA for first config file found; this will be updated after every iteration.
# First config file is likely to be chosen as the de facto CNI config by the
# host.
conf="$(find "${HOST_CNI_NET}" -maxdepth 1 -type f \( -iname '*conflist' -o -iname '*conf' \) | sort | head -n 1)"
cni_conf_sha=""
if [[ -n "$conf" ]]; then
cni_conf_sha="$(sha256sum "$conf" | while read -r s _; do echo "$s"; done)"
fi

# Watch in bg so we can receive interrupt signals through 'trap'. From 'man
# bash':
# "If bash is waiting for a command to complete and receives a signal
Expand All @@ -354,7 +367,6 @@ fi
# builtin, the reception of a signal for which a trap has been set will cause
# the wait builtin to return immediately with an exit status greater than 128,
# immediately after which the trap is executed."
monitor_cni_config &
monitor_service_account_token &
# uses -n so that we exit when the first background job exits (when there's an error)
wait -n