Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ libpcap0.8-dev \
iproute2 \
udev \
gawk \
jq \
bash-completion \
&& rm -rf /var/lib/apt/lists/*

Expand Down
128 changes: 84 additions & 44 deletions hack/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set -Eeuo pipefail

OPT_MULTIPORT=false

BLUEFIELD_IDENTIFIERS=("MT_0000000543", "MT_0000000541")
BLUEFIELD_IDENTIFIERS=("MT_0000000543" "MT_0000000541")
MAX_NUMVFS_POSSIBLE=126
NUMVFS_DESIRED=126
CONFIG="/tmp/dp_service.conf"
Expand All @@ -32,18 +32,17 @@ function err() {
}

function get_pfs() {
if [[ "$OPT_MULTIPORT" == "true" ]]; then
readarray -t devs < <(devlink dev | grep '^pci/' | awk -F/ '{print $2}')
else
readarray -t devs < <(devlink dev | awk -F/ '{print $2}')
fi
readarray -t devs < <(devlink -js dev | jq -r '.dev | keys[] | select(startswith("pci/")) | sub("pci/"; "")')
}

function detect_card_and_arch_type() {
local pf="${devs[0]}"
local is_bluefield=false
local devlink_info
devlink_info=$(devlink -js dev info "pci/$pf")

for id in "${BLUEFIELD_IDENTIFIERS[@]}"; do
if devlink dev info pci/$pf | grep -q "$id"; then
if echo "$devlink_info" | jq -e '.. | strings | select(. == "'"$id"'")' > /dev/null; then
is_bluefield=true
log "Detected BlueField card with identifier $id on pf: $pf"
break
Expand Down Expand Up @@ -84,10 +83,12 @@ function detect_card_and_arch_type() {
}

function validate() {
# we check if we have devlink available
if ! command -v devlink 2> /dev/null; then
if ! command -v devlink &> /dev/null; then
err "devlink not available, exiting"
fi
if ! command -v jq &> /dev/null; then
err "jq not available, exiting"
fi
}

function validate_pf() {
Expand All @@ -101,7 +102,6 @@ function validate_pf() {
return
fi

# we check if sr-iov is enabled and the dev is using the mlx5 driver
unset valid_devs
for pf in "${devs[@]}"; do
log "check pf $pf"
Expand All @@ -123,7 +123,7 @@ process_switchdev_mode() {
local pf=$1

log "enabling switchdev for $pf"
if ! devlink dev eswitch set pci/$pf mode switchdev; then
if ! devlink dev eswitch set "pci/$pf" mode switchdev; then
err "can't set eswitch mode"
fi
log "now waiting for everything to settle"
Expand All @@ -134,7 +134,7 @@ process_multiport_eswitch_mode() {
local pf=$1

log "enabling multiport eswitch mode for $pf"
if ! devlink dev param set pci/$pf name esw_multiport value true cmode runtime; then
if ! devlink dev param set "pci/$pf" name esw_multiport value true cmode runtime; then
err "can't enable multiport eswitch mode"
fi
log "now waiting for everything to settle"
Expand All @@ -158,8 +158,6 @@ function vfio_bind() {

modified_pci=${pf0::-3}

# pci address of vfs are organized in a block of 8
# e.g., 3b:00.0 ... 3b:00.7, 3b:01.0 ... 3b:01.7, etc.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment was here to explain the strange for loop, because for those that do not know the VF numbering scheme it makes no sense just by reading the loop.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes totally sense. I will add all the removed original comments.

for i in $(seq $vf_offset $((vf_offset + numvfs - 1))); do
vf_in_block_count=$(($i%8))
if [ $vf_in_block_count -eq 0 ]; then
Expand Down Expand Up @@ -192,7 +190,6 @@ function create_vf() {
if [[ "$IS_ARM_WITH_BLUEFIELD" == "true" ]]; then
actualvfs=$NUMVFS_DESIRED
log "Skipping VF creation for BlueField card on ARM"
# enable switchdev mode, this operation takes most time
process_switchdev_mode "$pf0"
return
fi
Expand All @@ -210,13 +207,11 @@ function create_vf() {
return
fi

# we disable automatic binding so that VFs don't get created, saves a lot of time
# plus we don't need to unbind them before enabling switchdev mode
log "disabling automatic binding of VFs on pf0 '$pf0'"
echo 0 > /sys/bus/pci/devices/$pf0/sriov_drivers_autoprobe

if [[ "$IS_X86_WITH_MLX" == "true" ]]; then
# enable switchdev mode, this operation takes most time
if [[ "$IS_X86_WITH_MLX" == "true" || "$IS_X86_WITH_BLUEFIELD" == "true" ]]; then
log "Enabling switchdev mode for MLX or BlueField card on x86"
if [[ "$OPT_MULTIPORT" == "true" ]]; then
for pf in "${devs[@]}"; do
process_switchdev_mode "$pf"
Expand All @@ -232,7 +227,6 @@ function create_vf() {
done
fi

# calculating amount of VFs to create, 126 if more are available, or maximum available
totalvfs=$(cat /sys/bus/pci/devices/$pf0/sriov_totalvfs)
actualvfs=$((NUMVFS_DESIRED<totalvfs ? NUMVFS_DESIRED : totalvfs))
log "creating $actualvfs virtual functions"
Expand All @@ -247,43 +241,85 @@ function create_vf() {
}


get_raw_vf_pattern() {
local dev=$1
devlink -js port | \
jq -r --arg dev "$dev" '.port | to_entries[] | select(.key | startswith("pci/" + $dev)) | select(.value.flavour=="pcivf") | .value.netdev' | \
sed -rn 's/(.*[a-z_])[0-9]{1,3}$/\1/p' | \
uniq
}

function get_pattern() {
local dev=$1
pattern=$(devlink port | grep pci/$dev/ | grep "virtual\|pcivf" | awk '{print $5}' | sed -rn 's/(.*[a-z_])[0-9]{1,3}$/\1/p' | uniq)
if [ -z "$pattern" ]; then
err "can't determine the vf pattern for $dev"
elif [ $(wc -l <<< "$pattern") -ne 1 ]; then
err "multiple vf patterns found for $dev"
local pattern

if [[ "$IS_ARM_WITH_BLUEFIELD" == "true" ]]; then
local timeout_seconds=120
local interval_seconds=2
local end_time=$(( $(date +%s) + timeout_seconds ))

while [ $(date +%s) -lt $end_time ]; do
pattern=$(get_raw_vf_pattern "$dev")
if [ -n "$pattern" ]; then
if [ $(wc -l <<< "$pattern") -eq 1 ]; then
sleep $interval_seconds
echo "$pattern"
return
fi
fi
sleep $interval_seconds
done
err "timed out after ${timeout_seconds}s waiting for the vf pattern for $dev"
else
pattern=$(get_raw_vf_pattern "$dev")
if [ -z "$pattern" ]; then
err "can't determine the vf pattern for $dev"
elif [ $(wc -l <<< "$pattern") -ne 1 ]; then
err "multiple vf patterns found for $dev"
fi
echo "$pattern"
fi
echo "$pattern"
}

function get_ifname() {
local port=$1
devlink port | grep "physical port $port" | awk '{ print $5}'
local port_idx=$1
devlink -js port | jq -r --argjson idx "$port_idx" '.port | .[] | select(.flavour=="physical" and .port==$idx) | .netdev'
}

function get_ipv6() {
# TODO: this needs to be done in a better way
while read -r l1; do
if [ "$l1" != "::1/128" ]; then
echo ${l1%/*}
return
fi
done < <(ip -6 -o addr show lo | awk '{print $4}')
err "no ipv6 found"
}
function get_ipv6() {
local ip_json
ip_json=$(ip -js -6 addr show lo)

function make_config() {
if [[ "$IS_X86_WITH_BLUEFIELD" == "true" ]]; then
log "Skipping config file creation on AMD/Intel 64-bit host with Bluefield"
if [ -z "$ip_json" ]; then
err "Could not retrieve address information for interface 'lo'."
return 1
fi

# 1. Have a "global" scope.
# 2. Have a prefix length of 64 or more.
# 3. Must be in Globally Unique Address (GUA) range (2 or 3).
local global_address
global_address=$(echo "$ip_json" | jq -r '
.[0].addr_info[]
| select(
.scope == "global" and
.prefixlen >= 64 and
(.local | test("^(2|3)"))
)
| .local' | head -n 1)

if [ -n "$global_address" ]; then
echo "$global_address"
return
fi

# To make error propagation work, need to assign separately
err "No Global Unicast IPv6 address with prefix >= 64 found on 'lo'."
}

function make_config() {
conf_pf0="$(get_ifname 0)"
conf_pf1="$(get_ifname 1)"
conf_vf_pattern="$(get_pattern ${devs[0]})"
conf_vf_pattern="$(get_pattern "${devs[0]}")"
conf_ipv6="$(get_ipv6)"

{ echo "# This has been generated by prepare.sh"
Expand Down Expand Up @@ -345,5 +381,9 @@ validate
get_pfs
detect_card_and_arch_type
validate_pf
# Bluefield relies on host side creating VFs, so wait here for systemd to settle and dont rush with the next steps
if [[ "$IS_ARM_WITH_BLUEFIELD" == "true" ]]; then
udevadm settle
fi
create_vf
make_config
make_config
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please do not remove endlines, when I sometimes cat files it messes up my terminal prompt :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:) will correct.

Loading