8585# "Failed to initialize SSD: [xxxx:xx:xx.x]" when DAOS engines are started.
8686SPDK_SETUP_CMD=" /usr/share/daos/spdk/scripts/setup.sh"
8787
88- function check_spdk_setup_cmd {
88+ check_spdk_setup_cmd () {
8989 if [ ! -d " $( dirname " $SPDK_SETUP_CMD " ) " ] || [ ! -f " $SPDK_SETUP_CMD " ]; then
9090 echo -n " Required SPDK scripts directory $( dirname " $SPDK_SETUP_CMD " ) "
9191 echo " or setup.sh not found!"
@@ -94,27 +94,47 @@ function check_spdk_setup_cmd {
9494 return 0
9595}
9696
# Print the number of PCI devices that lspci lists as
# "Non-Volatile memory controller" (one line per device, so the count of
# matching lines is the device count).
# Outputs: the count on stdout ("0" when nothing matches)
# Returns: always 0
get_nvme_count_devices () {
  # grep -c still prints 0 when nothing matches but exits non-zero;
  # "|| true" keeps that case from being reported as a failure.
  lspci -D | grep -c -E "Non-Volatile memory controller" || true
}
100100
# Lookup tables filled by pci_device_create_cache from a single
# "$SPDK_SETUP_CMD status" run. Keys are PCI addresses (dddd:bb:dd.f);
# the stored value is always 1, only the presence of the key matters.
declare -A MOUNTED_PCI_DEVICES
declare -A PCI_DEVICES_WITH_DATA

# Rebuild MOUNTED_PCI_DEVICES and PCI_DEVICES_WITH_DATA from the SPDK status output.
# Globals:   SPDK_SETUP_CMD (read)
#            MOUNTED_PCI_DEVICES, PCI_DEVICES_WITH_DATA (reset, then written)
# Outputs:   two summary lines with the number of cached devices
# Both tables are left empty when check_spdk_setup_cmd fails.
pci_device_create_cache () {
  local status_output line pci_device_address
  MOUNTED_PCI_DEVICES=()
  PCI_DEVICES_WITH_DATA=()
  if check_spdk_setup_cmd; then
    # stderr is merged in so device notices are seen whichever stream they use
    status_output="$($SPDK_SETUP_CMD status 2>&1)"
    # read without IFS= on purpose: leading blanks are trimmed, so the first
    # space-delimited word of a device line is its PCI address.
    while read -r line; do
      pci_device_address="${line%% *}"
      if [[ "$pci_device_address" =~ ^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9]$ ]]; then
        # The tag may be any entry of the "Active devices:" list, not only the
        # first one (presumably a comma-separated list -- confirm against the
        # SPDK setup.sh in use), so match it anywhere after the label.
        if [[ "$line" == *"Active devices:"*"mount@"* ]]; then
          MOUNTED_PCI_DEVICES["$pci_device_address"]=1
        fi
        if [[ "$line" == *"Active devices:"*"data@"* ]]; then
          PCI_DEVICES_WITH_DATA["$pci_device_address"]=1
        fi
      fi
    done <<< "$status_output"
  fi
  echo "Cached ${#MOUNTED_PCI_DEVICES[@]} mounted PCI devices"
  echo "Cached ${#PCI_DEVICES_WITH_DATA[@]} PCI devices with data"
}

# Succeed when the given PCI address was recorded as mounted by the last
# pci_device_create_cache run.
# Arguments: $1 - PCI address (aborts with a usage message when missing)
# Returns:   0 if the address is a key of MOUNTED_PCI_DEVICES, 1 otherwise
pci_device_is_mounted () {
  local pci_device_address="${1:?Usage: pci_device_is_mounted <pci_device_address>}"
  # ${arr[key]+set} tests key presence without [[ -v arr[key] ]], which needs
  # bash >= 4.3 and evaluates the subscript a second time.
  [[ -n "${MOUNTED_PCI_DEVICES[$pci_device_address]+set}" ]]
}
105125
# Succeed when the given PCI address is a key of the PCI_DEVICES_WITH_DATA
# table (filled by pci_device_create_cache).
# Arguments: $1 - PCI address (aborts with a usage message when missing)
# Returns:   0 if the address is present in the table, 1 otherwise
pci_device_has_data () {
  local pci_device_address="${1:?Usage: pci_device_has_data <pci_device_address>}"
  # ${arr[key]+set} tests key presence without [[ -v arr[key] ]], which needs
  # bash >= 4.3 and evaluates the subscript a second time.
  [[ -n "${PCI_DEVICES_WITH_DATA[$pci_device_address]+set}" ]]
}
110130
# Print the content of the sysfs numa_node file of a PCI device.
# Arguments: $1 - PCI address (aborts with a usage message when missing)
# Outputs:   content of /sys/bus/pci/devices/<address>/numa_node
# Returns:   exit status of cat (non-zero when the file cannot be read)
pci_device_get_numa () {
  local pci_device="${1:?Usage: pci_device_get_numa <pci_device_address>}"
  local pci_device_numa_path="/sys/bus/pci/devices/${pci_device}/numa_node"
  cat "${pci_device_numa_path}"
}
116136
117- function nvme_dev_get_first_by_pcie_addr() {
137+ nvme_dev_get_first_by_pcie_addr () {
118138 local pci_device_address=" ${1:? Usage: nvme_dev_get_first_by_pcie_addr <pci_device_address>} "
119139 local nvme_dir=" /sys/bus/pci/devices/$pci_device_address /nvme"
120140 local nvme_device symlink
@@ -133,7 +153,7 @@ function nvme_dev_get_first_by_pcie_addr() {
133153
134154
135155# Calculates --nsze and --ncap for a device so the namespace spans the full usable capacity
136- nvme_calc_full_nsze_ncap () {
156+ nvme_calc_full_nsze_ncap () {
137157 local nvme_device=" ${1:? Usage: nvme_calc_full_nsze_ncap <nvme_device>} "
138158 # Query the NVMe device info for total logical blocks and LBA size
139159 # Prefer tnvmcap, fallback to unvmcap if tnvmcap not found
@@ -170,7 +190,7 @@ nvme_calc_full_nsze_ncap() {
170190 printf -- " --nsze=0x%x --ncap=0x%x\n" " $lba_count " " $lba_count "
171191}
172192
173- function nvme_recreate_namespace {
193+ nvme_recreate_namespace () {
174194# lbaf 0 : ms:0 lbads:9 rp:0x1 (in use) → 512B blocks
175195# lbaf 1 : ms:0 lbads:12 rp:0 → 4096B blocks (4K)
176196# lbaf 2 : ms:8 lbads:9 rp:0x3 → 512B + 8B metadata
@@ -187,7 +207,6 @@ function nvme_recreate_namespace {
187207 local nvme_device_path=" /dev/${nvme_device} "
188208 local nvme_device_ns_path=" ${nvme_device_path} n1"
189209 local nvme_create_ns_opts
190- # echo "Recreating namespace on $nvme_device_path ..."
191210 # Optionally skip delete step
192211 if [[ " $skip_delete " != " true" ]]; then
193212 nvme delete-ns " $nvme_device_path " -n 0x1 || \
@@ -197,17 +216,16 @@ function nvme_recreate_namespace {
197216 else
198217 echo " INFO: Skipping namespace delete on $nvme_device_path "
199218 fi
200- nvme delete-ns " $nvme_device_path " -n 0x1 || \
201- { echo " ERROR: delete the ${nvme_device_path} namespace failed" ; exit 1; }
202219 nvme reset " $nvme_device_path " || \
203220 { echo " ERROR: reset the ${nvme_device_path} device failed" ; exit 1; }
221+
204222 nvme_create_ns_opts=$( nvme_calc_full_nsze_ncap " ${nvme_device_path} " )
205223 nvme create-ns " $nvme_device_path " $nvme_create_ns_opts --flbas=0 || \
206224 { echo " ERROR: create the ${nvme_device_path} namespace failed" ; exit 1; }
207225 nvme attach-ns " $nvme_device_path " -n 0x1 -c 0x41 || \
208226 { echo " ERROR: attach the ${nvme_device_path} namespace failed" ; exit 1; }
209227 # Wait up to 5 seconds for device node to appear
210- for i in {1..10 }; do
228+ for i in {1..5 }; do
211229 if [ -b " $nvme_device_ns_path " ]; then
212230 break
213231 fi
@@ -223,11 +241,10 @@ function nvme_recreate_namespace {
223241 nvme reset " $nvme_device_path " || \
224242 { echo " ERROR: reset the ${nvme_device_path} namespace failed" ; exit 1; }
225243 nvme id-ns " $nvme_device_ns_path " | grep -E " lbaf|nvmcap|nsze|ncap|nuse"
226- # echo "Recreating namespace on ${nvme_device_ns_path} done"
227244}
228245
229246# Format ext4 on each element of array after "daos_reserved" is reached.
230- function mkfs_on_nvme_over_limit {
247+ mkfs_on_nvme_over_limit () {
231248 local daos_nvme_numa_limit=" ${1:? Usage: mkfs_on_nvme_over_limit <daos_nvme_numa_limit> <nvme_pci_address_array>} "
232249 shift
233250 local nvme_pci_address_array=(" $@ " )
@@ -260,22 +277,24 @@ function mkfs_on_nvme_over_limit {
260277 done
261278}
262279
263- function nvme_setup {
280+ nvme_setup () {
264281 local daos_nvme_numa_limit=" ${1:- ?Usage: nvme_setup <daos_nvme_numa_limit>} "
265282 local numa0_pci_devices=()
266283 local numa1_pci_devices=()
267284 local all_numas_pci_devices
268285 local nvme_count nvme_pcie_address_all nvme_pci_address numa_node
269286
270287 nvme_count=$( get_nvme_count_devices)
271- if [ " $nvme_count " -le 1 ]; then
288+ if [ " $nvme_count " -le 1 ]; then # Expect at least 2 NVMe devices for proper setup
272289 return 0
273290 fi
274291
275292 if ! check_spdk_setup_cmd; then
276293 exit 1
277294 fi
278-
295+
296+ pci_device_create_cache
297+
279298 nvme_pcie_address_all=$( lspci -D | awk ' /Non-Volatile memory controller/{print $1}' | sort)
280299
281300 for nvme_pci_address in $nvme_pcie_address_all ; do
@@ -284,8 +303,6 @@ function nvme_setup {
284303 echo " Skip already mounted namespace $nvme_pci_address "
285304 continue
286305 fi
287- # echo "Binding $nvme_pci_address"
288- # echo "$nvme_pci_address" | sudo tee /sys/bus/pci/drivers/nvme/bind
289306 numa_node=" $( pci_device_get_numa " $nvme_pci_address " ) "
290307 if [ " $numa_node " -eq 0 ]; then
291308 numa0_pci_devices+=(" $nvme_pci_address " )
0 commit comments