8585# "Failed to initialize SSD: [xxxx:xx:xx.x]" when DAOS engines are started.
8686SPDK_SETUP_CMD=" /usr/share/daos/spdk/scripts/setup.sh"
8787
88- function check_spdk_setup_cmd {
88+ check_spdk_setup_cmd () {
8989 if [ ! -d " $( dirname " $SPDK_SETUP_CMD " ) " ] || [ ! -f " $SPDK_SETUP_CMD " ]; then
9090 echo -n " Required SPDK scripts directory $( dirname " $SPDK_SETUP_CMD " ) "
9191 echo " or setup.sh not found!"
@@ -94,27 +94,47 @@ function check_spdk_setup_cmd {
9494 return 0
9595}
9696
# Print the number of PCI devices that lspci reports as
# "Non-Volatile memory controller".
# Outputs: the count on stdout ("0" when nothing matches).
# Returns: always 0.
get_nvme_count_devices() {
    # grep -c prints 0 but exits 1 when there is no match; "|| true" keeps
    # that case from being treated as a failure by the caller.
    # -F: the pattern is a literal string, not a regular expression.
    lspci -D | grep -c -F -- "Non-Volatile memory controller" || true
}
100100
# Caches filled by pci_device_create_cache, keyed by PCI address
# (e.g. 0000:5e:00.0). A key is present (value 1) when the status output
# reported the corresponding marker for that device.
declare -A MOUNTED_PCI_DEVICES
declare -A PCI_DEVICES_WITH_DATA

# Run "$SPDK_SETUP_CMD status" once and record which PCI devices it reports
# as mounted ("mount@") or as holding data ("data@").
# Globals:  SPDK_SETUP_CMD (read), MOUNTED_PCI_DEVICES and
#           PCI_DEVICES_WITH_DATA (reset, then written)
# Outputs:  two summary lines with the number of cached entries
# Returns:  0
pci_device_create_cache() {
    local status_output line pci_device_address
    # <domain>:<bus>:<device>.<function>, kept in a variable so the regex is
    # not subject to quoting rules inside [[ =~ ]].
    local bdf_regex='^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9]$'

    MOUNTED_PCI_DEVICES=()
    PCI_DEVICES_WITH_DATA=()
    if check_spdk_setup_cmd; then
        # Best effort: a non-zero exit of the status command must not abort
        # the script under "set -e"; whatever it printed is still parsed.
        status_output="$("$SPDK_SETUP_CMD" status 2>&1)" || true
        # Default IFS on purpose: read trims leading blanks so the first
        # word of the line is the candidate PCI address.
        while read -r line; do
            pci_device_address="${line%% *}"
            [[ "$pci_device_address" =~ $bdf_regex ]] || continue
            # Accept the marker anywhere after "Active devices:", not only as
            # the first entry, so a device listing several markers is still
            # recognised (the previous grep-based check matched the marker
            # anywhere on the device's line).
            if [[ "$line" == *"Active devices:"*"mount@"* ]]; then
                MOUNTED_PCI_DEVICES["$pci_device_address"]=1
            fi
            if [[ "$line" == *"Active devices:"*"data@"* ]]; then
                PCI_DEVICES_WITH_DATA["$pci_device_address"]=1
            fi
        done <<< "$status_output"
    fi
    echo "Cached ${#MOUNTED_PCI_DEVICES[@]} mounted PCI devices"
    echo "Cached ${#PCI_DEVICES_WITH_DATA[@]} PCI devices with data"
}

# Tell whether a PCI device was recorded as mounted by the last
# pci_device_create_cache run.
# Arguments: $1 - PCI address
# Returns:   0 if cached as mounted, 1 otherwise
pci_device_is_mounted() {
    local pci_device_address="${1:?Usage: pci_device_is_mounted <pci_device_address>}"
    # ${var+set} works on every bash with associative arrays and under
    # "set -u"; [[ -v array[key] ]] needs bash >= 4.3.
    [[ -n "${MOUNTED_PCI_DEVICES[$pci_device_address]+set}" ]]
}
105125
# Tell whether a PCI device is recorded in the PCI_DEVICES_WITH_DATA cache.
# Globals:   PCI_DEVICES_WITH_DATA (read)
# Arguments: $1 - PCI address
# Returns:   0 if the address is a key of the cache, 1 otherwise
pci_device_has_data() {
    local pci_device_address="${1:?Usage: pci_device_has_data <pci_device_address>}"
    # ${var+set} works on every bash with associative arrays and under
    # "set -u"; [[ -v array[key] ]] needs bash >= 4.3.
    [[ -n "${PCI_DEVICES_WITH_DATA[$pci_device_address]+set}" ]]
}
110130
# Print the contents of the sysfs numa_node file of a PCI device.
# Arguments: $1 - PCI address, used as the directory name under
#                 /sys/bus/pci/devices
# Outputs:   the content of the numa_node file on stdout
# Returns:   the exit status of cat (non-zero if the file cannot be read)
pci_device_get_numa() {
    local pci_device="${1:?Usage: pci_device_get_numa <pci_device_address>}"
    local pci_device_numa_path="/sys/bus/pci/devices/${pci_device}/numa_node"
    cat "${pci_device_numa_path}"
}
116136
117- function nvme_dev_get_first_by_pcie_addr() {
137+ nvme_dev_get_first_by_pcie_addr () {
118138 local pci_device_address=" ${1:? Usage: nvme_dev_get_first_by_pcie_addr <pci_device_address>} "
119139 local nvme_dir=" /sys/bus/pci/devices/$pci_device_address /nvme"
120140 local nvme_device symlink
@@ -131,9 +151,8 @@ function nvme_dev_get_first_by_pcie_addr() {
131151 fi
132152}
133153
134-
135154# Calculates --nsze and --ncap for a device so the namespace spans the full usable capacity
136- nvme_calc_full_nsze_ncap () {
155+ nvme_calc_full_nsze_ncap () {
137156 local nvme_device=" ${1:? Usage: nvme_calc_full_nsze_ncap <nvme_device>} "
138157 # Query the NVMe device info for total logical blocks and LBA size
139158 # Prefer tnvmcap, fallback to unvmcap if tnvmcap not found
@@ -151,15 +170,15 @@ nvme_calc_full_nsze_ncap() {
151170 fi
152171
153172 # Extract the size of a logical block (lba size), usually from nvme id-ns or id-ctrl
154- local lbads id_ns lba_bytes lba_count
173+ local lbads= " " id_ns= " " lba_bytes= " " lba_count= " "
155174 id_ns=$( nvme id-ns " ${nvme_device} n1" 2> /dev/null || true)
156175 if [[ -n " $id_ns " ]]; then
157176 # Look for "lbads" line in id-ns output
158177 lbads=$( echo " $id_ns " | awk -F: ' /lbads/ {gsub(/[^0-9]/,"",$2); print $2; exit}' )
159178 fi
160179 if [[ -z " $lbads " ]]; then
161180 # fallback: lbads could not be read from the id-ns output, so default to 4096-byte blocks
162- lbads=9 # Default for 512 bytes (2^9 )
181+ lbads=12 # Default for 4096 bytes (2^12 = 4096 )
163182 fi
164183 lba_bytes=$(( 2 ** lbads))
165184
@@ -170,7 +189,7 @@ nvme_calc_full_nsze_ncap() {
170189 printf -- " --nsze=0x%x --ncap=0x%x\n" " $lba_count " " $lba_count "
171190}
172191
173- function nvme_recreate_namespace {
192+ nvme_recreate_namespace () {
174193# lbaf 0 : ms:0 lbads:9 rp:0x1 (in use) → 512B blocks
175194# lbaf 1 : ms:0 lbads:12 rp:0 → 4096B blocks (4K)
176195# lbaf 2 : ms:8 lbads:9 rp:0x3 → 512B + 8B metadata
@@ -187,7 +206,6 @@ function nvme_recreate_namespace {
187206 local nvme_device_path=" /dev/${nvme_device} "
188207 local nvme_device_ns_path=" ${nvme_device_path} n1"
189208 local nvme_create_ns_opts
190- # echo "Recreating namespace on $nvme_device_path ..."
191209 # Optionally skip delete step
192210 if [[ " $skip_delete " != " true" ]]; then
193211 nvme delete-ns " $nvme_device_path " -n 0x1 || \
@@ -197,17 +215,16 @@ function nvme_recreate_namespace {
197215 else
198216 echo " INFO: Skipping namespace delete on $nvme_device_path "
199217 fi
200- nvme delete-ns " $nvme_device_path " -n 0x1 || \
201- { echo " ERROR: delete the ${nvme_device_path} namespace failed" ; exit 1; }
202218 nvme reset " $nvme_device_path " || \
203219 { echo " ERROR: reset the ${nvme_device_path} device failed" ; exit 1; }
220+
204221 nvme_create_ns_opts=$( nvme_calc_full_nsze_ncap " ${nvme_device_path} " )
205222 nvme create-ns " $nvme_device_path " $nvme_create_ns_opts --flbas=0 || \
206223 { echo " ERROR: create the ${nvme_device_path} namespace failed" ; exit 1; }
207224 nvme attach-ns " $nvme_device_path " -n 0x1 -c 0x41 || \
208225 { echo " ERROR: attach the ${nvme_device_path} namespace failed" ; exit 1; }
209226 # Wait up to 5 seconds for device node to appear
210- for i in {1..10 }; do
227+ for i in {1..5 }; do
211228 if [ -b " $nvme_device_ns_path " ]; then
212229 break
213230 fi
@@ -223,11 +240,10 @@ function nvme_recreate_namespace {
223240 nvme reset " $nvme_device_path " || \
224241 { echo " ERROR: reset the ${nvme_device_path} namespace failed" ; exit 1; }
225242 nvme id-ns " $nvme_device_ns_path " | grep -E " lbaf|nvmcap|nsze|ncap|nuse"
226- # echo "Recreating namespace on ${nvme_device_ns_path} done"
227243}
228244
229245# Format ext4 on each element of array after "daos_reserved" is reached.
230- function mkfs_on_nvme_over_limit {
246+ mkfs_on_nvme_over_limit () {
231247 local daos_nvme_numa_limit=" ${1:? Usage: mkfs_on_nvme_over_limit <daos_nvme_numa_limit> <nvme_pci_address_array>} "
232248 shift
233249 local nvme_pci_address_array=(" $@ " )
@@ -260,22 +276,26 @@ function mkfs_on_nvme_over_limit {
260276 done
261277}
262278
263- function nvme_setup {
279+ nvme_setup () {
264280 local daos_nvme_numa_limit=" ${1:- ?Usage: nvme_setup <daos_nvme_numa_limit>} "
265281 local numa0_pci_devices=()
266282 local numa1_pci_devices=()
267283 local all_numas_pci_devices
268284 local nvme_count nvme_pcie_address_all nvme_pci_address numa_node
269285
270286 nvme_count=$( get_nvme_count_devices)
271- if [ " $nvme_count " -le 1 ]; then
287+ if [ " $nvme_count " -le 1 ]; then # Expect at least 2 NVMe devices for proper setup
272288 return 0
273289 fi
274290
275291 if ! check_spdk_setup_cmd; then
276292 exit 1
277293 fi
278294
295+ set +x
296+ pci_device_create_cache
297+ set -x
298+
279299 nvme_pcie_address_all=$( lspci -D | awk ' /Non-Volatile memory controller/{print $1}' | sort)
280300
281301 for nvme_pci_address in $nvme_pcie_address_all ; do
@@ -284,8 +304,6 @@ function nvme_setup {
284304 echo " Skip already mounted namespace $nvme_pci_address "
285305 continue
286306 fi
287- # echo "Binding $nvme_pci_address"
288- # echo "$nvme_pci_address" | sudo tee /sys/bus/pci/drivers/nvme/bind
289307 numa_node=" $( pci_device_get_numa " $nvme_pci_address " ) "
290308 if [ " $numa_node " -eq 0 ]; then
291309 numa0_pci_devices+=(" $nvme_pci_address " )
@@ -311,7 +329,7 @@ function nvme_setup {
# Print the output of "$SPDK_SETUP_CMD status" when the SPDK setup script is
# available. Purely informational: a failure of the status command is ignored.
# Globals:  SPDK_SETUP_CMD (read)
# Returns:  0
spdk_setup_status() {
    if check_spdk_setup_cmd; then
        # "|| true" makes the call best effort without toggling errexit, so
        # the caller's "set -e" / "set +e" state is left exactly as it was.
        "$SPDK_SETUP_CMD" status || true
    fi
    return 0
}
0 commit comments