Skip to content

Commit 52761ff

Browse files
committed
[TransferEngine]: Fix open namespace problem in NVMeoFController::rescan
1 parent c041d61 commit 52761ff

File tree

2 files changed

+154
-65
lines changed

2 files changed

+154
-65
lines changed

mooncake-transfer-engine/include/transport/nvmeof_generic_transport/nvmeof_initiator.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@ class NVMeoFController : public std::enable_shared_from_this<NVMeoFController> {
8181
const std::string &traddr, const std::string &trsvcid,
8282
const std::string &subnqn);
8383

84-
nvme_ctrl_t findCtrl();
85-
8684
int connect();
8785

8886
int disconnect();

mooncake-transfer-engine/src/transport/nvmeof_generic_transport/nvmeof_initiator.cpp

Lines changed: 154 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,74 @@
1515
#include "transport/nvmeof_generic_transport/nvmeof_initiator.h"
1616

1717
#include <fcntl.h>
18+
#include <unordered_set>
1819

1920
namespace mooncake {
21+
static constexpr auto kMaxRescanDuration = std::chrono::seconds(15);
22+
23+
// Looks up an already-known controller in the libnvme topology.
//
// The topology below `root` is rescanned first. Afterwards every controller
// of every subsystem of `host` is compared against the requested attributes:
// the transport type case-insensitively, and the transport address, transport
// service id and subsystem NQN exactly.
//
// Returns the first controller matching all four attributes, or nullptr if
// there is none.
static nvme_ctrl_t nvme_find_ctrl(nvme_root_t root, nvme_host_t host,
                                  const std::string &trtype,
                                  const std::string &traddr,
                                  const std::string &trsvcid,
                                  const std::string &subnqn) {
    // A controller attribute may be reported as NULL when it is not present
    // (e.g. no service id); handing NULL to strcmp() is undefined behaviour,
    // so a missing attribute is simply treated as "does not match".
    const auto matches = [](const char *actual, const std::string &wanted) {
        return actual != nullptr && strcmp(actual, wanted.c_str()) == 0;
    };

    // Scan the topology first. The result is intentionally not checked: on
    // failure we still search whatever topology is currently cached.
    nvme_scan_topology(root, nullptr, nullptr);

    nvme_subsystem_t subsys;
    nvme_ctrl_t ctrl;

    nvme_for_each_subsystem(host, subsys) {
        nvme_subsystem_for_each_ctrl(subsys, ctrl) {
            const char *transport = nvme_ctrl_get_transport(ctrl);
            if (transport == nullptr ||
                strcasecmp(transport, trtype.c_str()) != 0) {
                continue;
            }

            if (!matches(nvme_ctrl_get_traddr(ctrl), traddr)) {
                continue;
            }

            if (!matches(nvme_ctrl_get_trsvcid(ctrl), trsvcid)) {
                continue;
            }

            if (!matches(nvme_ctrl_get_subsysnqn(ctrl), subnqn)) {
                continue;
            }

            return ctrl;
        }
    }

    return nullptr;
}
58+
59+
// Collects the IDs of the controller's active namespaces into `ns_list`.
//
// Runs nvme_identify_active_ns_list() on the controller's fd (passing 0 as
// the second argument) and inserts every non-zero entry of the returned list
// into `ns_list`. Entries already present in `ns_list` are kept.
//
// Returns 0 on success, -EINVAL if the controller has no valid fd, or -EIO if
// the identify call reports a non-zero status.
static int nvme_get_active_ns_list(nvme_ctrl_t ctrl,
                                   std::unordered_set<uint32_t> &ns_list) {
    const int ctrl_fd = nvme_ctrl_get_fd(ctrl);
    if (ctrl_fd < 0) {
        LOG(ERROR) << "Invalid fd " << ctrl_fd << " of controller "
                   << nvme_ctrl_get_subsysnqn(ctrl);
        return -EINVAL;
    }

    // Zero-initialized so that slots the command does not fill stay 0 and
    // are filtered out below.
    struct nvme_ns_list identified = {0};
    const int status = nvme_identify_active_ns_list(ctrl_fd, 0, &identified);
    if (status != 0) {
        LOG(ERROR) << "Failed to identify active ns list of controller "
                   << nvme_ctrl_get_subsysnqn(ctrl) << ", rc=" << status;
        return -EIO;
    }

    for (size_t slot = 0; slot < NVME_ID_NS_LIST_MAX; ++slot) {
        const auto nsid = identified.ns[slot];
        if (!(nsid > 0)) {
            // Empty slot, not a namespace ID.
            continue;
        }
        ns_list.insert(nsid);
    }

    return 0;
}
85+
2086
std::shared_ptr<NVMeoFInitiator> NVMeoFInitiator::create(bool direct_io) {
2187
auto initiator =
2288
std::shared_ptr<NVMeoFInitiator>(new NVMeoFInitiator(direct_io));
@@ -104,40 +170,9 @@ NVMeoFController::~NVMeoFController() {
104170
}
105171
}
106172

107-
// Looks up the controller described by this object's trtype / traddr /
// trsvcid / subnqn in the initiator's libnvme topology.
//
// The topology is rescanned first. The transport type is compared
// case-insensitively, the remaining attributes exactly.
//
// Returns the first matching controller, or nullptr if there is none.
nvme_ctrl_t NVMeoFController::findCtrl() {
    // A controller attribute may be reported as NULL when it is not present
    // (e.g. no service id); handing NULL to strcmp() is undefined behaviour,
    // so a missing attribute is simply treated as "does not match".
    const auto matches = [](const char *actual, const std::string &wanted) {
        return actual != nullptr && strcmp(actual, wanted.c_str()) == 0;
    };

    // Scan the topology first. The result is intentionally not checked: on
    // failure we still search whatever topology is currently cached.
    nvme_scan_topology(initiator->root, nullptr, nullptr);

    nvme_subsystem_t subsys;
    nvme_ctrl_t candidate;

    nvme_for_each_subsystem(initiator->host, subsys) {
        nvme_subsystem_for_each_ctrl(subsys, candidate) {
            const char *transport = nvme_ctrl_get_transport(candidate);
            if (transport == nullptr ||
                strcasecmp(transport, trtype.c_str()) != 0) {
                continue;
            }

            if (!matches(nvme_ctrl_get_traddr(candidate), traddr)) {
                continue;
            }

            if (!matches(nvme_ctrl_get_trsvcid(candidate), trsvcid)) {
                continue;
            }

            if (!matches(nvme_ctrl_get_subsysnqn(candidate), subnqn)) {
                continue;
            }

            return candidate;
        }
    }

    return nullptr;
}
138-
139173
int NVMeoFController::connect() {
140-
ctrl = findCtrl();
174+
ctrl = nvme_find_ctrl(initiator->root, initiator->host, trtype, traddr,
175+
trsvcid, subnqn);
141176
if (ctrl != nullptr) {
142177
// The controller has been connected.
143178
rescan();
@@ -161,9 +196,6 @@ int NVMeoFController::connect() {
161196
// We connected the controller, so we are responsible for disconnecting it.
162197
should_disconnect_ctrl = true;
163198

164-
// Wait a moment to ensure all namespaces are attached.
165-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
166-
167199
// Trigger rescan to open namespaces.
168200
rescan();
169201

@@ -176,41 +208,100 @@ void NVMeoFController::rescan() {
176208
return;
177209
}
178210

179-
// Rescan the topology.
180-
nvme_scan_topology(initiator->root, NULL, NULL);
181-
182211
RWSpinlock::WriteGuard guard(ns_lock);
183-
nvme_ns_t ns;
184-
char ns_dev[64];
185-
186-
nvme_ctrl_for_each_ns(ctrl, ns) {
187-
auto nsid = static_cast<NamespaceID>(nvme_ns_get_nsid(ns));
188-
auto it = namespaces.find(nsid);
189-
if (it != namespaces.end() && it->second.fd >= 0) {
190-
// Namespace has been open.
191-
continue;
212+
const auto rescan_timeout =
213+
std::chrono::steady_clock::now() + kMaxRescanDuration;
214+
215+
while (true) {
216+
// Retrieve active namespace list via NVMe Identify command.
217+
std::unordered_set<uint32_t> active_ns;
218+
int rc = nvme_get_active_ns_list(ctrl, active_ns);
219+
if (rc != 0) {
220+
LOG(ERROR) << "Failed to get active ns list of controller "
221+
<< nvme_ctrl_get_name(ctrl) << ", rc=" << rc;
222+
break;
223+
}
224+
225+
// Remove invalid namespaces.
226+
auto it = namespaces.begin();
227+
while (it != namespaces.end()) {
228+
if (!active_ns.contains(it->first)) {
229+
it = namespaces.erase(it);
230+
} else {
231+
it++;
232+
}
192233
}
193234

194-
const char *name = nvme_ns_get_name(ns);
195-
int rc = snprintf(ns_dev, sizeof(ns_dev), "/dev/%s", name);
196-
if (rc <= 0) {
197-
LOG(ERROR) << "Invalid namespace device name " << name;
198-
continue;
235+
// Scan controller sysfs directory to get attached namespaces.
236+
struct dirent **ns_dirents = NULL;
237+
int num_ns_dirents = nvme_scan_ctrl_namespaces(ctrl, &ns_dirents);
238+
if (num_ns_dirents < 0) {
239+
LOG(ERROR) << "Failed to scan namespaces of controller "
240+
<< nvme_ctrl_get_name(ctrl) << ", errno=" << errno;
241+
break;
199242
}
200243

201-
int flags = O_RDWR;
202-
if (initiator->direct_io) flags |= O_DIRECT;
244+
// Open namespace block devices.
245+
for (int i = 0; i < num_ns_dirents; i++) {
246+
char ns_dev[256];
247+
rc = snprintf(ns_dev, sizeof(ns_dev), "/dev/%s",
248+
ns_dirents[i]->d_name);
249+
if (rc <= 0) {
250+
LOG(ERROR) << "Invalid namespace device name "
251+
<< ns_dirents[i]->d_name;
252+
continue;
253+
}
254+
255+
int flags = O_RDWR;
256+
if (initiator->direct_io) flags |= O_DIRECT;
257+
258+
int fd = open(ns_dev, flags);
259+
if (fd < 0) {
260+
LOG(ERROR) << "Failed to open nvme namespace " << ns_dev
261+
<< ", errno=" << errno;
262+
continue;
263+
}
264+
265+
uint32_t nsid;
266+
rc = nvme_get_nsid(fd, &nsid);
267+
if (rc != 0) {
268+
LOG(ERROR) << "Failed to get nsid of namespace "
269+
<< ns_dirents[i]->d_name << ", errno=" << errno;
270+
close(fd);
271+
continue;
272+
}
273+
274+
if (namespaces.contains(nsid) && namespaces[nsid].fd >= 0) {
275+
// The namespace has been open.
276+
close(fd);
277+
continue;
278+
}
279+
280+
LOG(INFO) << "Added namespace " << nsid << " of controller "
281+
<< nvme_ctrl_get_name(ctrl);
282+
namespaces[nsid] = {nsid, fd};
283+
}
284+
285+
// Free dirents.
286+
for (int i = 0; i < num_ns_dirents; i++) {
287+
free(ns_dirents[i]);
288+
}
289+
free(ns_dirents);
290+
291+
// Check if all active namespaces are open.
292+
if (namespaces.size() == active_ns.size()) {
293+
break;
294+
}
203295

204-
int fd = open(ns_dev, flags);
205-
if (fd < 0) {
206-
LOG(ERROR) << "Failed to open nvme namespace " << ns_dev
207-
<< ", errno=" << errno;
208-
continue;
296+
if (std::chrono::steady_clock::now() >= rescan_timeout) {
297+
LOG(ERROR) << "Timedout to wait for namespaces of " << subnqn
298+
<< " to be attached, expected " << active_ns.size()
299+
<< ", attached " << namespaces.size();
300+
break;
209301
}
210302

211-
LOG(INFO) << "Added namespace " << nsid << " to controller "
212-
<< nvme_ctrl_get_name(ctrl);
213-
namespaces[nsid] = {nsid, fd};
303+
// Wait a moment for namespaces to be attached.
304+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
214305
}
215306
}
216307

0 commit comments

Comments
 (0)