Skip to content

Commit 5d63e80

Browse files
POSIX backend: LINUXAIO API introduced (#885)
* plugins/posix: getQueueType: error handling fixed If URING is not supported, the getQueueType() API now returns the correct return value (queue_t::UNSUPPORTED). The patch also cleans up the code by removing an unnecessary ifdef. Signed-off-by: Anton Nayshtut <[email protected]> * meson.build: POSIX AIO dependency fixed The current Linux POSIX AIO implementation is provided in user space by glibc. libaio, on the other hand, is the Linux kernel's AIO access library. This patch fixes the incorrect dependency. It will be restored as a dependency for the Linux AIO plugin in an upcoming patch. Signed-off-by: Anton Nayshtut <[email protected]> * plugins/posix: Linux AIO backend introduced Linux AIO, although only available on Linux, is known for much better performance than POSIX AIO. This patch implements a Linux AIO backend and integrates it into the NIXL build system so it is only built when the platform supports it. Signed-off-by: Anton Nayshtut <[email protected]> * benchmark/nixlbench: POSIX backend – LINUXAIO API This patch adds support for a POSIX LINUXAIO API using the Linux AIO plugin. Signed-off-by: Anton Nayshtut <[email protected]> * POSIX plugin: make Linux AIO the default This patch makes the Linux AIO API the default for the POSIX backend. The AIO API now corresponds to the Linux AIO. The POSIX AIO API can now be selected explicitly by specifying API type POSIXAIO. Signed-off-by: Anton Nayshtut <[email protected]> --------- Signed-off-by: Anton Nayshtut <[email protected]> Co-authored-by: Adit Ranadive <[email protected]>
1 parent 9b19467 commit 5d63e80

File tree

14 files changed

+308
-29
lines changed

14 files changed

+308
-29
lines changed

benchmark/kvbench/commands/args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def nixl_bench_args(func):
170170
func = click.option(
171171
"--posix_api_type",
172172
type=str,
173-
help="API type for POSIX operations [AIO, URING] (only used with POSIX backend",
173+
help="API type for POSIX operations [AIO, URING, POSIXAIO] (only used with POSIX backend",
174174
)(func)
175175
func = click.option(
176176
"--enable_vmm",

benchmark/nixlbench/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ sudo systemctl start etcd && sudo systemctl enable etcd
481481

482482
**POSIX Backend:**
483483
```
484-
--posix_api_type TYPE # API type for POSIX operations [AIO, URING] (default: AIO)
484+
--posix_api_type TYPE # API type for POSIX operations [AIO, URING, POSIXAIO] (default: AIO)
485485
```
486486

487487
**GPUNETIO Backend:**

benchmark/nixlbench/src/utils/utils.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,10 @@ DEFINE_string(etcd_endpoints,
102102
"ETCD server endpoints for communication (optional for storage backends)");
103103

104104
// POSIX options - only used when backend is POSIX
105-
DEFINE_string (posix_api_type,
106-
XFERBENCH_POSIX_API_AIO,
107-
"API type for POSIX operations [AIO, URING] (only used with POSIX backend)");
105+
DEFINE_string(
106+
posix_api_type,
107+
XFERBENCH_POSIX_API_AIO,
108+
"API type for POSIX operations [AIO, URING, POSIXAIO] (only used with POSIX backend)");
108109

109110
// DOCA GPUNetIO options - only used when backend is DOCA GPUNetIO
110111
DEFINE_string(gpunetio_device_list, "0", "Comma-separated GPU CUDA device id to use for \
@@ -239,9 +240,10 @@ xferBenchConfig::loadFromFlags() {
239240

240241
// Validate POSIX API type
241242
if (posix_api_type != XFERBENCH_POSIX_API_AIO &&
242-
posix_api_type != XFERBENCH_POSIX_API_URING) {
243+
posix_api_type != XFERBENCH_POSIX_API_URING &&
244+
posix_api_type != XFERBENCH_POSIX_API_POSIXAIO) {
243245
std::cerr << "Invalid POSIX API type: " << posix_api_type
244-
<< ". Must be one of [AIO, URING]" << std::endl;
246+
<< ". Must be one of [AIO, URING, POSIXAIO]" << std::endl;
245247
return -1;
246248
}
247249
}
@@ -450,7 +452,7 @@ xferBenchConfig::printConfig() {
450452

451453
// Print POSIX options if backend is POSIX
452454
if (backend == XFERBENCH_BACKEND_POSIX) {
453-
printOption ("POSIX API type (--posix_api_type=[AIO,URING])", posix_api_type);
455+
printOption("POSIX API type (--posix_api_type=[AIO,URING,POSIXAIO])", posix_api_type);
454456
}
455457

456458
// Print OBJ options if backend is OBJ

benchmark/nixlbench/src/utils/utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
// POSIX API types
8181
#define XFERBENCH_POSIX_API_AIO "AIO"
8282
#define XFERBENCH_POSIX_API_URING "URING"
83+
#define XFERBENCH_POSIX_API_POSIXAIO "POSIXAIO"
8384

8485
// OBJ S3 scheme types
8586
#define XFERBENCH_OBJ_SCHEME_HTTP "http"

benchmark/nixlbench/src/worker/nixl/nixl_worker.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,15 @@ xferBenchNixlWorker::xferBenchNixlWorker(int *argc, char ***argv, std::vector<st
181181
if (xferBenchConfig::posix_api_type == XFERBENCH_POSIX_API_AIO) {
182182
backend_params["use_aio"] = "true";
183183
backend_params["use_uring"] = "false";
184+
backend_params["use_posix_aio"] = "false";
184185
} else if (xferBenchConfig::posix_api_type == XFERBENCH_POSIX_API_URING) {
185186
backend_params["use_aio"] = "false";
186187
backend_params["use_uring"] = "true";
188+
backend_params["use_posix_aio"] = "false";
189+
} else if (xferBenchConfig::posix_api_type == XFERBENCH_POSIX_API_POSIXAIO) {
190+
backend_params["use_aio"] = "false";
191+
backend_params["use_uring"] = "false";
192+
backend_params["use_posix_aio"] = "true";
187193
}
188194
std::cout << "POSIX backend with API type: " << xferBenchConfig::posix_api_type
189195
<< std::endl;

meson.build

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ dl_dep = cpp.find_library('dl', required: true)
2929
rt_dep = cpp.find_library('rt', required: true)
3030
thread_dep = dependency('threads')
3131

32-
# Check for libaio (for POSIX plugin and test)
32+
# Check for POSIX AIO support for the POSIX plugin and test (it is part of GLIBC)
33+
posix_aio = cpp.has_function('aio_cancel', prefix: '#include <aio.h>')
34+
35+
# Check for libaio (for LINUX AIO plugin and test)
3336
aio_dep = cpp.find_library('aio', required: false)
34-
posix_aio = false
37+
linux_aio = false
3538
if aio_dep.found()
36-
posix_aio = cpp.has_function('aio_cancel', prefix: '#include <aio.h>')
39+
linux_aio = cpp.has_function('io_setup', prefix: '#include <libaio.h>', dependencies: [aio_dep])
3740
endif
3841

3942
# Forced to ignore this error due to:
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#include "linux_aio_queue.h"
19+
#include "posix_backend.h"
20+
#include <errno.h>
21+
#include "common/nixl_log.h"
22+
#include <string.h>
23+
#include <time.h>
24+
#include <stdexcept>
25+
26+
/**
 * Create a Linux AIO (libaio) queue with room for `num_entries` requests.
 *
 * The arguments are validated before any container is sized from them, so an
 * invalid (e.g. negative) `num_entries` is reported through the intended
 * std::runtime_error rather than through an allocation failure. All vectors
 * are sized before the kernel context is created, so nothing can throw after
 * io_queue_init() succeeds and leak the context (the destructor does not run
 * when a constructor throws).
 *
 * @param num_entries Number of I/O slots in the queue; must be positive.
 * @param operation   Transfer direction; must be NIXL_READ or NIXL_WRITE.
 * @throws std::runtime_error if an argument is invalid or io_queue_init() fails.
 */
linuxAioQueue::linuxAioQueue(int num_entries, nixl_xfer_op_t operation)
    : io_ctx(io_context_t()), // must be zero-initialized before io_queue_init()
      num_entries(num_entries),
      num_ios_to_submit(0),
      num_completed(0),
      operation(operation) {
    if (num_entries <= 0) {
        throw std::runtime_error("Invalid number of entries for AIO queue");
    }

    if (operation != NIXL_READ && operation != NIXL_WRITE) {
        throw std::runtime_error("Invalid operation for AIO queue");
    }

    // Safe to size the per-entry storage now that num_entries is known to be positive.
    const size_t count = static_cast<size_t>(num_entries);
    ios.resize(count);
    ios_to_submit.assign(count, nullptr);
    completed.assign(count, false);

    const int res = io_queue_init(num_entries, &io_ctx);
    if (res) {
        throw std::runtime_error("io_queue_init (" + std::to_string(num_entries) +
                                 ") failed with " + std::to_string(res));
    }
}
50+
51+
/**
 * Release the AIO context created in the constructor.
 *
 * A failure cannot be propagated from a destructor, so it is only logged
 * instead of being silently dropped.
 */
linuxAioQueue::~linuxAioQueue() {
    const int rc = io_queue_release(io_ctx);
    if (rc < 0) {
        NIXL_ERROR << "io_queue_release failed: " << rc;
    }
}
54+
55+
nixl_status_t
56+
linuxAioQueue::submit(const nixl_meta_dlist_t &, const nixl_meta_dlist_t &) {
57+
if (!num_ios_to_submit) {
58+
return NIXL_IN_PROG;
59+
}
60+
61+
int ret = io_submit(io_ctx, num_ios_to_submit, ios_to_submit.data());
62+
if (ret != num_ios_to_submit) {
63+
if (ret < 0) {
64+
NIXL_ERROR << absl::StrFormat("linux_aio submit failed: %s", nixl_strerror(-ret));
65+
} else {
66+
NIXL_ERROR << absl::StrFormat(
67+
"linux_aio submit failed. Partial submission: %d/%d", num_ios_to_submit, ret);
68+
}
69+
return NIXL_ERR_BACKEND;
70+
}
71+
72+
num_completed = 0;
73+
num_ios_to_submit = 0;
74+
return NIXL_IN_PROG;
75+
}
76+
77+
nixl_status_t
78+
linuxAioQueue::checkCompleted() {
79+
if (num_completed == num_entries) {
80+
return NIXL_SUCCESS;
81+
}
82+
83+
struct io_event events[32];
84+
int rc;
85+
struct timespec timeout = {0, 0};
86+
87+
rc = io_getevents(io_ctx, 0, 32, events, &timeout);
88+
if (rc < 0) {
89+
NIXL_ERROR << "io_getevents error: " << rc;
90+
return NIXL_ERR_BACKEND;
91+
}
92+
93+
for (int i = 0; i < rc; i++) {
94+
struct iocb *io = events[i].obj;
95+
size_t idx = (size_t)io->data;
96+
97+
ios_to_submit[idx] = nullptr; // Mark as completed
98+
99+
if (events[i].res < 0) {
100+
NIXL_ERROR << "AIO operation failed: " << events[i].res;
101+
return NIXL_ERR_BACKEND;
102+
}
103+
}
104+
105+
num_completed += rc;
106+
107+
return (num_completed == num_entries) ? NIXL_SUCCESS : NIXL_IN_PROG;
108+
}
109+
110+
nixl_status_t
111+
linuxAioQueue::prepIO(int fd, void *buf, size_t len, off_t offset) {
112+
if (num_ios_to_submit == num_entries) {
113+
NIXL_ERROR << "No available IOs";
114+
return NIXL_ERR_BACKEND;
115+
}
116+
117+
// Check if file descriptor is valid
118+
if (fd < 0) {
119+
NIXL_ERROR << "Invalid file descriptor provided to prepareIO";
120+
return NIXL_ERR_BACKEND;
121+
}
122+
123+
// Check buffer and length
124+
if (!buf || len == 0) {
125+
NIXL_ERROR << "Invalid buffer or length provided to prepareIO";
126+
return NIXL_ERR_BACKEND;
127+
}
128+
129+
int idx = num_ios_to_submit;
130+
auto io = &ios[idx];
131+
132+
if (operation == NIXL_READ) {
133+
io_prep_pread(io, fd, buf, len, offset);
134+
} else {
135+
io_prep_pwrite(io, fd, buf, len, offset);
136+
}
137+
138+
ios_to_submit[idx] = io;
139+
io->data = (void *)(uintptr_t)idx;
140+
num_ios_to_submit++;
141+
142+
return NIXL_SUCCESS;
143+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#ifndef LINUXAIO_QUEUE_H
19+
#define LINUXAIO_QUEUE_H
20+
21+
#include <vector>
22+
#include <libaio.h>
23+
#include "posix_queue.h"
24+
25+
// Forward declare the POSIX backend request handle class
26+
class nixlPosixBackendReqH;
27+
28+
class linuxAioQueue : public nixlPosixQueue {
29+
private:
30+
io_context_t io_ctx; // I/O context
31+
std::vector<struct iocb> ios; // Array of I/Os
32+
int num_entries; // Total number of entries expected
33+
std::vector<struct iocb *> ios_to_submit; // Array of I/Os to submit
34+
int num_ios_to_submit; // Total number of entries to submit
35+
std::vector<bool> completed; // Track completed I/Os
36+
int num_completed; // Number of completed operations
37+
nixl_xfer_op_t operation; // Whether this is a read operation
38+
39+
// Delete copy and move operations
40+
linuxAioQueue(const linuxAioQueue &) = delete;
41+
linuxAioQueue &
42+
operator=(const linuxAioQueue &) = delete;
43+
linuxAioQueue(linuxAioQueue &&) = delete;
44+
linuxAioQueue &
45+
operator=(linuxAioQueue &&) = delete;
46+
47+
public:
48+
linuxAioQueue(int num_entries, nixl_xfer_op_t operation);
49+
~linuxAioQueue();
50+
nixl_status_t
51+
submit(const nixl_meta_dlist_t &, const nixl_meta_dlist_t &) override;
52+
nixl_status_t
53+
checkCompleted() override;
54+
nixl_status_t
55+
prepIO(int fd, void *buf, size_t len, off_t offset) override;
56+
};
57+
58+
#endif // LINUXAIO_QUEUE_H

src/plugins/posix/meson.build

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,15 @@ else
5252
message('liburing not found, building with AIO support only')
5353
endif
5454

55+
if linux_aio
56+
compile_defs += ['-DHAVE_LINUXAIO']
57+
posix_sources += ['linux_aio_queue.cpp']
58+
plugin_link_args += ['-laio']
59+
message('Linux AIO found, adding Linux AIO support')
60+
else
61+
message('Linux AIO not found, building with POSIX AIO support only')
62+
endif
63+
5564
if 'POSIX' in static_plugins
5665
posix_backend_lib = static_library('POSIX',
5766
posix_sources,

src/plugins/posix/posix_backend.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ namespace {
7171
switch (type) {
7272
case queue_t::AIO: return "AIO";
7373
case queue_t::URING: return "URING";
74+
case queue_t::POSIXAIO:
75+
return "POSIXAIO";
7476
case queue_t::UNSUPPORTED: return "UNSUPPORTED";
7577
default: return "UNKNOWN";
7678
}
@@ -85,6 +87,10 @@ namespace {
8587
if (custom_params->count("use_aio") > 0) {
8688
const auto& value = custom_params->at("use_aio");
8789
if (value == "true" || value == "1") {
90+
if (!QueueFactory::isLinuxAioAvailable()) {
91+
NIXL_ERROR << "linux_aio backend requested but not available at runtime";
92+
return queue_t::UNSUPPORTED;
93+
}
8894
return queue_t::AIO;
8995
}
9096
}
@@ -93,19 +99,30 @@ namespace {
9399
if (custom_params->count("use_uring") > 0) {
94100
const auto& value = custom_params->at("use_uring");
95101
if (value == "true" || value == "1") {
96-
#ifndef HAVE_LIBURING
97-
NIXL_ERROR << "io_uring backend requested but not available - not built with liburing support";
98-
return queue_t::UNSUPPORTED;
99-
#endif
100102
if (!QueueFactory::isUringAvailable()) {
101103
NIXL_ERROR << "io_uring backend requested but not available at runtime";
102-
return queue_t::URING;
104+
return queue_t::UNSUPPORTED;
103105
}
104106
return queue_t::URING;
105107
}
106108
}
109+
110+
// Then check if posix_aio is explicitly requested
111+
if (custom_params->count("use_posix_aio") > 0) {
112+
const auto &value = custom_params->at("use_posix_aio");
113+
if (value == "true" || value == "1") {
114+
return queue_t::POSIXAIO;
115+
}
116+
}
107117
}
108-
return queue_t::AIO;
118+
119+
if (QueueFactory::isLinuxAioAvailable()) {
120+
return queue_t::AIO;
121+
}
122+
if (QueueFactory::isUringAvailable()) {
123+
return queue_t::URING;
124+
}
125+
return queue_t::POSIXAIO;
109126
}
110127
}
111128

@@ -147,11 +164,14 @@ nixl_status_t nixlPosixBackendReqH::initQueues() {
147164
try {
148165
switch (queue_type_) {
149166
case nixlPosixQueue::queue_t::AIO:
150-
queue = QueueFactory::createAioQueue(queue_depth_, operation);
167+
queue = QueueFactory::createLinuxAioQueue(queue_depth_, operation);
151168
break;
152169
case nixlPosixQueue::queue_t::URING:
153170
queue = QueueFactory::createUringQueue(queue_depth_, operation);
154171
break;
172+
case nixlPosixQueue::queue_t::POSIXAIO:
173+
queue = QueueFactory::createPosixAioQueue(queue_depth_, operation);
174+
break;
155175
default:
156176
NIXL_ERROR << absl::StrFormat("Invalid queue type: %s", to_string(queue_type_));
157177
return NIXL_ERR_INVALID_PARAM;
@@ -246,6 +266,9 @@ nixl_status_t nixlPosixEngine::prepXfer(const nixl_xfer_op_t &operation,
246266
case nixlPosixQueue::queue_t::URING:
247267
params["use_uring"] = "true";
248268
break;
269+
case nixlPosixQueue::queue_t::POSIXAIO:
270+
params["use_posix_aio"] = "true";
271+
break;
249272
default:
250273
NIXL_ERROR << absl::StrFormat("Invalid queue type: %s", to_string(queue_type_));
251274
return NIXL_ERR_INVALID_PARAM;

0 commit comments

Comments
 (0)