Skip to content

Commit 879bd60

Browse files
committed
Implement duplicate-fds-as-needed for forks
1 parent 8af6a89 commit 879bd60

File tree

3 files changed

+72
-14
lines changed

3 files changed

+72
-14
lines changed

lib/tinykvm/linux/fds.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,15 @@ namespace tinykvm
7777
throw std::runtime_error("Invalid file descriptor in FileDescriptors::add()");
7878
}
7979
if (is_socket) {
80-
m_fds[m_next_socket_fd] = {fd, is_writable};
80+
m_fds[m_next_socket_fd] = {fd, is_writable, false};
8181
return m_next_socket_fd++;
8282
} else {
83-
m_fds[m_next_file_fd] = {fd, is_writable};
83+
m_fds[m_next_file_fd] = {fd, is_writable, false};
8484
return m_next_file_fd++;
8585
}
8686
}
8787

88-
std::optional<FileDescriptors::Entry*> FileDescriptors::entry_for_vfd(int vfd)
88+
std::optional<const FileDescriptors::Entry*> FileDescriptors::entry_for_vfd(int vfd) const
8989
{
9090
auto it = m_fds.find(vfd);
9191
if (it != m_fds.end()) {
@@ -94,7 +94,7 @@ namespace tinykvm
9494
return std::nullopt;
9595
}
9696

97-
int FileDescriptors::translate(int vfd) const
97+
int FileDescriptors::translate(int vfd)
9898
{
9999
if (vfd >= 0 && vfd < 3) {
100100
return this->m_stdout_redirects.at(vfd);
@@ -103,6 +103,39 @@ namespace tinykvm
103103
if (it != m_fds.end()) {
104104
return it->second.real_fd;
105105
}
106+
107+
if (this->m_find_ro_master_vm_fd) {
108+
auto opt_entry = this->m_find_ro_master_vm_fd(vfd);
109+
if (opt_entry) {
110+
auto& entry = *opt_entry;
111+
const int new_fd = dup(entry->real_fd);
112+
if (new_fd < 0) {
113+
throw std::runtime_error("Failed to duplicate file descriptor");
114+
}
115+
if (this->m_verbose) {
116+
fprintf(stderr, "TinyKVM: %d -> %d\n", entry->real_fd, new_fd);
117+
}
118+
// We need to manage the *same* virtual file descriptor as the main
119+
// VM, so we need to set the real_fd of the new entry to the new fd.
120+
m_fds[vfd] = {new_fd, entry->is_writable, true};
121+
return new_fd;
122+
}
123+
}
124+
throw std::runtime_error("Invalid virtual file descriptor: " + std::to_string(vfd));
125+
}
126+
127+
int FileDescriptors::translate_unless_forked(int vfd)
128+
{
129+
if (vfd >= 0 && vfd < 3) {
130+
return this->m_stdout_redirects.at(vfd);
131+
}
132+
auto it = m_fds.find(vfd);
133+
if (it != m_fds.end()) {
134+
if (it->second.is_forked) {
135+
return -1;
136+
}
137+
return it->second.real_fd;
138+
}
106139
throw std::runtime_error("Invalid virtual file descriptor: " + std::to_string(vfd));
107140
}
108141

lib/tinykvm/linux/fds.hpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ namespace tinykvm
1919
{
2020
int real_fd = -1;
2121
bool is_writable = false;
22+
bool is_forked = false;
2223
};
2324
using open_readable_t = std::function<bool(std::string&)>;
2425
using open_writable_t = std::function<bool(std::string&)>;
26+
using find_readonly_master_vm_fd_t = std::function<std::optional<const Entry*>(int)>;
2527

2628
FileDescriptors(Machine& machine);
2729
~FileDescriptors();
@@ -37,13 +39,22 @@ namespace tinykvm
3739
/// @param vfd The virtual file descriptor to remove.
3840
void free(int vfd);
3941

40-
std::optional<Entry*> entry_for_vfd(int vfd);
42+
std::optional<const Entry*> entry_for_vfd(int vfd) const;
4143

4244
/// @brief Translate a virtual file descriptor to a real file descriptor,
4345
/// or throw an exception, failing execution.
4446
/// @param vfd The virtual file descriptor to translate.
4547
/// @return The real file descriptor.
46-
int translate(int vfd) const;
48+
int translate(int vfd);
49+
50+
/// @brief Translate a virtual file descriptor to a real one, unless the fd was
51+
/// created by duplicating an fd from the main VM, in which case this function
52+
/// returns -1, preventing a disallowed operation on the fd. E.g. it's allowed
53+
/// to close a duplicated fd, but not to use epoll_ctl() on it.
54+
/// @param vfd The virtual file descriptor to translate.
55+
/// @return The real file descriptor, or -1 if the fd was created by
56+
/// duplicating an fd from the main VM.
57+
int translate_unless_forked(int vfd);
4758

4859
bool is_socket_vfd(int vfd) const noexcept {
4960
return (vfd & SOCKET_BIT) != 0;
@@ -97,6 +108,14 @@ namespace tinykvm
97108
m_verbose = verbose;
98109
}
99110

111+
/// @brief Set the callback for looking up a file descriptor entry in the read-only master VM.
112+
/// translate() calls it when a virtual file descriptor is not found locally; if it
113+
/// returns an entry, that entry's real fd is duplicated and stored under the same vfd.
114+
/// @param callback The callback to set.
115+
void set_find_readonly_master_vm_fd_callback(find_readonly_master_vm_fd_t callback) noexcept {
116+
m_find_ro_master_vm_fd = callback;
117+
}
118+
100119
private:
101120
Machine& m_machine;
102121
std::map<int, Entry> m_fds;
@@ -108,5 +127,6 @@ namespace tinykvm
108127
bool m_verbose = false;
109128
open_readable_t m_open_readable;
110129
open_writable_t m_open_writable;
130+
find_readonly_master_vm_fd_t m_find_ro_master_vm_fd;
111131
};
112132
}

lib/tinykvm/linux/system_calls.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,17 +1174,22 @@ void Machine::setup_linux_system_calls()
11741174
SYS_epoll_ctl, [](vCPU& cpu)
11751175
{
11761176
auto& regs = cpu.registers();
1177-
const int epollfd = cpu.machine().fds().translate(regs.rdi);
1177+
const int epollfd = cpu.machine().fds().translate_unless_forked(regs.rdi);
11781178
const int op = regs.rsi;
11791179
const int fd = cpu.machine().fds().translate(regs.rdx);
11801180
const uint64_t g_event = regs.r10;
1181-
struct epoll_event event;
1182-
cpu.machine().copy_from_guest(&event, g_event, sizeof(event));
1183-
if (epoll_ctl(epollfd, op, fd, &event) < 0) {
1184-
regs.rax = -errno;
1185-
}
1186-
else {
1187-
regs.rax = 0;
1181+
if (epollfd > 0 && fd > 0)
1182+
{
1183+
struct epoll_event event;
1184+
cpu.machine().copy_from_guest(&event, g_event, sizeof(event));
1185+
if (epoll_ctl(epollfd, op, fd, &event) < 0) {
1186+
regs.rax = -errno;
1187+
}
1188+
else {
1189+
regs.rax = 0;
1190+
}
1191+
} else {
1192+
regs.rax = -EBADF;
11881193
}
11891194
SYSPRINT("epoll_ctl(epollfd=%d (%lld), op=%d, fd=%d (%lld), g_event=0x%lX) = %lld\n",
11901195
epollfd, regs.rdi, op, fd, regs.rdx, g_event, regs.rax);

0 commit comments

Comments
 (0)