Skip to content

Commit 85289c4

Browse files
committed
lay down framework for [v]fork and clone[3]
1 parent 6706bbe commit 85289c4

File tree

4 files changed

+169
-1
lines changed

4 files changed

+169
-1
lines changed

kernel/interfaces/system/memory/pmm.cppm

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ export namespace pmm
5151
memory info();
5252

5353
page *page_for(std::uintptr_t addr);
54-
inline page *page_for(auto ptr)
54+
55+
// Pointer convenience overload of page_for: turns the pointer into its
// numeric address and forwards to page_for(std::uintptr_t).
// The constraint keeps non-pointer arguments out of this template.
template<typename Type> requires (std::is_pointer_v<Type>)
inline page *page_for(Type ptr)
{
    const auto address = reinterpret_cast<std::uintptr_t>(ptr);
    return page_for(address);
}

kernel/interfaces/system/syscall/proc.cppm

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,11 @@ export namespace syscall::proc
4242

4343
int prlimit(pid_t pid, int resource, const struct rlimit __user *new_limit, struct rlimit __user *old_limit);
4444

45+
// Legacy clone entry point (registered as syscall 56 in the x86_64 table).
long clone(unsigned long flags, void __user *stack, int __user *parent_tid, int __user *child_tid, unsigned long tls);
46+
// clone3 entry point (syscall 435): the arguments arrive in a user-space
// clone_args block that is `size` bytes long.
long clone3(struct clone_args __user *cl_args, std::size_t size);
47+
48+
// fork (syscall 57) and vfork (syscall 58) take no arguments.
pid_t fork();
49+
pid_t vfork();
50+
4551
[[noreturn]] void exit_group(int status);
4652
} // export namespace syscall::proc

kernel/source/arch/x86_64/system/syscall.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ namespace x86_64::syscall
5050
[32] = { "dup", vfs::dup },
5151
[33] = { "dup2", vfs::dup2 },
5252
[39] = { "getpid", proc::getpid },
53+
[56] = { "clone", proc::clone },
54+
[57] = { "fork", proc::fork },
55+
[58] = { "vfork", proc::vfork },
5356
[63] = { "uname", misc::uname },
5457
[72] = { "fcntl", vfs::fcntl },
5558
[79] = { "getcwd", vfs::getcwd, [](std::uintptr_t val) { return val == 0; } },
@@ -86,6 +89,7 @@ namespace x86_64::syscall
8689
[302] = { "prlimit", proc::prlimit },
8790
[318] = { "getrandom", misc::getrandom },
8891
[334] = { "rseq", proc::rseq },
92+
[435] = { "clone3", proc::clone3 }
8993
};
9094

9195
cpu_local<bool> in_syscall;

kernel/source/system/syscall/proc.cpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,162 @@ namespace syscall::proc
355355
return (errno = ENOSYS, -1);
356356
}
357357

358+
namespace
359+
{
360+
struct kclone_args
361+
{
362+
std::uint64_t flags;
363+
int __user *pidfd;
364+
int __user *child_tid;
365+
int __user *parent_tid;
366+
int exit_signal;
367+
std::uint64_t stack;
368+
std::uint64_t stack_size;
369+
std::uint64_t tls;
370+
pid_t *set_tid;
371+
std::size_t set_tid_size;
372+
int cgroup;
373+
};
374+
375+
// Bit values accepted in the flags word of clone/clone3.
// Deliberately an unscoped enum with a 64-bit underlying type so the values
// can be combined with plain integer operators.
enum clone_flags : std::uint64_t
{
    // low byte of the legacy flags word: signal sent when the child exits
    csignal              = 0x000000FF,

    // resource sharing between parent and child
    clone_vm             = 0x00000100, // share the address space
    clone_fs             = 0x00000200, // share filesystem information
    clone_files          = 0x00000400, // share the open file table
    clone_sighand        = 0x00000800, // share signal handlers and blocked signals
    clone_pidfd          = 0x00001000, // place a pidfd in the parent
    clone_ptrace         = 0x00002000, // let tracing continue on the child
    clone_vfork          = 0x00004000, // parent wants to be woken by the child on mm_release
    clone_parent         = 0x00008000, // child gets the same parent as the cloner
    clone_thread         = 0x00010000, // same thread group
    clone_newns          = 0x00020000, // new mount namespace group
    clone_sysvsem        = 0x00040000, // share System V SEM_UNDO semantics
    clone_settls         = 0x00080000, // create a new TLS for the child
    clone_parent_settid  = 0x00100000, // store the TID in the parent
    clone_child_cleartid = 0x00200000, // clear the TID in the child
    clone_detached       = 0x00400000, // unused, ignored
    clone_untraced       = 0x00800000, // tracer cannot force CLONE_PTRACE on this clone
    clone_child_settid   = 0x01000000, // store the TID in the child

    // namespaces
    clone_newcgroup      = 0x02000000, // new cgroup namespace
    clone_newuts         = 0x04000000, // new utsname namespace
    clone_newipc         = 0x08000000, // new ipc namespace
    clone_newuser        = 0x10000000, // new user namespace
    clone_newpid         = 0x20000000, // new pid namespace
    clone_newnet         = 0x40000000, // new network namespace
    clone_io             = 0x80000000, // clone the io context

    // values above 32 bits
    clone_clear_sighand  = 0x100000000ull, // reset every signal handler to SIG_DFL
    clone_into_cgroup    = 0x200000000ull, // clone into a specific cgroup, given the right permissions

    // new time namespace; note that this bit lies inside the csignal mask
    clone_newtime        = 0x00000080
};
407+
408+
// Shared back end for clone, clone3, fork and vfork.
// Not implemented yet: the arguments are ignored and every call fails,
// setting errno to ENOSYS and returning -1.
pid_t kclone(const kclone_args &args)
{
    // TODO
    lib::unused(args);

    errno = ENOSYS;
    return -1;
}
414+
} // namespace
415+
416+
long clone(unsigned long flags, void __user *stack, int __user *parent_tid, int __user *child_tid, unsigned long tls)
417+
{
418+
return kclone({
419+
.flags = (flags & 0xFFFFFFFF) & ~csignal,
420+
.pidfd = parent_tid,
421+
.child_tid = child_tid,
422+
.parent_tid = parent_tid,
423+
.exit_signal = static_cast<int>((flags & 0xFFFFFFFF) & csignal),
424+
.stack = reinterpret_cast<std::uintptr_t>(stack),
425+
.stack_size = 0,
426+
.tls = tls,
427+
.set_tid = nullptr,
428+
.set_tid_size = 0,
429+
.cgroup = -1,
430+
});
431+
}
432+
433+
// Argument block that user space passes to clone3. Every field is a 64-bit
// integer; pointer-valued fields are converted to pointers inside clone3.
// clone3 accepts any size from 64 bytes (flags through tls) up to
// sizeof(clone_args).
struct clone_args
{
    // fields covered by the minimum 64-byte size
    std::uint64_t flags;
    std::uint64_t pidfd;
    std::uint64_t child_tid;
    std::uint64_t parent_tid;
    std::uint64_t exit_signal;
    std::uint64_t stack;
    std::uint64_t stack_size;
    std::uint64_t tls;

    // fields beyond the minimum size
    std::uint64_t set_tid;      // user address of a pid_t array
    std::uint64_t set_tid_size; // number of entries in that array
    std::uint64_t cgroup;
};
447+
448+
// clone3 entry point: copies a clone_args block from user space, validates
// it, and forwards the request to kclone().
//
// cl_args  user pointer to the argument block
// size     number of bytes the caller provided; must be between 64 and
//          sizeof(clone_args) inclusive
//
// Returns kclone()'s result on success. On failure returns -1 with errno set:
//   EINVAL  size out of range, set_tid/set_tid_size inconsistent or too large,
//           bad exit_signal, or clone_into_cgroup with an unusable cgroup
//   EFAULT  the argument block or the set_tid array could not be copied
long clone3(clone_args __user *cl_args, std::size_t size)
{
    // smallest accepted block: the eight 8-byte fields flags through tls
    constexpr std::size_t min_args_size = 64;
    // most TIDs a caller may pass through set_tid
    constexpr std::size_t max_set_tid = 32;

    if (size < min_args_size || size > sizeof(clone_args))
        return (errno = EINVAL, -1);

    // zero-initialised so that fields beyond `size` read as 0
    clone_args uargs { };
    if (!lib::copy_from_user(&uargs, cl_args, size))
        return (errno = EFAULT, -1);

    if (uargs.set_tid_size > max_set_tid)
        return (errno = EINVAL, -1);

    // set_tid and set_tid_size must be supplied together or not at all
    if ((uargs.set_tid == 0) != (uargs.set_tid_size == 0))
        return (errno = EINVAL, -1);

    if ((uargs.exit_signal & ~csignal) || uargs.exit_signal > 64 /* _NSIG */)
        return (errno = EINVAL, -1);

    // clone_into_cgroup needs the full-size block and a cgroup value that fits in an int
    constexpr auto max_cgroup = static_cast<std::uint64_t>(std::numeric_limits<int>::max());
    if ((uargs.flags & clone_into_cgroup) && (uargs.cgroup > max_cgroup || size < sizeof(clone_args)))
        return (errno = EINVAL, -1);

    // copy the requested TIDs into kernel memory before building the request
    pid_t set_tid[max_set_tid] { };
    if (uargs.set_tid != 0)
    {
        const auto uset_tid = reinterpret_cast<pid_t __user *>(uargs.set_tid);
        const auto uset_tid_size_bytes = uargs.set_tid_size * sizeof(pid_t);
        if (!lib::copy_from_user(set_tid, uset_tid, uset_tid_size_bytes))
            return (errno = EFAULT, -1);
    }

    const kclone_args kargs
    {
        .flags = uargs.flags,
        .pidfd = reinterpret_cast<int __user *>(uargs.pidfd),
        .child_tid = reinterpret_cast<int __user *>(uargs.child_tid),
        .parent_tid = reinterpret_cast<int __user *>(uargs.parent_tid),
        .exit_signal = static_cast<int>(uargs.exit_signal),
        .stack = uargs.stack,
        .stack_size = uargs.stack_size,
        .tls = uargs.tls,
        .set_tid = set_tid,
        .set_tid_size = uargs.set_tid_size,
        .cgroup = static_cast<int>(uargs.cgroup),
    };

    return kclone(kargs);
}
496+
497+
// fork entry point: issues a clone request with every kclone_args field
// zeroed (no flags, no stack, no TLS) and returns kclone()'s result.
pid_t fork()
{
    // TODO: set exit_signal to sigchld
    return kclone(kclone_args { });
}
504+
505+
// vfork entry point: issues a clone request with clone_vfork and clone_vm
// set and every other kclone_args field zeroed, then returns kclone()'s result.
pid_t vfork()
{
    // TODO: set exit_signal to sigchld
    const kclone_args args { .flags = clone_vfork | clone_vm };
    return kclone(args);
}
513+
358514
[[noreturn]] void exit_group(int status)
359515
{
360516
// TODO

0 commit comments

Comments
 (0)