Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/src/syscall/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,11 @@ pub fn handle_syscall(uctx: &mut UserContext) {
uctx.arg3(),
uctx.arg4(),
),
Sysno::clone3 => sys_clone3(
uctx,
uctx.arg0() as _, // args_ptr
uctx.arg1() as _, // args_size
),
#[cfg(target_arch = "x86_64")]
Sysno::fork => sys_fork(uctx),
Sysno::exit => sys_exit(uctx.arg0() as _),
Expand Down
256 changes: 176 additions & 80 deletions api/src/syscall/task/clone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,114 +21,187 @@ use crate::{
};

bitflags! {
/// Options for use with [`sys_clone`].
/// Options for use with [`sys_clone`] and [`sys_clone3`].
#[derive(Debug, Clone, Copy, Default)]
struct CloneFlags: u32 {
/// The calling process and the child process run in the same
/// memory space.
const VM = CLONE_VM;
/// The caller and the child process share the same filesystem
/// information.
const FS = CLONE_FS;
/// The calling process and the child process share the same file
/// descriptor table.
const FILES = CLONE_FILES;
/// The calling process and the child process share the same table
/// of signal handlers.
const SIGHAND = CLONE_SIGHAND;
pub struct CloneFlags: u64 {
/// The calling process and the child process run in the same memory space.
const VM = CLONE_VM as u64;
/// The caller and the child process share the same filesystem information.
const FS = CLONE_FS as u64;
/// The calling process and the child process share the same file descriptor table.
const FILES = CLONE_FILES as u64;
/// The calling process and the child process share the same table of signal handlers.
const SIGHAND = CLONE_SIGHAND as u64;
/// Sets pidfd to the child process's PID file descriptor.
const PIDFD = CLONE_PIDFD;
/// If the calling process is being traced, then trace the child
/// also.
const PTRACE = CLONE_PTRACE;
/// The execution of the calling process is suspended until the
/// child releases its virtual memory resources via a call to
/// execve(2) or _exit(2) (as with vfork(2)).
const VFORK = CLONE_VFORK;
/// The parent of the new child (as returned by getppid(2))
/// will be the same as that of the calling process.
const PARENT = CLONE_PARENT;
/// The child is placed in the same thread group as the calling
/// process.
const THREAD = CLONE_THREAD;
const PIDFD = CLONE_PIDFD as u64;
/// If the calling process is being traced, then trace the child also.
const PTRACE = CLONE_PTRACE as u64;
/// The execution of the calling process is suspended until the child releases
/// its virtual memory resources via a call to execve(2) or _exit(2) (as with vfork(2)).
const VFORK = CLONE_VFORK as u64;
/// The parent of the new child (as returned by getppid(2)) will be the same
/// as that of the calling process.
const PARENT = CLONE_PARENT as u64;
/// The child is placed in the same thread group as the calling process.
const THREAD = CLONE_THREAD as u64;
/// The cloned child is started in a new mount namespace.
const NEWNS = CLONE_NEWNS;
/// The child and the calling process share a single list of System
/// V semaphore adjustment values
const SYSVSEM = CLONE_SYSVSEM;
const NEWNS = CLONE_NEWNS as u64;
/// The child and the calling process share a single list of System V
/// semaphore adjustment values.
const SYSVSEM = CLONE_SYSVSEM as u64;
/// The TLS (Thread Local Storage) descriptor is set to tls.
const SETTLS = CLONE_SETTLS;
const SETTLS = CLONE_SETTLS as u64;
/// Store the child thread ID in the parent's memory.
const PARENT_SETTID = CLONE_PARENT_SETTID;
/// Clear (zero) the child thread ID in child memory when the child
/// exits, and do a wakeup on the futex at that address.
const CHILD_CLEARTID = CLONE_CHILD_CLEARTID;
/// A tracing process cannot force `CLONE_PTRACE` on this child
/// process.
const UNTRACED = CLONE_UNTRACED;
const PARENT_SETTID = CLONE_PARENT_SETTID as u64;
/// Clear (zero) the child thread ID in child memory when the child exits,
/// and do a wakeup on the futex at that address.
const CHILD_CLEARTID = CLONE_CHILD_CLEARTID as u64;
/// A tracing process cannot force `CLONE_PTRACE` on this child process.
const UNTRACED = CLONE_UNTRACED as u64;
/// Store the child thread ID in the child's memory.
const CHILD_SETTID = CLONE_CHILD_SETTID;
const CHILD_SETTID = CLONE_CHILD_SETTID as u64;
/// Create the process in a new cgroup namespace.
const NEWCGROUP = CLONE_NEWCGROUP;
const NEWCGROUP = CLONE_NEWCGROUP as u64;
/// Create the process in a new UTS namespace.
const NEWUTS = CLONE_NEWUTS;
const NEWUTS = CLONE_NEWUTS as u64;
/// Create the process in a new IPC namespace.
const NEWIPC = CLONE_NEWIPC;
const NEWIPC = CLONE_NEWIPC as u64;
/// Create the process in a new user namespace.
const NEWUSER = CLONE_NEWUSER;
const NEWUSER = CLONE_NEWUSER as u64;
/// Create the process in a new PID namespace.
const NEWPID = CLONE_NEWPID;
const NEWPID = CLONE_NEWPID as u64;
/// Create the process in a new network namespace.
const NEWNET = CLONE_NEWNET;
const NEWNET = CLONE_NEWNET as u64;
/// The new process shares an I/O context with the calling process.
const IO = CLONE_IO;
const IO = CLONE_IO as u64;
/// Clear signal handlers on clone (since Linux 5.5).
const CLEAR_SIGHAND = 0x100000000u64;
/// Clone into specific cgroup (since Linux 5.7).
const INTO_CGROUP = 0x200000000u64;
}
}

pub fn sys_clone(
uctx: &UserContext,
flags: u32,
stack: usize,
parent_tid: usize,
#[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize,
tls: usize,
#[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize,
) -> AxResult<isize> {
const FLAG_MASK: u32 = 0xff;
let exit_signal = flags & FLAG_MASK;
let mut flags = CloneFlags::from_bits_truncate(flags & !FLAG_MASK);
if flags.contains(CloneFlags::VFORK) {
debug!("sys_clone: CLONE_VFORK slow path");
flags.remove(CloneFlags::VM);
/// Unified arguments for clone/clone3/fork/vfork.
///
/// This structure is used internally to homogenize parameters from different
/// clone syscall variants (clone, clone3, fork, vfork).
///
/// Address-like fields (`parent_tid`, `child_tid`, `pidfd`) hold raw user-space
/// addresses as `usize`; a value of `0` is treated as "not provided".
#[derive(Debug, Clone, Copy, Default)]
pub struct CloneArgs {
/// Parsed clone flags selecting what the child shares with the caller and
/// which of the optional fields below are honoured.
pub flags: CloneFlags,
/// Raw exit-signal number; `0` means no exit signal is recorded for the child.
pub exit_signal: u64,
/// Stack pointer installed in the child's user context; `0` keeps the
/// stack pointer copied from the caller.
pub stack: usize,
/// TLS value installed in the child, used only when `CloneFlags::SETTLS` is set.
pub tls: usize,
/// User address that receives the child's TID when `CloneFlags::PARENT_SETTID`
/// is set. Also the fallback destination for the pidfd when `pidfd` is `0`.
pub parent_tid: usize,
/// User address used for `CloneFlags::CHILD_SETTID` and
/// `CloneFlags::CHILD_CLEARTID`.
pub child_tid: usize,
/// User address that receives the new pidfd when `CloneFlags::PIDFD` is set;
/// `0` falls back to `parent_tid`.
pub pidfd: usize,
}

impl CloneArgs {
    /// Builds a [`CloneArgs`] from the raw arguments of the legacy `clone()` syscall.
    ///
    /// The low byte of `raw_flags` carries the exit signal. The remaining bits
    /// are parsed as [`CloneFlags`]; bits that do not correspond to a known
    /// flag are dropped.
    ///
    /// In `clone()` the `parent_tid` argument is overloaded:
    ///
    /// - with `CLONE_PIDFD` it receives the pidfd;
    /// - with `CLONE_PARENT_SETTID` it receives the child TID.
    ///
    /// Because both results would land in the same slot, the two flags are
    /// mutually exclusive here. The `pidfd` field is left at `0`.
    ///
    /// # Errors
    ///
    /// Returns [`AxError::InvalidInput`] when both `CLONE_PIDFD` and
    /// `CLONE_PARENT_SETTID` are set.
    pub fn from_clone(
        raw_flags: u32,
        stack: usize,
        parent_tid: usize,
        child_tid: usize,
        tls: usize,
    ) -> AxResult<Self> {
        // The exit signal lives in the lowest byte of the flags word.
        const EXIT_SIGNAL_BITS: u32 = 0xff;

        let signal_bits = raw_flags & EXIT_SIGNAL_BITS;
        let flag_bits = raw_flags & !EXIT_SIGNAL_BITS;
        let flags = CloneFlags::from_bits_truncate(u64::from(flag_bits));

        // Both flags would want to write through `parent_tid`.
        let slot_conflict =
            flags.contains(CloneFlags::PIDFD) && flags.contains(CloneFlags::PARENT_SETTID);
        if slot_conflict {
            return Err(AxError::InvalidInput);
        }

        Ok(Self {
            flags,
            exit_signal: u64::from(signal_bits),
            stack,
            tls,
            parent_tid,
            child_tid,
            // Legacy clone() has no dedicated pidfd pointer.
            pidfd: 0,
        })
    }
}

debug!(
"sys_clone <= flags: {flags:?}, exit_signal: {exit_signal}, stack: {stack:#x}, ptid: \
{parent_tid:#x}, ctid: {child_tid:#x}, tls: {tls:#x}"
);
fn validate_common(args: &CloneArgs) -> AxResult<()> {
let flags = args.flags;
let exit_signal = args.exit_signal;

if exit_signal != 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) {
if exit_signal > 0 && flags.contains(CloneFlags::THREAD | CloneFlags::PARENT) {
return Err(AxError::InvalidInput);
}
if flags.contains(CloneFlags::THREAD) && !flags.contains(CloneFlags::VM | CloneFlags::SIGHAND) {
return Err(AxError::InvalidInput);
}
if flags.contains(CloneFlags::PIDFD | CloneFlags::PARENT_SETTID) {
if flags.contains(CloneFlags::SIGHAND) && !flags.contains(CloneFlags::VM) {
return Err(AxError::InvalidInput);
}
if flags.contains(CloneFlags::VFORK) && flags.contains(CloneFlags::THREAD) {
return Err(AxError::InvalidInput);
}
if exit_signal >= 64 {
return Err(AxError::InvalidInput);
}
let exit_signal = Signo::from_repr(exit_signal as u8);

let namespace_flags = CloneFlags::NEWNS
| CloneFlags::NEWIPC
| CloneFlags::NEWNET
| CloneFlags::NEWPID
| CloneFlags::NEWUSER
| CloneFlags::NEWUTS
| CloneFlags::NEWCGROUP;

if flags.intersects(namespace_flags) {
warn!(
"sys_clone/sys_clone3: namespace flags detected ({:?}), stub support only",
flags & namespace_flags
);
}

Ok(())
}

/// Core implementation of clone/clone3/fork/vfork.
pub fn do_clone(uctx: &UserContext, args: CloneArgs) -> AxResult<isize> {
validate_common(&args)?;

let mut flags = args.flags;
let exit_signal = args.exit_signal;

if flags.contains(CloneFlags::VFORK) {
debug!("do_clone: CLONE_VFORK slow path");
flags.remove(CloneFlags::VM);
}

debug!(
"do_clone <= flags: {:?}, exit_signal: {}, stack: {:#x}, tls: {:#x}",
flags, exit_signal, args.stack, args.tls
);

let exit_signal = if exit_signal > 0 {
Signo::from_repr(exit_signal as u8)
} else {
None
};

let mut new_uctx = *uctx;
if stack != 0 {
new_uctx.set_sp(stack);
if args.stack != 0 {
new_uctx.set_sp(args.stack);
}
if flags.contains(CloneFlags::SETTLS) {
new_uctx.set_tls(tls);
new_uctx.set_tls(args.tls);
}
new_uctx.set_retval(0);

let set_child_tid = if flags.contains(CloneFlags::CHILD_SETTID) {
child_tid
args.child_tid
} else {
0
};
Expand All @@ -139,8 +212,8 @@ pub fn sys_clone(
let mut new_task = new_user_task(&curr.name(), new_uctx, set_child_tid);

let tid = new_task.id().as_u64() as Pid;
if flags.contains(CloneFlags::PARENT_SETTID) {
(parent_tid as *mut Pid).vm_write(tid).ok();
if flags.contains(CloneFlags::PARENT_SETTID) && args.parent_tid != 0 {
(args.parent_tid as *mut Pid).vm_write(tid)?;
}

let new_proc_data = if flags.contains(CloneFlags::THREAD) {
Expand Down Expand Up @@ -170,9 +243,12 @@ pub fn sys_clone(

let signal_actions = if flags.contains(CloneFlags::SIGHAND) {
old_proc_data.signal.actions.clone()
} else if flags.contains(CloneFlags::CLEAR_SIGHAND) {
Arc::new(SpinNoIrq::new(Default::default()))
} else {
Arc::new(SpinNoIrq::new(old_proc_data.signal.actions.lock().clone()))
};

let proc_data = ProcessData::new(
proc,
old_proc_data.exe_path.read().clone(),
Expand All @@ -182,7 +258,6 @@ pub fn sys_clone(
exit_signal,
);
proc_data.set_umask(old_proc_data.umask());
// Inherit heap pointers from parent to ensure child's heap state is consistent after fork
proc_data.set_heap_top(old_proc_data.get_heap_top());

{
Expand Down Expand Up @@ -213,12 +288,20 @@ pub fn sys_clone(

if flags.contains(CloneFlags::PIDFD) {
let pidfd = PidFd::new(&new_proc_data);
(parent_tid as *mut i32).vm_write(pidfd.add_to_fd_table(true)?)?;
let fd = pidfd.add_to_fd_table(true)?;
let target = if args.pidfd != 0 {
args.pidfd
} else {
args.parent_tid
};
if target != 0 {
(target as *mut i32).vm_write(fd)?;
}
}

let thr = Thread::new(tid, new_proc_data);
if flags.contains(CloneFlags::CHILD_CLEARTID) {
thr.set_clear_child_tid(child_tid);
if flags.contains(CloneFlags::CHILD_CLEARTID) && args.child_tid != 0 {
thr.set_clear_child_tid(args.child_tid);
}
*new_task.task_ext_mut() = Some(unsafe { AxTaskExt::from_impl(thr) });

Expand All @@ -228,6 +311,19 @@ pub fn sys_clone(
Ok(tid as _)
}

/// Entry point for the legacy `clone()` syscall.
///
/// The raw arguments are packed into a [`CloneArgs`] via
/// [`CloneArgs::from_clone`] and then handed to [`do_clone`].
///
/// The position of `child_tid` relative to `tls` depends on the target
/// architecture: on `x86_64` and `loongarch64` it comes before `tls`, on every
/// other target it comes after.
///
/// # Errors
///
/// Propagates any error from [`CloneArgs::from_clone`] or [`do_clone`].
pub fn sys_clone(
    uctx: &UserContext,
    flags: u32,
    stack: usize,
    parent_tid: usize,
    #[cfg(any(target_arch = "x86_64", target_arch = "loongarch64"))] child_tid: usize,
    tls: usize,
    #[cfg(not(any(target_arch = "x86_64", target_arch = "loongarch64")))] child_tid: usize,
) -> AxResult<isize> {
    CloneArgs::from_clone(flags, stack, parent_tid, child_tid, tls)
        .and_then(|clone_args| do_clone(uctx, clone_args))
}

#[cfg(target_arch = "x86_64")]
pub fn sys_fork(uctx: &UserContext) -> AxResult<isize> {
sys_clone(uctx, SIGCHLD, 0, 0, 0, 0)
Expand Down
Loading
Loading