Skip to content

Commit 93df083

Browse files
feat: add sys_mincore (#98)
Also adjust a little bit on `sys_prctl` `PR_SET_MM` handling Signed-off-by: Weikang Guo <guoweikang@kylinos.cn> Co-authored-by: 朝倉水希 <asakuramizu111@gmail.com>
1 parent f82940e commit 93df083

File tree

4 files changed

+133
-7
lines changed

4 files changed

+133
-7
lines changed

api/src/syscall/mm/mincore.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// Copyright (C) 2025 KylinSoft Co., Ltd. <https://www.kylinos.cn/>
3+
// Copyright (C) 2025 Azure-stars <Azure_stars@126.com>
4+
// Copyright (C) 2025 Yuekai Jia <equation618@gmail.com>
5+
// See LICENSES for license details.
6+
//
7+
// This file has been modified by KylinSoft on 2025.
8+
9+
use alloc::vec;
10+
11+
use axerrno::{AxError, AxResult};
12+
use axhal::paging::MappingFlags;
13+
use axtask::current;
14+
use memory_addr::{MemoryAddr, PAGE_SIZE_4K, VirtAddr};
15+
use starry_core::task::AsThread;
16+
use starry_vm::vm_write_slice;
17+
18+
/// Check whether pages are resident in memory.
19+
///
20+
/// The mincore() system call determines whether pages of the calling process's
21+
/// virtual memory are resident in RAM.
22+
///
23+
/// # Arguments
24+
/// * `addr` - Starting address (must be a multiple of the page size)
25+
/// * `length` - Length of the region in bytes (effectively rounded up to next page boundary)
26+
/// * `vec` - Output array containing at least (length+PAGE_SIZE-1)/PAGE_SIZE bytes.
27+
///
28+
/// # Return Value
29+
/// * `Ok(0)` on success
30+
/// * `Err(EAGAIN)` - Kernel is temporarily out of resources (not implemented in StarryOS)
31+
/// * `Err(EFAULT)` - vec points to an invalid address (handled by vm_write_slice)
32+
/// * `Err(EINVAL)` - addr is not a multiple of the page size
33+
/// * `Err(ENOMEM)` - length is greater than (TASK_SIZE - addr), or negative length, or `addr` to `addr`+`length` contained unmapped memory
34+
///
35+
/// # Notes from Linux man page
36+
/// - The least significant bit (bit 0) is set if page is resident in memory
37+
/// - Bits 1-7 are reserved and currently cleared
38+
/// - Information is only a snapshot; pages can be swapped at any moment
39+
///
40+
/// # Linux Errors
41+
/// - EAGAIN: kernel temporarily out of resources
42+
/// - EFAULT: vec points to invalid address
43+
/// - EINVAL: addr not page-aligned
44+
/// - ENOMEM: length > (TASK_SIZE - addr), negative length, or unmapped memory
45+
pub fn sys_mincore(addr: usize, length: usize, vec: *mut u8) -> AxResult<isize> {
46+
let start_addr = VirtAddr::from(addr);
47+
48+
// EINVAL: addr must be a multiple of the page size
49+
if !start_addr.is_aligned(PAGE_SIZE_4K) {
50+
return Err(AxError::InvalidInput);
51+
}
52+
53+
// EFAULT: vec must not be null (basic check, vm_write_slice will do full validation)
54+
if vec.is_null() {
55+
return Err(AxError::BadAddress);
56+
}
57+
58+
debug!("sys_mincore <= addr: {addr:#x}, length: {length:#x}, vec: {vec:?}");
59+
60+
// Special case: length=0
61+
// According to Linux kernel (mm/mincore.c), length=0 returns success
62+
// WITHOUT validating that addr is mapped. This is intentional behavior
63+
// to match POSIX semantics where a zero-length operation is a no-op.
64+
if length == 0 {
65+
return Ok(0);
66+
}
67+
68+
// Calculate number of pages to check
69+
let page_count = length.div_ceil(PAGE_SIZE_4K);
70+
71+
// Get current address space
72+
let curr = current();
73+
let aspace = curr.as_thread().proc_data.aspace.lock();
74+
75+
let mut result = vec![0u8; page_count];
76+
let mut i = 0;
77+
78+
while i < page_count {
79+
let addr = start_addr + i * PAGE_SIZE_4K;
80+
81+
// ENOMEM: Check if this page is within a valid VMA
82+
let area = aspace.find_area(addr).ok_or(AxError::NoMemory)?;
83+
84+
// Verify we have at least USER access permission
85+
if !area.flags().contains(MappingFlags::USER) {
86+
return Err(AxError::NoMemory);
87+
}
88+
89+
// Query page table with batch awareness
90+
let (is_resident, size) = match aspace.page_table().query(addr) {
91+
Ok((_, _, size)) => {
92+
// Physical page exists and is resident
93+
// page_size tells us how many contiguous pages have the same status
94+
(true, size as _)
95+
}
96+
Err(_) => {
97+
// Page is mapped but not populated (lazy allocation)
98+
// We need to determine how many contiguous pages are also not populated
99+
// For safety, we check the next page or use PAGE_SIZE_4K as minimum step
100+
(false, PAGE_SIZE_4K)
101+
}
102+
};
103+
let n = size / PAGE_SIZE_4K;
104+
105+
if is_resident {
106+
let end = (i + n).min(page_count);
107+
result[i..end].fill(1);
108+
}
109+
110+
i += n;
111+
}
112+
113+
// EFAULT: Write result to user space
114+
// vm_write_slice will return EFAULT if vec is invalid
115+
vm_write_slice(vec, result.as_slice())?;
116+
117+
Ok(0)
118+
}

api/src/syscall/mm/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
mod brk;
2+
mod mincore;
23
mod mmap;
34

4-
pub use self::{brk::*, mmap::*};
5+
pub use self::{brk::*, mincore::*, mmap::*};

api/src/syscall/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ pub fn handle_syscall(uctx: &mut UserContext) {
347347
),
348348
Sysno::munmap => sys_munmap(uctx.arg0(), uctx.arg1() as _),
349349
Sysno::mprotect => sys_mprotect(uctx.arg0(), uctx.arg1() as _, uctx.arg2() as _),
350+
Sysno::mincore => sys_mincore(uctx.arg0() as _, uctx.arg1() as _, uctx.arg2() as _),
350351
Sysno::mremap => sys_mremap(
351352
uctx.arg0(),
352353
uctx.arg1() as _,

api/src/syscall/task/ctl.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ pub fn sys_get_mempolicy(
7474
Ok(0)
7575
}
7676

77+
/// prctl() is called with a first argument describing what to do, and further
78+
/// arguments with a significance depending on the first one.
79+
/// The first argument can be:
80+
/// - PR_SET_NAME: set the name of the calling thread, using the value pointed to by `arg2`
81+
/// - PR_GET_NAME: get the name of the calling thread
82+
/// - PR_SET_SECCOMP: enable seccomp mode, with the mode specified in `arg2`
83+
/// - PR_MCE_KILL: set the machine check exception policy
84+
/// - PR_SET_MM options: set various memory management options (start/end code/data/brk/stack)
7785
pub fn sys_prctl(
7886
option: u32,
7987
arg2: usize,
@@ -99,12 +107,10 @@ pub fn sys_prctl(
99107
}
100108
PR_SET_SECCOMP => {}
101109
PR_MCE_KILL => {}
102-
PR_SET_MM_START_CODE
103-
| PR_SET_MM_END_CODE
104-
| PR_SET_MM_START_DATA
105-
| PR_SET_MM_END_DATA
106-
| PR_SET_MM_START_BRK
107-
| PR_SET_MM_START_STACK => {}
110+
PR_SET_MM => {
111+
// Not implemented. Handled explicitly so the catch-all arm does not log an "unsupported option" warning.
112+
return Err(AxError::InvalidInput);
113+
}
108114
_ => {
109115
warn!("sys_prctl: unsupported option {option}");
110116
return Err(AxError::InvalidInput);

0 commit comments

Comments
 (0)