From afa13bae991a9f8b4821bcffc772f333fa0417c4 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 19:24:48 -0400 Subject: [PATCH 01/24] Honor GRATE_MEMORY_FLAG in mmap_syscall and brk_syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `mmap_syscall` used to do `useraddr = addr as u32` unconditionally, truncating the high 32 bits of any host pointer the grate had supplied via GRATE_MEMORY_FLAG. The truncated low bits then got re-translated through the calling cage's vmmap, landing the mapping at an arbitrary cage address — typically the end of the empty vmmap gap. The cage's subsequent memcpy clobbered whatever was already in that region (stack, locals, fds) and downstream syscalls failed with EBADF or crashes that didn't trace back to the mmap itself. `brk_syscall` had the inverse gap: it always interprets the address as a cage uaddr, so a grate that wanted to set the program break via a host sysaddr had no way to do it. Add two helpers in `typemap::datatype_conversion`: - `sc_convert_addr_to_sys(arg, arg_cageid, cageid)`: if `arg_cageid & GRATE_MEMORY_FLAG` is set, the grate has already computed a host system address — use it directly. Otherwise the arg is a uaddr in the calling cage's memory; translate via that cage's vmmap base. - `sc_convert_sys_to_user(sysaddr, cageid)`: inverse, for return-value bookkeeping and the brk case. Wire both into mmap_syscall (both MAP_FIXED and find_map_space paths) and brk_syscall (sysaddr → uaddr when the flag is set). Add `GRATE_MEMORY_FLAG` and `LIND_ARG_CAGEID_MASK` constants in sysdefs/lind_platform_const.rs to mirror glibc's addr_translation.h. Scope: only `mmap`/`mmap64` (uaddr convention per glibc `mmap.c`) and `brk` (raw uaddr per glibc `brk.c`) need this. 
`munmap`, `mprotect`, `shmat`, `shmdt` already receive pre-translated sysaddrs from their glibc wrappers (via `TRANSLATE_GUEST_POINTER_TO_HOST`), so the runtime already uses them correctly regardless of the flag — leaving them untouched avoids double-translating the cage path. Adds a grate test in `tests/grate-tests/simple-tests/` (mmap-flag.c + mmap-flag_grate.c): the grate registers an mmap handler that forwards to RawPOSIX with `GRATE_MEMORY_FLAG` set on the addr's cageid; the cage mmap-write-readback-munmap's a page. Pre-patch this trips the truncation bug. --- src/rawposix/src/fs_calls.rs | 98 +++++++++++------ .../src/constants/lind_platform_const.rs | 7 ++ src/typemap/src/datatype_conversion.rs | 66 ++++++++++- tests/grate-tests/simple-tests/mmap-flag.c | 45 ++++++++ .../simple-tests/mmap-flag_grate.c | 104 ++++++++++++++++++ 5 files changed, 286 insertions(+), 34 deletions(-) create mode 100644 tests/grate-tests/simple-tests/mmap-flag.c create mode 100644 tests/grate-tests/simple-tests/mmap-flag_grate.c diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 1ab3f3c291..7b10502167 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -15,7 +15,9 @@ use sysdefs::constants::fs_const::{ STDIN_FILENO, STDOUT_FILENO, TIOCGWINSZ, }; -use sysdefs::constants::lind_platform_const::{FDKIND_KERNEL, MAXFD, UNUSED_ARG, UNUSED_ID}; +use sysdefs::constants::lind_platform_const::{ + FDKIND_KERNEL, GRATE_MEMORY_FLAG, MAXFD, UNUSED_ARG, UNUSED_ID, +}; use sysdefs::constants::sys_const::{DEFAULT_GID, DEFAULT_UID, SIGPIPE}; use sysdefs::logging::lind_debug_panic; use typemap::cage_helpers::*; @@ -829,13 +831,6 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { - let addr = { - if addr_arg == 0 { - 0 as *mut u8 - } else { - sc_convert_to_u8_mut(addr_arg, addr_cageid, cageid) - } - }; let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, 
prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -871,9 +866,11 @@ pub extern "C" fn mmap_syscall( lind_debug_panic("mmap protection flag PROT_EXEC is not allowed in Lind"); } - // check if the provided address is multiple of pages - let rounded_addr = round_up_page(addr as u64); - if rounded_addr != addr as u64 { + // Page-align check on the low bits of addr_arg. Page alignment is a + // numeric property of the low bits regardless of which cage's base + // address gets added, since base_address is itself page-aligned. + let rounded_addr = round_up_page(addr_arg); + if rounded_addr != addr_arg { return syscall_error(Errno::EINVAL, "mmap", "address it not aligned"); } @@ -889,31 +886,61 @@ pub extern "C" fn mmap_syscall( // round up length to be multiple of pages let rounded_length = round_up_page(len as u64); - let mut useraddr = addr as u32; - // if MAP_FIXED is not set, then we need to find an address for the user + // Resolve (useraddr in calling cage, sysaddr host pointer). Honors + // GRATE_MEMORY_FLAG on addr_cageid so a grate can supply an address + // it picked against its own (or any other cage's) vmmap. + let grate_supplied = (addr_cageid & GRATE_MEMORY_FLAG) != 0; + let mut useraddr: u32; + let sysaddr: usize; + if flags & MAP_FIXED as i32 == 0 { - let vmmap = cage.vmmap.write(); - let result; + // No fixed address — runtime picks via the calling cage's vmmap. + // Use addr_arg as a hint (translated via the flag-aware helper if + // grate-supplied). 
+ let hint_useraddr = if grate_supplied && addr_arg != 0 { + match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) + .and_then(|s| sc_convert_sys_to_user(s, cageid)) + { + Ok(u) => u, + Err(_) => 0, + } + } else { + addr_arg as u32 + }; - // pick an address of appropriate size, anywhere - if useraddr == 0 { - result = vmmap.find_map_space(rounded_length as u32 >> PAGESHIFT, 1); + let vmmap = cage.vmmap.write(); + let result = if hint_useraddr == 0 { + vmmap.find_map_space(rounded_length as u32 >> PAGESHIFT, 1) } else { - // use address user provided as hint to find address - result = vmmap.find_map_space_with_hint( + vmmap.find_map_space_with_hint( rounded_length as u32 >> PAGESHIFT, 1, - addr as u32 >> PAGESHIFT, - ); - } + hint_useraddr >> PAGESHIFT, + ) + }; - // did not find desired memory region if result.is_none() { return syscall_error(Errno::ENOMEM, "mmap", "no memory"); } - let space = result.unwrap(); - useraddr = (space.start() << PAGESHIFT) as u32; + useraddr = (result.unwrap().start() << PAGESHIFT) as u32; + sysaddr = vmmap.user_to_sys(useraddr); + drop(vmmap); + } else { + // Caller specified an exact address. Use the flag-aware helper so + // a grate-supplied addr is resolved against the grate's base, and + // a cage-supplied addr against the calling cage's base. + sysaddr = match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) { + Ok(s) => s, + Err(e) => return syscall_error(e, "mmap", "invalid addr"), + }; + // Derive the calling cage's uaddr for the return value + vmmap + // bookkeeping. Errors here mean the sysaddr is outside the cage's + // linear memory range — invalid for MAP_FIXED in this cage. 
+ useraddr = match sc_convert_sys_to_user(sysaddr, cageid) { + Ok(u) => u, + Err(e) => return syscall_error(e, "mmap", "addr outside cage"), + }; } flags |= MAP_FIXED as i32; @@ -923,12 +950,6 @@ pub extern "C" fn mmap_syscall( return syscall_error(Errno::EINVAL, "mmap", "invalid flags"); } - let vmmap = cage.vmmap.read(); - - let sysaddr = vmmap.user_to_sys(useraddr); - - drop(vmmap); - if rounded_length > 0 { if flags & MAP_ANONYMOUS as i32 > 0 { fildes = -1; @@ -1188,7 +1209,18 @@ pub extern "C" fn brk_syscall( arg6: u64, arg6_cageid: u64, ) -> i32 { - let brk = sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid); + // Cage-side glibc brk.c passes a raw uaddr; the runtime page-aligns it + // and compares against vmmap.heap_start in user space. A grate calling + // with GRATE_MEMORY_FLAG passes a host sysaddr instead — translate it + // back into the calling cage's user-address space before proceeding. + let brk = if (brk_cageid & GRATE_MEMORY_FLAG) != 0 { + match sc_convert_sys_to_user(brk_arg as usize, cageid) { + Ok(u) => u as i32, + Err(e) => return syscall_error(e, "brk", "addr outside cage"), + } + } else { + sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid) + }; // would sometimes check, sometimes be a no-op depending on the compiler settings if !(sc_unusedarg(arg2, arg2_cageid) && sc_unusedarg(arg3, arg3_cageid) diff --git a/src/sysdefs/src/constants/lind_platform_const.rs b/src/sysdefs/src/constants/lind_platform_const.rs index bfe959f10b..7c7686ee05 100644 --- a/src/sysdefs/src/constants/lind_platform_const.rs +++ b/src/sysdefs/src/constants/lind_platform_const.rs @@ -32,6 +32,13 @@ pub const MAXFD: usize = 1024; // Maximum file descriptors per cage pub const MAX_LINEAR_MEMORY_SIZE: u64 = 0xFFFF_FFFF; /// Placeholder for unused syscall argument pub const UNUSED_ARG: u64 = 0xDEADBEEF_DEADBEEF; +/// MSB of a syscall arg's cageid: signals that the arg should be treated as a +/// host-side reference into the named cage's linear memory, not as a uaddr 
+/// in the calling cage's memory. Mirrors `LIND_ARG_TRANSLATE_FLAG` in +/// `src/glibc/lind_syscall/addr_translation.h`. +pub const GRATE_MEMORY_FLAG: u64 = 1u64 << 63; +/// Mask to recover the actual cageid by clearing `GRATE_MEMORY_FLAG`. +pub const LIND_ARG_CAGEID_MASK: u64 = !GRATE_MEMORY_FLAG; /// Placeholder for unused cage/grate ID pub const UNUSED_ID: u64 = 0xCAFEBABE_CAFEBABE; /// Placeholder for unused syscall name diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index ee30e0f4b2..c1d90be62b 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -10,7 +10,7 @@ use crate::cage_helpers::validate_cageid; use cage::get_cage; use std::error::Error; use std::os::raw::c_char; -use sysdefs::constants::lind_platform_const::{MAX_CAGEID, PATH_MAX}; +use sysdefs::constants::lind_platform_const::{GRATE_MEMORY_FLAG, MAX_CAGEID, PATH_MAX}; use sysdefs::constants::lind_platform_const::{UNUSED_ARG, UNUSED_ID, UNUSED_NAME}; use sysdefs::constants::Errno; use sysdefs::data::fs_struct::{ @@ -260,6 +260,70 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { arg as *mut u8 } +/// Resolve a (uaddr, cageid) pair to a host system address, honoring +/// `GRATE_MEMORY_FLAG`. +/// +/// For path-style buffer args the runtime can just dereference the address as +/// a host pointer (see `get_cstr`) — bytes are bytes. For address args that +/// the runtime *interprets* rather than dereferences (mmap, munmap, mprotect, +/// brk, shmat, shmdt), we need the actual host system address. +/// +/// - **Flag unset** (the cage-side case): `arg` is a uaddr in the *calling* +/// cage's linear memory; translate via that cage's vmmap base. Cage-side +/// glibc wrappers like `mmap.c` pass raw uaddrs without translating. +/// - **Flag set** (the grate-side case): `arg` is already a host system +/// address. 
Either the grate computed it directly, or its glibc-side +/// `TRANSLATE_ARG_TO_HOST` macro converted before the call. Use as-is. +/// +/// The `arg_cageid` is otherwise informational; the flag bit alone decides +/// translation. +/// +/// ## Returns +/// - `Ok(sysaddr)` host system address. +/// - `Err(Errno::EINVAL)` if the calling cage can't be looked up or its vmmap +/// has no base address yet (only checked on the uaddr branch). +pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result<usize, Errno> { + #[cfg(feature = "secure")] + { + if !validate_cageid(arg_cageid, cageid) { + return Err(Errno::EINVAL); + } + } + + if (arg_cageid & GRATE_MEMORY_FLAG) != 0 { + // Grate has supplied a host system address directly. + return Ok(arg as usize); + } + + let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; + let vmmap = cage.vmmap.read(); + let base = vmmap.base_address.ok_or(Errno::EINVAL)?; + Ok(base + (arg as u32) as usize) +} + +/// Inverse of `sc_convert_addr_to_sys` — translate a host system address back +/// to a uaddr in the named cage's linear memory. Used for return values of +/// mmap-family syscalls and for bookkeeping into the cage's vmmap. +/// +/// ## Arguments +/// - `sysaddr`: the host system address. +/// - `cageid`: the cage whose user-address space we want. +/// +/// ## Returns +/// - `Ok(uaddr)` truncated to u32 (cage user addresses fit in 32 bits on +/// wasm32 lind). +/// - `Err(Errno::EINVAL)` if the cage can't be looked up, its vmmap has no +/// base, or `sysaddr` is below the cage's base.
+pub fn sc_convert_sys_to_user(sysaddr: usize, cageid: u64) -> Result<u32, Errno> { + let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; + let vmmap = cage.vmmap.read(); + let base = vmmap.base_address.ok_or(Errno::EINVAL)?; + if sysaddr < base { + return Err(Errno::EINVAL); + } + Ok((sysaddr - base) as u32) +} + /// This function translates the buffer pointer from user buffer address to system address, because we are /// transferring between 32-bit WASM environment to 64-bit kernel /// diff --git a/tests/grate-tests/simple-tests/mmap-flag.c b/tests/grate-tests/simple-tests/mmap-flag.c new file mode 100644 index 0000000000..dfd08a2f3c --- /dev/null +++ b/tests/grate-tests/simple-tests/mmap-flag.c @@ -0,0 +1,45 @@ +/* Cage side of the mmap-with-GRATE_MEMORY_FLAG test. + * + * This is a vanilla mmap → write → read → munmap round-trip. Its job is + * to exercise the mmap_syscall code path when the grate interposes and + * forwards the call with `addr_cageid | GRATE_MEMORY_FLAG`. Before the + * runtime patch this trigggered a 32-bit-truncation bug in mmap_syscall + * that landed the mapping at an arbitrary cage address and clobbered the + * cage's stack — manifesting as e.g. an EBADF on a later syscall using a + * stack-resident fd whose value got memcpy'd over. + * + * If the runtime handles the flag correctly, write-then-readback in the + * mapped region preserves the data and munmap succeeds.
+ */ + +#include +#include +#include +#include + +int main(void) { + const size_t size = 4096; + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + return 1; + } + + memset(p, 0x42, size); + for (size_t i = 0; i < size; i++) { + if (((unsigned char *)p)[i] != 0x42) { + fprintf(stderr, "byte %zu mismatch (got 0x%x)\n", i, + ((unsigned char *)p)[i]); + return 1; + } + } + + if (munmap(p, size) != 0) { + perror("munmap"); + return 1; + } + + printf("[Cage|mmap-flag] PASS\n"); + return 0; +} diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c new file mode 100644 index 0000000000..b18fba6b78 --- /dev/null +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -0,0 +1,104 @@ +/* Grate side of the mmap-with-GRATE_MEMORY_FLAG test. + * + * Registers an mmap handler that forwards the cage's mmap call to the + * runtime via make_threei_call, with `addr_cage` tagged with + * `GRATE_MEMORY_FLAG`. This exercises the runtime's flag-aware path in + * mmap_syscall (skip the truncate-and-translate-via-cage-vmmap step, treat + * the addr as a host sysaddr when non-zero). + * + * The test uses MAP_ANONYMOUS|MAP_PRIVATE with addr=NULL; the runtime will + * pick an address. We're testing that the flag doesn't break the path, not + * MAP_FIXED placement. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Standard dispatcher used by every grate. Unchanged from the other + simple-tests grates. 
*/ +int pass_fptr_to_wt(uint64_t fn_ptr_uint, uint64_t cageid, uint64_t arg1, + uint64_t arg1cage, uint64_t arg2, uint64_t arg2cage, + uint64_t arg3, uint64_t arg3cage, uint64_t arg4, + uint64_t arg4cage, uint64_t arg5, uint64_t arg5cage, + uint64_t arg6, uint64_t arg6cage) { + if (fn_ptr_uint == 0) { + fprintf(stderr, "[Grate|mmap-flag] Invalid function ptr\n"); + assert(0); + } + + int (*fn)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t) = + (int (*)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t))(uintptr_t)fn_ptr_uint; + + return fn(cageid, arg1, arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, + arg4cage, arg5, arg5cage, arg6, arg6cage); +} + +/* mmap interception. Forward to RawPOSIX (MMAP_SYSCALL = 9) with the + addr's cageid tagged with GRATE_MEMORY_FLAG, asserting the runtime + accepts the flag and returns a usable cage uaddr. */ +int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, + uint64_t arg2cage, uint64_t arg3, uint64_t arg3cage, + uint64_t arg4, uint64_t arg4cage, uint64_t arg5, + uint64_t arg5cage, uint64_t arg6, uint64_t arg6cage) { + int self_grate_id = getpid(); + + /* Forward with arg1cage tagged GRATE_MEMORY_FLAG. The cage's addr is + NULL (no MAP_FIXED) so the runtime picks; we're testing that the + flag-aware branch doesn't crash and returns the same useraddr the + non-flag branch would. 
*/ + return make_threei_call( + 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, + self_grate_id | GRATE_MEMORY_FLAG, arg2, arg2cage, arg3, arg3cage, + arg4, arg4cage, arg5, arg5cage, arg6, arg6cage, + 0 /* translate_errno off — propagate raw return */ + ); +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + assert(0); + } + + int grateid = getpid(); + pid_t pid = fork(); + if (pid < 0) { + perror("fork failed"); + assert(0); + } else if (pid == 0) { + int cageid = getpid(); + uint64_t fn_ptr_addr = (uint64_t)(uintptr_t)&mmap_grate; + register_handler(cageid, 9 /* MMAP_SYSCALL */, grateid, + fn_ptr_addr); + + if (execv(argv[1], &argv[1]) == -1) { + perror("execv failed"); + assert(0); + } + } + + int status; + while (wait(&status) > 0) { + if (status != 0) { + fprintf(stderr, + "[Grate|mmap-flag] FAIL: child exited with " + "status %d\n", + status); + assert(0); + } + } + + printf("[Grate|mmap-flag] PASS\n"); + return 0; +} From 1d2d47b5778ce729c367f08efc2c291106d94d48 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 19:36:48 -0400 Subject: [PATCH 02/24] sc_convert_addr_to_sys: short-circuit arg=0 to cage base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the runtime's own early_init_stack calls mmap(addr=0, MAP_FIXED, ...) on a cage whose mmap was registered to a grate (e.g. by a grate test), the grate forwards the call with GRATE_MEMORY_FLAG set. My helper treated FLAG-set + addr=0 as "the grate handed us a NULL host pointer", returned sysaddr=0, and the inverse sys_to_user(0) returned EINVAL because 0 < cage.base_address. Symptom: "failed to allocate stack" during cage exec. A NULL host pointer is meaningless for mmap. Treat arg=0 as cage-relative regardless of the flag — that matches the pre-flag behavior of `useraddr = addr as u32 (= 0); sysaddr = user_to_sys(0) = base`. 
--- src/typemap/src/datatype_conversion.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index c1d90be62b..8afe37c7a6 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -290,6 +290,18 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< } } + // arg=0 has cage-relative meaning ("no specific address" / "uaddr 0") + // regardless of the flag — a NULL host pointer would be meaningless. + // Resolve to the calling cage's base. This matches the pre-flag + // behavior of `useraddr = addr as u32; sysaddr = user_to_sys(useraddr)` + // when addr was 0. + if arg == 0 { + let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; + let vmmap = cage.vmmap.read(); + let base = vmmap.base_address.ok_or(Errno::EINVAL)?; + return Ok(base); + } + if (arg_cageid & GRATE_MEMORY_FLAG) != 0 { // Grate has supplied a host system address directly. return Ok(arg as usize); From 9dcdcb065630bfa6c628bb51b415b72bbbfa9639 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 19:38:49 -0400 Subject: [PATCH 03/24] mmap-flag cage test: stdout prints + fflush to localize failure --- tests/grate-tests/simple-tests/mmap-flag.c | 37 ++++++++++++++-------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/grate-tests/simple-tests/mmap-flag.c b/tests/grate-tests/simple-tests/mmap-flag.c index dfd08a2f3c..9b9fdf0dcc 100644 --- a/tests/grate-tests/simple-tests/mmap-flag.c +++ b/tests/grate-tests/simple-tests/mmap-flag.c @@ -1,15 +1,11 @@ /* Cage side of the mmap-with-GRATE_MEMORY_FLAG test. * - * This is a vanilla mmap → write → read → munmap round-trip. Its job is - * to exercise the mmap_syscall code path when the grate interposes and - * forwards the call with `addr_cageid | GRATE_MEMORY_FLAG`. 
Before the - * runtime patch this trigggered a 32-bit-truncation bug in mmap_syscall - * that landed the mapping at an arbitrary cage address and clobbered the - * cage's stack — manifesting as e.g. an EBADF on a later syscall using a - * stack-resident fd whose value got memcpy'd over. + * Vanilla mmap → write → read → munmap round-trip. Exercises the + * mmap_syscall code path when the grate interposes and forwards with + * `addr_cage | GRATE_MEMORY_FLAG`. * - * If the runtime handles the flag correctly, write-then-readback in the - * mapped region preserves the data and munmap succeeds. + * Progress prints to stdout (not stderr) with explicit fflush so the + * harness can see where it died if any step fails. */ #include @@ -19,27 +15,42 @@ int main(void) { const size_t size = 4096; + + printf("[Cage|mmap-flag] calling mmap\n"); + fflush(stdout); + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (p == MAP_FAILED) { - perror("mmap"); + printf("[Cage|mmap-flag] mmap returned MAP_FAILED\n"); + fflush(stdout); return 1; } + printf("[Cage|mmap-flag] mmap returned %p\n", p); + fflush(stdout); memset(p, 0x42, size); + printf("[Cage|mmap-flag] memset done\n"); + fflush(stdout); + for (size_t i = 0; i < size; i++) { if (((unsigned char *)p)[i] != 0x42) { - fprintf(stderr, "byte %zu mismatch (got 0x%x)\n", i, - ((unsigned char *)p)[i]); + printf("[Cage|mmap-flag] byte %zu mismatch (got 0x%x)\n", + i, ((unsigned char *)p)[i]); + fflush(stdout); return 1; } } + printf("[Cage|mmap-flag] readback ok\n"); + fflush(stdout); if (munmap(p, size) != 0) { - perror("munmap"); + printf("[Cage|mmap-flag] munmap failed\n"); + fflush(stdout); return 1; } printf("[Cage|mmap-flag] PASS\n"); + fflush(stdout); return 0; } From 8b78e00e0a6e7eb8b6ca22e8db29a4cb067cd4c6 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 19:57:36 -0400 Subject: [PATCH 04/24] mmap-flag grate: log each interception with args + return value --- 
.../grate-tests/simple-tests/mmap-flag_grate.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c index b18fba6b78..45b22b4a48 100644 --- a/tests/grate-tests/simple-tests/mmap-flag_grate.c +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -53,16 +53,23 @@ int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, uint64_t arg5cage, uint64_t arg6, uint64_t arg6cage) { int self_grate_id = getpid(); - /* Forward with arg1cage tagged GRATE_MEMORY_FLAG. The cage's addr is - NULL (no MAP_FIXED) so the runtime picks; we're testing that the - flag-aware branch doesn't crash and returns the same useraddr the - non-flag branch would. */ - return make_threei_call( + printf("[Grate|mmap-flag] intercepting mmap: addr=0x%llx len=%llu " + "prot=0x%llx flags=0x%llx fd=%lld off=%lld\n", + (unsigned long long)arg1, (unsigned long long)arg2, + (unsigned long long)arg3, (unsigned long long)arg4, + (long long)arg5, (long long)arg6); + fflush(stdout); + + /* Forward with arg1cage tagged GRATE_MEMORY_FLAG. 
*/ + int ret = make_threei_call( 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, self_grate_id | GRATE_MEMORY_FLAG, arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, arg5cage, arg6, arg6cage, 0 /* translate_errno off — propagate raw return */ ); + printf("[Grate|mmap-flag] mmap returned %d\n", ret); + fflush(stdout); + return ret; } int main(int argc, char *argv[]) { From 45ea1bb9fca5d36f1095ea734f3240e789f6ae7c Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 20:05:56 -0400 Subject: [PATCH 05/24] mmap_syscall: temporary eprintln diagnostic --- src/rawposix/src/fs_calls.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 7b10502167..0abfdd1d08 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -893,6 +893,17 @@ pub extern "C" fn mmap_syscall( let mut useraddr: u32; let sysaddr: usize; + eprintln!( + "[mmap_syscall] cageid={} addr_arg={:#x} addr_cageid={:#x} flags={:#x} \ + MAP_FIXED_bit={} grate_supplied={}", + cageid, + addr_arg, + addr_cageid, + flags, + flags & MAP_FIXED as i32, + grate_supplied + ); + if flags & MAP_FIXED as i32 == 0 { // No fixed address — runtime picks via the calling cage's vmmap. // Use addr_arg as a hint (translated via the flag-aware helper if From 7f1f583ba27e9612b1cbec522f4624f728d2c334 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 20:19:43 -0400 Subject: [PATCH 06/24] sc_convert_addr_to_sys: cross-cage base lookup, not "use as-is" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I had the semantic wrong. Looking at imfs-grate's actual usage (rust-grates/imfs-grate/src/imfs/mod.rs around the cage-side mmap forward): the grate passes target_addr = a uaddr in its OWN linear memory (returned from a prior SYS_MMAP that allocated the imfs content region) along with arg_cage = grate_id | GRATE_MEMORY_FLAG. 
The runtime is supposed to read this as "uaddr in the named cage's memory" and resolve via that cage's base — not as "use as-is". Update the helper so: - flag unset → owner = calling cage (existing cage-side path) - flag set → owner = arg_cageid & LIND_ARG_CAGEID_MASK - sysaddr = base(owner) + arg This matches Sanchit's "all within addr_translation.h" framing — the generalization of the glibc-side __lind_translate_uaddr_to_host to arbitrary cageid, which only the runtime can do. Removes the temporary diagnostic eprintln. --- src/rawposix/src/fs_calls.rs | 11 ------ src/typemap/src/datatype_conversion.rs | 51 +++++++++++++++----------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 0abfdd1d08..7b10502167 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -893,17 +893,6 @@ pub extern "C" fn mmap_syscall( let mut useraddr: u32; let sysaddr: usize; - eprintln!( - "[mmap_syscall] cageid={} addr_arg={:#x} addr_cageid={:#x} flags={:#x} \ - MAP_FIXED_bit={} grate_supplied={}", - cageid, - addr_arg, - addr_cageid, - flags, - flags & MAP_FIXED as i32, - grate_supplied - ); - if flags & MAP_FIXED as i32 == 0 { // No fixed address — runtime picks via the calling cage's vmmap. 
// Use addr_arg as a hint (translated via the flag-aware helper if diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index 8afe37c7a6..4aa3963aee 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -10,7 +10,9 @@ use crate::cage_helpers::validate_cageid; use cage::get_cage; use std::error::Error; use std::os::raw::c_char; -use sysdefs::constants::lind_platform_const::{GRATE_MEMORY_FLAG, MAX_CAGEID, PATH_MAX}; +use sysdefs::constants::lind_platform_const::{ + GRATE_MEMORY_FLAG, LIND_ARG_CAGEID_MASK, MAX_CAGEID, PATH_MAX, +}; use sysdefs::constants::lind_platform_const::{UNUSED_ARG, UNUSED_ID, UNUSED_NAME}; use sysdefs::constants::Errno; use sysdefs::data::fs_struct::{ @@ -268,20 +270,25 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { /// the runtime *interprets* rather than dereferences (mmap, munmap, mprotect, /// brk, shmat, shmdt), we need the actual host system address. /// -/// - **Flag unset** (the cage-side case): `arg` is a uaddr in the *calling* -/// cage's linear memory; translate via that cage's vmmap base. Cage-side -/// glibc wrappers like `mmap.c` pass raw uaddrs without translating. -/// - **Flag set** (the grate-side case): `arg` is already a host system -/// address. Either the grate computed it directly, or its glibc-side -/// `TRANSLATE_ARG_TO_HOST` macro converted before the call. Use as-is. +/// Resolution rule, applied to both flag branches: `sysaddr = base(owner) + arg`, +/// where the owner is identified by the flag bit: +/// +/// - **Flag unset** (the cage-side case): owner = the calling cage. `arg` is +/// a uaddr in the calling cage's linear memory. Cage-side glibc wrappers +/// like `mmap.c` pass raw uaddrs without translating. +/// - **Flag set** (the grate-side case): owner = the cage named by +/// `arg_cageid & LIND_ARG_CAGEID_MASK`, typically a grate. `arg` is a uaddr +/// in *that* cage's linear memory — e.g. 
a grate forwarding a shared-mmap +/// region it already mapped against its own vmmap, asking the runtime to +/// alias the same host pages into the calling cage's address space. /// -/// The `arg_cageid` is otherwise informational; the flag bit alone decides -/// translation. +/// arg=0 is special-cased to "start of the calling cage's memory" regardless +/// of flag — a NULL host pointer would be meaningless. /// /// ## Returns /// - `Ok(sysaddr)` host system address. -/// - `Err(Errno::EINVAL)` if the calling cage can't be looked up or its vmmap -/// has no base address yet (only checked on the uaddr branch). +/// - `Err(Errno::EINVAL)` if the owning cage can't be looked up or its vmmap +/// has no base address yet. pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result<usize, Errno> { #[cfg(feature = "secure")] { @@ -290,11 +297,10 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< } } - // arg=0 has cage-relative meaning ("no specific address" / "uaddr 0") - // regardless of the flag — a NULL host pointer would be meaningless. - // Resolve to the calling cage's base. This matches the pre-flag - // behavior of `useraddr = addr as u32; sysaddr = user_to_sys(useraddr)` - // when addr was 0. + // arg=0 has cage-relative "start of cage memory" meaning regardless of + // flag; a NULL host pointer would be meaningless. Anchor to the calling + // cage's base so early_init_stack-style mmaps (addr=0, MAP_FIXED) work + // whether or not a grate forwards the call with the flag set. if arg == 0 { let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; let vmmap = cage.vmmap.read(); @@ -302,13 +308,14 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< return Ok(base); } - if (arg_cageid & GRATE_MEMORY_FLAG) != 0 { - // Grate has supplied a host system address directly.
- return Ok(arg as usize); - } + let owner_cageid = if (arg_cageid & GRATE_MEMORY_FLAG) != 0 { + arg_cageid & LIND_ARG_CAGEID_MASK + } else { + cageid + }; - let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; - let vmmap = cage.vmmap.read(); + let owner = get_cage(owner_cageid).ok_or(Errno::EINVAL)?; + let vmmap = owner.vmmap.read(); let base = vmmap.base_address.ok_or(Errno::EINVAL)?; Ok(base + (arg as u32) as usize) } From ae73bd8247fe289e969a8ec67dc8e0d64ddd4526 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 20:26:37 -0400 Subject: [PATCH 07/24] mmap_syscall: re-add diagnostic eprintln to identify why MAP_FIXED branch isn't taken --- src/rawposix/src/fs_calls.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 7b10502167..499eb1fce7 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -893,6 +893,19 @@ pub extern "C" fn mmap_syscall( let mut useraddr: u32; let sysaddr: usize; + eprintln!( + "[mmap_syscall DEBUG] cageid={} addr_arg=0x{:x} addr_cageid=0x{:x} flags=0x{:x} \ + MAP_FIXED={:#x} MAP_FIXED_bit=0x{:x} take_fixed_branch={} grate_supplied={}", + cageid, + addr_arg, + addr_cageid, + flags, + MAP_FIXED as i32, + flags & MAP_FIXED as i32, + flags & MAP_FIXED as i32 != 0, + grate_supplied + ); + if flags & MAP_FIXED as i32 == 0 { // No fixed address — runtime picks via the calling cage's vmmap. 
// Use addr_arg as a hint (translated via the flag-aware helper if From 790917c09e551efe861380c6a24e92f75bdae553 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 20:36:44 -0400 Subject: [PATCH 08/24] Switch mmap/brk runtime check from FLAG-bit to value-range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier GRATE_MEMORY_FLAG-aware branches in mmap_syscall and brk_syscall were dead code: by the time a syscall reaches the runtime, glibc's TRANSLATE_ARG_TO_HOST (invoked by make_threei_call for every arg) has already consumed the flag — translating uaddr → host pointer when the flag was set and stripping the flag bit from the cageid. The runtime always sees `(post-translation arg, flag-stripped cageid)`. For byte-buffer args (paths, read/write bufs, etc.) the distinction is invisible — the runtime dereferences the host pointer either way. For mmap/brk the runtime has to *interpret* the address, so it has to distinguish a uaddr (translate via calling cage's base) from a host sysaddr (use as-is). Use value range as the discriminator: wasm32 uaddrs are ≤ 4GB by definition; host base addresses (and therefore sysaddrs) are typically far above 4GB. The ranges don't overlap. - sc_convert_addr_to_sys now keys on `arg <= u32::MAX`. - brk_syscall now keys on the same range check, dropping the GRATE_MEMORY_FLAG check. - Removed unused GRATE_MEMORY_FLAG import from fs_calls.rs and the temporary diagnostic eprintln from mmap_syscall. 
--- src/rawposix/src/fs_calls.rs | 47 +++++++------------- src/typemap/src/datatype_conversion.rs | 60 +++++++++++++------------- 2 files changed, 46 insertions(+), 61 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 499eb1fce7..ff8f0cb720 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -15,9 +15,7 @@ use sysdefs::constants::fs_const::{ STDIN_FILENO, STDOUT_FILENO, TIOCGWINSZ, }; -use sysdefs::constants::lind_platform_const::{ - FDKIND_KERNEL, GRATE_MEMORY_FLAG, MAXFD, UNUSED_ARG, UNUSED_ID, -}; +use sysdefs::constants::lind_platform_const::{FDKIND_KERNEL, MAXFD, UNUSED_ARG, UNUSED_ID}; use sysdefs::constants::sys_const::{DEFAULT_GID, DEFAULT_UID, SIGPIPE}; use sysdefs::logging::lind_debug_panic; use typemap::cage_helpers::*; @@ -886,34 +884,20 @@ pub extern "C" fn mmap_syscall( // round up length to be multiple of pages let rounded_length = round_up_page(len as u64); - // Resolve (useraddr in calling cage, sysaddr host pointer). Honors - // GRATE_MEMORY_FLAG on addr_cageid so a grate can supply an address - // it picked against its own (or any other cage's) vmmap. - let grate_supplied = (addr_cageid & GRATE_MEMORY_FLAG) != 0; + // Resolve (useraddr in calling cage, sysaddr host pointer). Addresses + // arrive as either a cage uaddr (≤ u32::MAX, from cage-side mmap.c) or + // a host sysaddr (above u32::MAX, from a grate-forwarded call whose + // GRATE_MEMORY_FLAG was already consumed by glibc's make_threei_call / + // TRANSLATE_ARG_TO_HOST). sc_convert_addr_to_sys handles the split. 
let mut useraddr: u32; let sysaddr: usize; - eprintln!( - "[mmap_syscall DEBUG] cageid={} addr_arg=0x{:x} addr_cageid=0x{:x} flags=0x{:x} \ - MAP_FIXED={:#x} MAP_FIXED_bit=0x{:x} take_fixed_branch={} grate_supplied={}", - cageid, - addr_arg, - addr_cageid, - flags, - MAP_FIXED as i32, - flags & MAP_FIXED as i32, - flags & MAP_FIXED as i32 != 0, - grate_supplied - ); - if flags & MAP_FIXED as i32 == 0 { // No fixed address — runtime picks via the calling cage's vmmap. - // Use addr_arg as a hint (translated via the flag-aware helper if - // grate-supplied). - let hint_useraddr = if grate_supplied && addr_arg != 0 { - match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) - .and_then(|s| sc_convert_sys_to_user(s, cageid)) - { + // Use addr_arg as a hint; if a grate forwarded a host sysaddr + // (above u32 range), convert it back to a cage uaddr first. + let hint_useraddr = if addr_arg > u32::MAX as u64 { + match sc_convert_sys_to_user(addr_arg as usize, cageid) { Ok(u) => u, Err(_) => 0, } @@ -1222,11 +1206,12 @@ pub extern "C" fn brk_syscall( arg6: u64, arg6_cageid: u64, ) -> i32 { - // Cage-side glibc brk.c passes a raw uaddr; the runtime page-aligns it - // and compares against vmmap.heap_start in user space. A grate calling - // with GRATE_MEMORY_FLAG passes a host sysaddr instead — translate it - // back into the calling cage's user-address space before proceeding. - let brk = if (brk_cageid & GRATE_MEMORY_FLAG) != 0 { + // Cage-side glibc brk.c passes a raw uaddr (low 32 bits); the runtime + // page-aligns it and compares against vmmap.heap_start in user space. + // A grate forwarding the call via make_threei_call goes through + // glibc's TRANSLATE_ARG_TO_HOST which produces a host sysaddr (above + // u32 range) — convert that back to a cage uaddr before proceeding. 
+ let brk = if brk_arg > u32::MAX as u64 { match sc_convert_sys_to_user(brk_arg as usize, cageid) { Ok(u) => u as i32, Err(e) => return syscall_error(e, "brk", "addr outside cage"), diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index 4aa3963aee..dead5043b6 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -10,9 +10,7 @@ use crate::cage_helpers::validate_cageid; use cage::get_cage; use std::error::Error; use std::os::raw::c_char; -use sysdefs::constants::lind_platform_const::{ - GRATE_MEMORY_FLAG, LIND_ARG_CAGEID_MASK, MAX_CAGEID, PATH_MAX, -}; +use sysdefs::constants::lind_platform_const::{MAX_CAGEID, PATH_MAX}; use sysdefs::constants::lind_platform_const::{UNUSED_ARG, UNUSED_ID, UNUSED_NAME}; use sysdefs::constants::Errno; use sysdefs::data::fs_struct::{ @@ -270,25 +268,28 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { /// the runtime *interprets* rather than dereferences (mmap, munmap, mprotect, /// brk, shmat, shmdt), we need the actual host system address. /// -/// Resolution rule, applied to both flag branches: `sysaddr = base(owner) + arg`, -/// where the owner is identified by the flag bit: +/// Distinguishes which form of address `arg` carries: +/// +/// - **u32 range (`arg <= u32::MAX`)**: a uaddr in the calling cage's linear +/// memory. This is what cage-side glibc wrappers (e.g. `mmap.c`'s +/// `(uintptr_t) addr`) pass — wasm32 uaddrs fit in u32. We translate via +/// the calling cage's vmmap base. +/// - **Above u32 range**: a host system address already, produced by glibc's +/// `TRANSLATE_ARG_TO_HOST` macro inside `make_threei_call` (e.g. when a +/// grate forwards with `GRATE_MEMORY_FLAG` set). By the time the runtime +/// sees the call, the FLAG bit has been stripped from `arg_cageid` and the +/// arg is the resolved host pointer. Use as-is. /// -/// - **Flag unset** (the cage-side case): owner = the calling cage. 
`arg` is -/// a uaddr in the calling cage's linear memory. Cage-side glibc wrappers -/// like `mmap.c` pass raw uaddrs without translating. -/// - **Flag set** (the grate-side case): owner = the cage named by -/// `arg_cageid & LIND_ARG_CAGEID_MASK`, typically a grate. `arg` is a uaddr -/// in *that* cage's linear memory — e.g. a grate forwarding a shared-mmap -/// region it already mapped against its own vmmap, asking the runtime to -/// alias the same host pages into the calling cage's address space. +/// The ranges don't overlap: cage linear memory occupies `[base, base + 4GB]` +/// on the host, and host bases are typically far above 4GB. /// -/// arg=0 is special-cased to "start of the calling cage's memory" regardless -/// of flag — a NULL host pointer would be meaningless. +/// arg=0 is special-cased to "start of the calling cage's memory" — a NULL +/// host pointer would be meaningless. /// /// ## Returns /// - `Ok(sysaddr)` host system address. -/// - `Err(Errno::EINVAL)` if the owning cage can't be looked up or its vmmap -/// has no base address yet. +/// - `Err(Errno::EINVAL)` if the calling cage can't be looked up or its vmmap +/// has no base address yet (only on the uaddr branch). pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result { #[cfg(feature = "secure")] { @@ -296,11 +297,11 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< return Err(Errno::EINVAL); } } + let _ = arg_cageid; // FLAG is consumed glibc-side; runtime distinguishes by value range. - // arg=0 has cage-relative "start of cage memory" meaning regardless of - // flag; a NULL host pointer would be meaningless. Anchor to the calling - // cage's base so early_init_stack-style mmaps (addr=0, MAP_FIXED) work - // whether or not a grate forwards the call with the flag set. + // arg=0 has cage-relative "start of cage memory" meaning — a NULL host + // pointer would be meaningless. 
Anchor to the calling cage's base so + // early_init_stack-style mmaps (addr=0, MAP_FIXED) work. if arg == 0 { let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; let vmmap = cage.vmmap.read(); @@ -308,16 +309,15 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< return Ok(base); } - let owner_cageid = if (arg_cageid & GRATE_MEMORY_FLAG) != 0 { - arg_cageid & LIND_ARG_CAGEID_MASK - } else { - cageid - }; + // Distinguish uaddr (≤ u32::MAX) from host sysaddr (above). See doc above. + if arg <= u32::MAX as u64 { + let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; + let vmmap = cage.vmmap.read(); + let base = vmmap.base_address.ok_or(Errno::EINVAL)?; + return Ok(base + arg as usize); + } - let owner = get_cage(owner_cageid).ok_or(Errno::EINVAL)?; - let vmmap = owner.vmmap.read(); - let base = vmmap.base_address.ok_or(Errno::EINVAL)?; - Ok(base + (arg as u32) as usize) + Ok(arg as usize) } /// Inverse of `sc_convert_addr_to_sys` — translate a host system address back From 1d7004a3f30353d895c670e14b8152f623099293 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 21:07:08 -0400 Subject: [PATCH 09/24] mmap-flag grate test: only apply FLAG to fd-backed mmaps Anonymous / fd=-1 mmaps (including the runtime's pre-main early_init_stack call) are forwarded by the grate unchanged. Tagging those with GRATE_MEMORY_FLAG would resolve addr against the grate's base instead of the cage's, breaking unrelated runtime mmaps. Cage test switched from MAP_ANONYMOUS|MAP_PRIVATE to an fd-backed MAP_SHARED mmap so the FLAG path is the one being exercised. 
--- tests/grate-tests/simple-tests/mmap-flag.c | 33 ++++++++++++++----- .../simple-tests/mmap-flag_grate.c | 33 +++++++++++++------ 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/tests/grate-tests/simple-tests/mmap-flag.c b/tests/grate-tests/simple-tests/mmap-flag.c index 9b9fdf0dcc..caee41b398 100644 --- a/tests/grate-tests/simple-tests/mmap-flag.c +++ b/tests/grate-tests/simple-tests/mmap-flag.c @@ -1,26 +1,40 @@ /* Cage side of the mmap-with-GRATE_MEMORY_FLAG test. * - * Vanilla mmap → write → read → munmap round-trip. Exercises the - * mmap_syscall code path when the grate interposes and forwards with - * `addr_cage | GRATE_MEMORY_FLAG`. + * fd-backed mmap → write → read → munmap round-trip. The grate hands + * this off to RawPOSIX with `addr_cage | GRATE_MEMORY_FLAG`, exercising + * the runtime's flag-aware path in mmap_syscall. * - * Progress prints to stdout (not stderr) with explicit fflush so the - * harness can see where it died if any step fails. + * Anonymous mmaps (including the runtime's own pre-main stack setup) + * are forwarded by the grate without the flag and aren't exercised here. 
*/ +#include #include #include #include #include +#define FILE_PATH "mmap-flag.tmp" + int main(void) { const size_t size = 4096; - printf("[Cage|mmap-flag] calling mmap\n"); + int fd = open(FILE_PATH, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + printf("[Cage|mmap-flag] open failed\n"); + fflush(stdout); + return 1; + } + if (ftruncate(fd, size) != 0) { + printf("[Cage|mmap-flag] ftruncate failed\n"); + fflush(stdout); + return 1; + } + + printf("[Cage|mmap-flag] calling fd-backed mmap (fd=%d)\n", fd); fflush(stdout); - void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (p == MAP_FAILED) { printf("[Cage|mmap-flag] mmap returned MAP_FAILED\n"); fflush(stdout); @@ -50,6 +64,9 @@ int main(void) { return 1; } + close(fd); + unlink(FILE_PATH); + printf("[Cage|mmap-flag] PASS\n"); fflush(stdout); return 0; diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c index 45b22b4a48..d50e5217f9 100644 --- a/tests/grate-tests/simple-tests/mmap-flag_grate.c +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -44,27 +44,40 @@ int pass_fptr_to_wt(uint64_t fn_ptr_uint, uint64_t cageid, uint64_t arg1, arg4cage, arg5, arg5cage, arg6, arg6cage); } -/* mmap interception. Forward to RawPOSIX (MMAP_SYSCALL = 9) with the - addr's cageid tagged with GRATE_MEMORY_FLAG, asserting the runtime - accepts the flag and returns a usable cage uaddr. */ +/* mmap interception. + + Anonymous / fd == -1 mmaps (including the runtime's pre-main + early_init_stack call) are forwarded unchanged — the FLAG path is + meaningless for them and applying it would resolve addr against the + grate's base instead of the cage's, breaking those calls. + + File-backed mmaps (fd >= 0, !MAP_ANON) are forwarded with arg1cage + tagged GRATE_MEMORY_FLAG, exercising the runtime's flag-aware path + in mmap_syscall. 
*/ +#define MAP_ANON_FLAG 0x20 + int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, uint64_t arg2cage, uint64_t arg3, uint64_t arg3cage, uint64_t arg4, uint64_t arg4cage, uint64_t arg5, uint64_t arg5cage, uint64_t arg6, uint64_t arg6cage) { int self_grate_id = getpid(); + int fd = (int)(int64_t)arg5; + int is_anonymous = (fd < 0) || ((arg4 & MAP_ANON_FLAG) != 0); printf("[Grate|mmap-flag] intercepting mmap: addr=0x%llx len=%llu " - "prot=0x%llx flags=0x%llx fd=%lld off=%lld\n", + "prot=0x%llx flags=0x%llx fd=%d off=%lld flag_path=%d\n", (unsigned long long)arg1, (unsigned long long)arg2, - (unsigned long long)arg3, (unsigned long long)arg4, - (long long)arg5, (long long)arg6); + (unsigned long long)arg3, (unsigned long long)arg4, fd, + (long long)arg6, !is_anonymous); fflush(stdout); - /* Forward with arg1cage tagged GRATE_MEMORY_FLAG. */ + uint64_t fwd_arg1cage = + is_anonymous ? arg1cage : (self_grate_id | GRATE_MEMORY_FLAG); + int ret = make_threei_call( - 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, - self_grate_id | GRATE_MEMORY_FLAG, arg2, arg2cage, arg3, arg3cage, - arg4, arg4cage, arg5, arg5cage, arg6, arg6cage, + 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, fwd_arg1cage, + arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, arg5cage, + arg6, arg6cage, 0 /* translate_errno off — propagate raw return */ ); printf("[Grate|mmap-flag] mmap returned %d\n", ret); From c4ba2a785e2c2b6d2bbf7ec14cd7ee98ed69e548 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 21:24:57 -0400 Subject: [PATCH 10/24] diagnostics: hex eprintln in mmap_syscall + grate, to compare with/without grate args --- src/rawposix/src/fs_calls.rs | 15 +++++++++ .../simple-tests/mmap-flag_grate.c | 31 ++++++++++++++----- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index ff8f0cb720..4cb8e0ca1f 100644 --- a/src/rawposix/src/fs_calls.rs +++ 
b/src/rawposix/src/fs_calls.rs @@ -829,6 +829,11 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { + eprintln!( + "[mmap_syscall] entry cageid={} addr_arg={:#018x} addr_cageid={:#x} len_arg={:#018x} prot_arg={:#x} flags_arg={:#x} vfd_arg={:#018x} off_arg={:#018x}", + cageid, addr_arg, addr_cageid, len_arg, prot_arg, flags_arg, vfd_arg, off_arg + ); + let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -947,6 +952,11 @@ pub extern "C" fn mmap_syscall( return syscall_error(Errno::EINVAL, "mmap", "invalid flags"); } + eprintln!( + "[mmap_syscall] resolved cageid={} sysaddr={:#x} useraddr={:#x} rounded_length={:#x} flags={:#x} fildes={}", + cageid, sysaddr, useraddr, rounded_length, flags, fildes + ); + if rounded_length > 0 { if flags & MAP_ANONYMOUS as i32 > 0 { fildes = -1; @@ -962,6 +972,11 @@ pub extern "C" fn mmap_syscall( off, ); + eprintln!( + "[mmap_syscall] mmap_inner returned cageid={} result={:#x} (errno_check={})", + cageid, result, is_mmap_error(result) + ); + // Check for error BEFORE sys_to_user conversion if is_mmap_error(result) { let errno = get_errno(); diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c index d50e5217f9..88dc85aae7 100644 --- a/tests/grate-tests/simple-tests/mmap-flag_grate.c +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -64,24 +64,39 @@ int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, int fd = (int)(int64_t)arg5; int is_anonymous = (fd < 0) || ((arg4 & MAP_ANON_FLAG) != 0); - printf("[Grate|mmap-flag] intercepting mmap: addr=0x%llx len=%llu " - "prot=0x%llx flags=0x%llx fd=%d off=%lld flag_path=%d\n", - (unsigned long long)arg1, (unsigned long long)arg2, - (unsigned long long)arg3, (unsigned long long)arg4, fd, - (long long)arg6, 
!is_anonymous); - fflush(stdout); + fprintf(stderr, + "[Grate|mmap-flag] entry cageid=%llu arg1=%#018llx " + "arg1cage=%#llx arg2=%#018llx arg3=%#llx arg4=%#llx " + "arg5=%#018llx arg6=%#018llx anon=%d\n", + (unsigned long long)cageid, (unsigned long long)arg1, + (unsigned long long)arg1cage, (unsigned long long)arg2, + (unsigned long long)arg3, (unsigned long long)arg4, + (unsigned long long)arg5, (unsigned long long)arg6, + is_anonymous); + fflush(stderr); uint64_t fwd_arg1cage = is_anonymous ? arg1cage : (self_grate_id | GRATE_MEMORY_FLAG); + fprintf(stderr, + "[Grate|mmap-flag] forwarding arg1=%#018llx fwd_arg1cage=%#llx " + "arg2=%#018llx arg3=%#llx arg4=%#llx arg5=%#018llx " + "arg6=%#018llx\n", + (unsigned long long)arg1, + (unsigned long long)fwd_arg1cage, (unsigned long long)arg2, + (unsigned long long)arg3, (unsigned long long)arg4, + (unsigned long long)arg5, (unsigned long long)arg6); + fflush(stderr); + int ret = make_threei_call( 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, fwd_arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, arg5cage, arg6, arg6cage, 0 /* translate_errno off — propagate raw return */ ); - printf("[Grate|mmap-flag] mmap returned %d\n", ret); - fflush(stdout); + fprintf(stderr, "[Grate|mmap-flag] make_threei_call returned %d (%#x)\n", + ret, (unsigned)ret); + fflush(stderr); return ret; } From 304f5a4dfa59b27cb034beaad1a7a9e72da44538 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 21:31:39 -0400 Subject: [PATCH 11/24] mmap-flag grate: fix target cageid (use arg5cage, not handler's grate id) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first cageid param to a grate handler is the GRATE's id (3i's _call_grate_func passes grateid there), not the calling cage. 
Forwarding with target=cageid mmap'd at the grate's base, clobbering the grate's function table — the cage's instance setup completed normally, then the grate's next call_indirect trapped with 'uninitialized element'. imfs already documents this: arg5 (fd) is an integer whose cage tag is reliably the original caller (handlers.rs:1219-1223). Use arg5cage as the forwarding target. --- .../simple-tests/mmap-flag_grate.c | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c index 88dc85aae7..c6057c05f9 100644 --- a/tests/grate-tests/simple-tests/mmap-flag_grate.c +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -64,34 +64,40 @@ int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, int fd = (int)(int64_t)arg5; int is_anonymous = (fd < 0) || ((arg4 & MAP_ANON_FLAG) != 0); + /* `cageid` passed to the handler is the grate's id (3i's + _call_grate_func passes grateid here). The calling cage's id is + reliably carried in an integer arg's cage tag — use arg5cage (fd) + per the convention imfs follows. 
*/ + uint64_t calling_cage = arg5cage; + fprintf(stderr, - "[Grate|mmap-flag] entry cageid=%llu arg1=%#018llx " - "arg1cage=%#llx arg2=%#018llx arg3=%#llx arg4=%#llx " - "arg5=%#018llx arg6=%#018llx anon=%d\n", - (unsigned long long)cageid, (unsigned long long)arg1, - (unsigned long long)arg1cage, (unsigned long long)arg2, - (unsigned long long)arg3, (unsigned long long)arg4, - (unsigned long long)arg5, (unsigned long long)arg6, - is_anonymous); + "[Grate|mmap-flag] entry handler_cageid=%llu calling_cage=%llu " + "arg1=%#018llx arg1cage=%#llx arg2=%#018llx arg3=%#llx " + "arg4=%#llx arg5=%#018llx arg6=%#018llx anon=%d\n", + (unsigned long long)cageid, (unsigned long long)calling_cage, + (unsigned long long)arg1, (unsigned long long)arg1cage, + (unsigned long long)arg2, (unsigned long long)arg3, + (unsigned long long)arg4, (unsigned long long)arg5, + (unsigned long long)arg6, is_anonymous); fflush(stderr); uint64_t fwd_arg1cage = is_anonymous ? arg1cage : (self_grate_id | GRATE_MEMORY_FLAG); fprintf(stderr, - "[Grate|mmap-flag] forwarding arg1=%#018llx fwd_arg1cage=%#llx " - "arg2=%#018llx arg3=%#llx arg4=%#llx arg5=%#018llx " - "arg6=%#018llx\n", - (unsigned long long)arg1, + "[Grate|mmap-flag] forwarding target=%llu arg1=%#018llx " + "fwd_arg1cage=%#llx arg2=%#018llx arg3=%#llx arg4=%#llx " + "arg5=%#018llx arg6=%#018llx\n", + (unsigned long long)calling_cage, (unsigned long long)arg1, (unsigned long long)fwd_arg1cage, (unsigned long long)arg2, (unsigned long long)arg3, (unsigned long long)arg4, (unsigned long long)arg5, (unsigned long long)arg6); fflush(stderr); int ret = make_threei_call( - 9 /* MMAP_SYSCALL */, 0, self_grate_id, cageid, arg1, fwd_arg1cage, - arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, arg5cage, - arg6, arg6cage, + 9 /* MMAP_SYSCALL */, 0, self_grate_id, calling_cage, arg1, + fwd_arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, + arg5cage, arg6, arg6cage, 0 /* translate_errno off — propagate raw return */ ); 
fprintf(stderr, "[Grate|mmap-flag] make_threei_call returned %d (%#x)\n", From ef4846fb36857306d6e8a7c71c1fce4654818003 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 21:33:55 -0400 Subject: [PATCH 12/24] Strip diagnostic eprintlns and grate/cage debug printfs --- src/rawposix/src/fs_calls.rs | 15 -------- tests/grate-tests/simple-tests/mmap-flag.c | 28 ++------------- .../simple-tests/mmap-flag_grate.c | 34 +++---------------- 3 files changed, 8 insertions(+), 69 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 4cb8e0ca1f..ff8f0cb720 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -829,11 +829,6 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { - eprintln!( - "[mmap_syscall] entry cageid={} addr_arg={:#018x} addr_cageid={:#x} len_arg={:#018x} prot_arg={:#x} flags_arg={:#x} vfd_arg={:#018x} off_arg={:#018x}", - cageid, addr_arg, addr_cageid, len_arg, prot_arg, flags_arg, vfd_arg, off_arg - ); - let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -952,11 +947,6 @@ pub extern "C" fn mmap_syscall( return syscall_error(Errno::EINVAL, "mmap", "invalid flags"); } - eprintln!( - "[mmap_syscall] resolved cageid={} sysaddr={:#x} useraddr={:#x} rounded_length={:#x} flags={:#x} fildes={}", - cageid, sysaddr, useraddr, rounded_length, flags, fildes - ); - if rounded_length > 0 { if flags & MAP_ANONYMOUS as i32 > 0 { fildes = -1; @@ -972,11 +962,6 @@ pub extern "C" fn mmap_syscall( off, ); - eprintln!( - "[mmap_syscall] mmap_inner returned cageid={} result={:#x} (errno_check={})", - cageid, result, is_mmap_error(result) - ); - // Check for error BEFORE sys_to_user conversion if is_mmap_error(result) { let errno = get_errno(); diff --git a/tests/grate-tests/simple-tests/mmap-flag.c 
b/tests/grate-tests/simple-tests/mmap-flag.c index caee41b398..45e1b38a7b 100644 --- a/tests/grate-tests/simple-tests/mmap-flag.c +++ b/tests/grate-tests/simple-tests/mmap-flag.c @@ -1,8 +1,8 @@ /* Cage side of the mmap-with-GRATE_MEMORY_FLAG test. * - * fd-backed mmap → write → read → munmap round-trip. The grate hands - * this off to RawPOSIX with `addr_cage | GRATE_MEMORY_FLAG`, exercising - * the runtime's flag-aware path in mmap_syscall. + * fd-backed mmap → write → read → munmap round-trip. The companion + * grate forwards this call to RawPOSIX with `arg1cage | GRATE_MEMORY_FLAG`, + * exercising the runtime's flag-aware path in mmap_syscall. * * Anonymous mmaps (including the runtime's own pre-main stack setup) * are forwarded by the grate without the flag and aren't exercised here. @@ -21,46 +21,25 @@ int main(void) { int fd = open(FILE_PATH, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd < 0) { - printf("[Cage|mmap-flag] open failed\n"); - fflush(stdout); return 1; } if (ftruncate(fd, size) != 0) { - printf("[Cage|mmap-flag] ftruncate failed\n"); - fflush(stdout); return 1; } - printf("[Cage|mmap-flag] calling fd-backed mmap (fd=%d)\n", fd); - fflush(stdout); - void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (p == MAP_FAILED) { - printf("[Cage|mmap-flag] mmap returned MAP_FAILED\n"); - fflush(stdout); return 1; } - printf("[Cage|mmap-flag] mmap returned %p\n", p); - fflush(stdout); memset(p, 0x42, size); - printf("[Cage|mmap-flag] memset done\n"); - fflush(stdout); - for (size_t i = 0; i < size; i++) { if (((unsigned char *)p)[i] != 0x42) { - printf("[Cage|mmap-flag] byte %zu mismatch (got 0x%x)\n", - i, ((unsigned char *)p)[i]); - fflush(stdout); return 1; } } - printf("[Cage|mmap-flag] readback ok\n"); - fflush(stdout); if (munmap(p, size) != 0) { - printf("[Cage|mmap-flag] munmap failed\n"); - fflush(stdout); return 1; } @@ -68,6 +47,5 @@ int main(void) { unlink(FILE_PATH); printf("[Cage|mmap-flag] PASS\n"); - fflush(stdout); return 
0; } diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c index c6057c05f9..64273ed215 100644 --- a/tests/grate-tests/simple-tests/mmap-flag_grate.c +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -60,50 +60,26 @@ int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, uint64_t arg2cage, uint64_t arg3, uint64_t arg3cage, uint64_t arg4, uint64_t arg4cage, uint64_t arg5, uint64_t arg5cage, uint64_t arg6, uint64_t arg6cage) { + (void)cageid; int self_grate_id = getpid(); int fd = (int)(int64_t)arg5; int is_anonymous = (fd < 0) || ((arg4 & MAP_ANON_FLAG) != 0); - /* `cageid` passed to the handler is the grate's id (3i's + /* The handler's first `cageid` param is the grate's own id (3i's _call_grate_func passes grateid here). The calling cage's id is - reliably carried in an integer arg's cage tag — use arg5cage (fd) - per the convention imfs follows. */ + carried in an integer arg's cage tag — use arg5cage (fd), which + isn't subject to pointer-translation rewrites. */ uint64_t calling_cage = arg5cage; - fprintf(stderr, - "[Grate|mmap-flag] entry handler_cageid=%llu calling_cage=%llu " - "arg1=%#018llx arg1cage=%#llx arg2=%#018llx arg3=%#llx " - "arg4=%#llx arg5=%#018llx arg6=%#018llx anon=%d\n", - (unsigned long long)cageid, (unsigned long long)calling_cage, - (unsigned long long)arg1, (unsigned long long)arg1cage, - (unsigned long long)arg2, (unsigned long long)arg3, - (unsigned long long)arg4, (unsigned long long)arg5, - (unsigned long long)arg6, is_anonymous); - fflush(stderr); - uint64_t fwd_arg1cage = is_anonymous ? 
arg1cage : (self_grate_id | GRATE_MEMORY_FLAG); - fprintf(stderr, - "[Grate|mmap-flag] forwarding target=%llu arg1=%#018llx " - "fwd_arg1cage=%#llx arg2=%#018llx arg3=%#llx arg4=%#llx " - "arg5=%#018llx arg6=%#018llx\n", - (unsigned long long)calling_cage, (unsigned long long)arg1, - (unsigned long long)fwd_arg1cage, (unsigned long long)arg2, - (unsigned long long)arg3, (unsigned long long)arg4, - (unsigned long long)arg5, (unsigned long long)arg6); - fflush(stderr); - - int ret = make_threei_call( + return make_threei_call( 9 /* MMAP_SYSCALL */, 0, self_grate_id, calling_cage, arg1, fwd_arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, arg5cage, arg6, arg6cage, 0 /* translate_errno off — propagate raw return */ ); - fprintf(stderr, "[Grate|mmap-flag] make_threei_call returned %d (%#x)\n", - ret, (unsigned)ret); - fflush(stderr); - return ret; } int main(int argc, char *argv[]) { From 3829f3a5ea2d3482da19c64e6905941f6da06933 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 22:55:22 -0400 Subject: [PATCH 13/24] TEMP diagnostic: trace mmap_syscall path-by-path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds stderr-flushed eprintlns at every decision point in mmap_syscall so we can identify which call kills the runtime mid-printf. Drop before merge — same shape as the previous c4ba2a785/ef4846fb3 cycle. 
--- src/rawposix/src/fs_calls.rs | 81 +++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index ff8f0cb720..85a4a0353f 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -829,6 +829,20 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { + use std::io::Write; + eprintln!( + "[mmap] >> cage={} addr={:#018x} (acage={:#x}) len={:#x} prot={:#x} flags={:#x} fd={} off={:#x}", + cageid, + addr_arg, + addr_cageid, + len_arg, + prot_arg, + flags_arg, + vfd_arg as i32, + off_arg, + ); + let _ = std::io::stderr().flush(); + let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -869,6 +883,11 @@ pub extern "C" fn mmap_syscall( // address gets added, since base_address is itself page-aligned. let rounded_addr = round_up_page(addr_arg); if rounded_addr != addr_arg { + eprintln!( + "[mmap] !! EINVAL addr-not-aligned cage={} addr={:#018x} rounded={:#018x}", + cageid, addr_arg, rounded_addr, + ); + let _ = std::io::stderr().flush(); return syscall_error(Errno::EINVAL, "mmap", "address it not aligned"); } @@ -904,6 +923,11 @@ pub extern "C" fn mmap_syscall( } else { addr_arg as u32 }; + eprintln!( + "[mmap] find_map_space path cage={} hint_useraddr={:#x} rlen={:#x}", + cageid, hint_useraddr, rounded_length, + ); + let _ = std::io::stderr().flush(); let vmmap = cage.vmmap.write(); let result = if hint_useraddr == 0 { @@ -917,27 +941,61 @@ pub extern "C" fn mmap_syscall( }; if result.is_none() { + eprintln!( + "[mmap] !! 
ENOMEM no-space cage={} hint={:#x} rlen={:#x}", + cageid, hint_useraddr, rounded_length, + ); + let _ = std::io::stderr().flush(); return syscall_error(Errno::ENOMEM, "mmap", "no memory"); } useraddr = (result.unwrap().start() << PAGESHIFT) as u32; sysaddr = vmmap.user_to_sys(useraddr); drop(vmmap); + eprintln!( + "[mmap] find_map_space ok cage={} useraddr={:#x} sysaddr={:#x}", + cageid, useraddr, sysaddr, + ); + let _ = std::io::stderr().flush(); } else { + eprintln!( + "[mmap] MAP_FIXED path cage={} addr={:#018x} (acage={:#x})", + cageid, addr_arg, addr_cageid, + ); + let _ = std::io::stderr().flush(); // Caller specified an exact address. Use the flag-aware helper so // a grate-supplied addr is resolved against the grate's base, and // a cage-supplied addr against the calling cage's base. sysaddr = match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) { Ok(s) => s, - Err(e) => return syscall_error(e, "mmap", "invalid addr"), + Err(e) => { + eprintln!( + "[mmap] !! addr_to_sys failed cage={} addr={:#018x} err={:?}", + cageid, addr_arg, e, + ); + let _ = std::io::stderr().flush(); + return syscall_error(e, "mmap", "invalid addr"); + } }; // Derive the calling cage's uaddr for the return value + vmmap // bookkeeping. Errors here mean the sysaddr is outside the cage's // linear memory range — invalid for MAP_FIXED in this cage. useraddr = match sc_convert_sys_to_user(sysaddr, cageid) { Ok(u) => u, - Err(e) => return syscall_error(e, "mmap", "addr outside cage"), + Err(e) => { + eprintln!( + "[mmap] !! 
sys_to_user failed cage={} sysaddr={:#x} err={:?}", + cageid, sysaddr, e, + ); + let _ = std::io::stderr().flush(); + return syscall_error(e, "mmap", "addr outside cage"); + } }; + eprintln!( + "[mmap] MAP_FIXED ok cage={} useraddr={:#x} sysaddr={:#x}", + cageid, useraddr, sysaddr, + ); + let _ = std::io::stderr().flush(); } flags |= MAP_FIXED as i32; @@ -952,6 +1010,12 @@ pub extern "C" fn mmap_syscall( fildes = -1; } + eprintln!( + "[mmap] libc::mmap call cage={} sysaddr={:#x} rlen={:#x} prot={:#x} flags={:#x} fd={}", + cageid, sysaddr, rounded_length, prot, flags, fildes, + ); + let _ = std::io::stderr().flush(); + let result = mmap_inner( cageid, sysaddr as *mut u8, @@ -962,15 +1026,28 @@ pub extern "C" fn mmap_syscall( off, ); + eprintln!( + "[mmap] libc::mmap ret cage={} result={:#x} is_err={}", + cageid, result, is_mmap_error(result), + ); + let _ = std::io::stderr().flush(); + // Check for error BEFORE sys_to_user conversion if is_mmap_error(result) { let errno = get_errno(); + eprintln!("[mmap] !! libc::mmap errno cage={} errno={}", cageid, errno); + let _ = std::io::stderr().flush(); return handle_errno(errno, "mmap"); } let vmmap = cage.vmmap.read(); let result = vmmap.sys_to_user(result); drop(vmmap); + eprintln!( + "[mmap] << ok cage={} ret_useraddr={:#x}", + cageid, result as u32, + ); + let _ = std::io::stderr().flush(); // if mmap addr is positive, that would mean the mapping is successful and we need to update the vmmap entry if result >= 0 { From a11fad873b69f9f805525683c10aecb87e52704b Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 22:56:05 -0400 Subject: [PATCH 14/24] Revert "TEMP diagnostic: trace mmap_syscall path-by-path" This reverts commit 3829f3a5ea2d3482da19c64e6905941f6da06933. 
--- src/rawposix/src/fs_calls.rs | 81 +----------------------------------- 1 file changed, 2 insertions(+), 79 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 85a4a0353f..ff8f0cb720 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -829,20 +829,6 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { - use std::io::Write; - eprintln!( - "[mmap] >> cage={} addr={:#018x} (acage={:#x}) len={:#x} prot={:#x} flags={:#x} fd={} off={:#x}", - cageid, - addr_arg, - addr_cageid, - len_arg, - prot_arg, - flags_arg, - vfd_arg as i32, - off_arg, - ); - let _ = std::io::stderr().flush(); - let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -883,11 +869,6 @@ pub extern "C" fn mmap_syscall( // address gets added, since base_address is itself page-aligned. let rounded_addr = round_up_page(addr_arg); if rounded_addr != addr_arg { - eprintln!( - "[mmap] !! EINVAL addr-not-aligned cage={} addr={:#018x} rounded={:#018x}", - cageid, addr_arg, rounded_addr, - ); - let _ = std::io::stderr().flush(); return syscall_error(Errno::EINVAL, "mmap", "address it not aligned"); } @@ -923,11 +904,6 @@ pub extern "C" fn mmap_syscall( } else { addr_arg as u32 }; - eprintln!( - "[mmap] find_map_space path cage={} hint_useraddr={:#x} rlen={:#x}", - cageid, hint_useraddr, rounded_length, - ); - let _ = std::io::stderr().flush(); let vmmap = cage.vmmap.write(); let result = if hint_useraddr == 0 { @@ -941,61 +917,27 @@ pub extern "C" fn mmap_syscall( }; if result.is_none() { - eprintln!( - "[mmap] !! 
ENOMEM no-space cage={} hint={:#x} rlen={:#x}", - cageid, hint_useraddr, rounded_length, - ); - let _ = std::io::stderr().flush(); return syscall_error(Errno::ENOMEM, "mmap", "no memory"); } useraddr = (result.unwrap().start() << PAGESHIFT) as u32; sysaddr = vmmap.user_to_sys(useraddr); drop(vmmap); - eprintln!( - "[mmap] find_map_space ok cage={} useraddr={:#x} sysaddr={:#x}", - cageid, useraddr, sysaddr, - ); - let _ = std::io::stderr().flush(); } else { - eprintln!( - "[mmap] MAP_FIXED path cage={} addr={:#018x} (acage={:#x})", - cageid, addr_arg, addr_cageid, - ); - let _ = std::io::stderr().flush(); // Caller specified an exact address. Use the flag-aware helper so // a grate-supplied addr is resolved against the grate's base, and // a cage-supplied addr against the calling cage's base. sysaddr = match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) { Ok(s) => s, - Err(e) => { - eprintln!( - "[mmap] !! addr_to_sys failed cage={} addr={:#018x} err={:?}", - cageid, addr_arg, e, - ); - let _ = std::io::stderr().flush(); - return syscall_error(e, "mmap", "invalid addr"); - } + Err(e) => return syscall_error(e, "mmap", "invalid addr"), }; // Derive the calling cage's uaddr for the return value + vmmap // bookkeeping. Errors here mean the sysaddr is outside the cage's // linear memory range — invalid for MAP_FIXED in this cage. useraddr = match sc_convert_sys_to_user(sysaddr, cageid) { Ok(u) => u, - Err(e) => { - eprintln!( - "[mmap] !! 
sys_to_user failed cage={} sysaddr={:#x} err={:?}", - cageid, sysaddr, e, - ); - let _ = std::io::stderr().flush(); - return syscall_error(e, "mmap", "addr outside cage"); - } + Err(e) => return syscall_error(e, "mmap", "addr outside cage"), }; - eprintln!( - "[mmap] MAP_FIXED ok cage={} useraddr={:#x} sysaddr={:#x}", - cageid, useraddr, sysaddr, - ); - let _ = std::io::stderr().flush(); } flags |= MAP_FIXED as i32; @@ -1010,12 +952,6 @@ pub extern "C" fn mmap_syscall( fildes = -1; } - eprintln!( - "[mmap] libc::mmap call cage={} sysaddr={:#x} rlen={:#x} prot={:#x} flags={:#x} fd={}", - cageid, sysaddr, rounded_length, prot, flags, fildes, - ); - let _ = std::io::stderr().flush(); - let result = mmap_inner( cageid, sysaddr as *mut u8, @@ -1026,28 +962,15 @@ pub extern "C" fn mmap_syscall( off, ); - eprintln!( - "[mmap] libc::mmap ret cage={} result={:#x} is_err={}", - cageid, result, is_mmap_error(result), - ); - let _ = std::io::stderr().flush(); - // Check for error BEFORE sys_to_user conversion if is_mmap_error(result) { let errno = get_errno(); - eprintln!("[mmap] !! 
libc::mmap errno cage={} errno={}", cageid, errno); - let _ = std::io::stderr().flush(); return handle_errno(errno, "mmap"); } let vmmap = cage.vmmap.read(); let result = vmmap.sys_to_user(result); drop(vmmap); - eprintln!( - "[mmap] << ok cage={} ret_useraddr={:#x}", - cageid, result as u32, - ); - let _ = std::io::stderr().flush(); // if mmap addr is positive, that would mean the mapping is successful and we need to update the vmmap entry if result >= 0 { From d1b9877cdb95e356aa75b8795b7350e9d57589c9 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 23:45:58 -0400 Subject: [PATCH 15/24] Address review: repurpose sc_convert_uaddr_to_host instead of adding new helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Sanchit's review on #1185 + issue #1186: the existing unused sc_convert_uaddr_to_host already takes (uaddr, addr_cageid) and uses addr_cageid to look up the right cage's base — which is exactly the shape we need for both cage-userland calls and grate-forwarded calls (where glibc strips FLAG and leaves addr_cageid as the grate's id). Changes: - sc_convert_uaddr_to_host: drop the NULL early-return (so uaddr=0 anchors to the cage's base — needed for early_init_stack-style MAP_FIXED at 0); change the 'uaddr >= base_addr' panic into a passthrough (already-translated host pointer). - Delete sc_convert_addr_to_sys; mmap_syscall and brk_syscall now call sc_convert_uaddr_to_host directly. - Rename sc_convert_sys_to_user to sc_convert_host_to_uaddr for naming symmetry with the forward direction. Keep it as the inverse helper for return-value bookkeeping (mmap result -> cage uaddr, brk arg passed as host sysaddr -> cage uaddr). No behavior change vs the prior PR commits — same value-range disambiguation, same passthrough semantics, just routed through the existing helper. 
--- src/rawposix/src/fs_calls.rs | 54 +++++-------- src/typemap/src/datatype_conversion.rs | 104 ++++++------------------- 2 files changed, 43 insertions(+), 115 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index ff8f0cb720..b6202bc38a 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -884,26 +884,21 @@ pub extern "C" fn mmap_syscall( // round up length to be multiple of pages let rounded_length = round_up_page(len as u64); - // Resolve (useraddr in calling cage, sysaddr host pointer). Addresses - // arrive as either a cage uaddr (≤ u32::MAX, from cage-side mmap.c) or - // a host sysaddr (above u32::MAX, from a grate-forwarded call whose - // GRATE_MEMORY_FLAG was already consumed by glibc's make_threei_call / - // TRANSLATE_ARG_TO_HOST). sc_convert_addr_to_sys handles the split. + // Resolve (useraddr in calling cage, sysaddr host pointer). The addr + // arrives in one of two forms, distinguished inside + // `sc_convert_uaddr_to_host` by comparison against `addr_cageid`'s base: + // - cage uaddr (small, < base): translated via the cage's base. + // - host pointer (>= base, e.g. glibc's TRANSLATE_*_TO_HOST already ran + // inside a grate-forwarded call with GRATE_MEMORY_FLAG): passthrough. let mut useraddr: u32; let sysaddr: usize; if flags & MAP_FIXED as i32 == 0 { // No fixed address — runtime picks via the calling cage's vmmap. - // Use addr_arg as a hint; if a grate forwarded a host sysaddr - // (above u32 range), convert it back to a cage uaddr first. - let hint_useraddr = if addr_arg > u32::MAX as u64 { - match sc_convert_sys_to_user(addr_arg as usize, cageid) { - Ok(u) => u, - Err(_) => 0, - } - } else { - addr_arg as u32 - }; + // Treat addr_arg as a hint only if it looks like a cage uaddr (the + // calling cage's range); otherwise ignore (a grate-supplied host + // pointer hint isn't meaningful in the calling cage's address space). 
+ let hint_useraddr = sc_convert_host_to_uaddr(addr_arg as usize, cageid).unwrap_or(0); let vmmap = cage.vmmap.write(); let result = if hint_useraddr == 0 { @@ -924,17 +919,12 @@ pub extern "C" fn mmap_syscall( sysaddr = vmmap.user_to_sys(useraddr); drop(vmmap); } else { - // Caller specified an exact address. Use the flag-aware helper so - // a grate-supplied addr is resolved against the grate's base, and - // a cage-supplied addr against the calling cage's base. - sysaddr = match sc_convert_addr_to_sys(addr_arg, addr_cageid, cageid) { - Ok(s) => s, - Err(e) => return syscall_error(e, "mmap", "invalid addr"), - }; + // Caller specified an exact address. + sysaddr = sc_convert_uaddr_to_host(addr_arg, addr_cageid, cageid) as usize; // Derive the calling cage's uaddr for the return value + vmmap // bookkeeping. Errors here mean the sysaddr is outside the cage's // linear memory range — invalid for MAP_FIXED in this cage. - useraddr = match sc_convert_sys_to_user(sysaddr, cageid) { + useraddr = match sc_convert_host_to_uaddr(sysaddr, cageid) { Ok(u) => u, Err(e) => return syscall_error(e, "mmap", "addr outside cage"), }; @@ -1208,16 +1198,14 @@ pub extern "C" fn brk_syscall( ) -> i32 { // Cage-side glibc brk.c passes a raw uaddr (low 32 bits); the runtime // page-aligns it and compares against vmmap.heap_start in user space. - // A grate forwarding the call via make_threei_call goes through - // glibc's TRANSLATE_ARG_TO_HOST which produces a host sysaddr (above - // u32 range) — convert that back to a cage uaddr before proceeding. - let brk = if brk_arg > u32::MAX as u64 { - match sc_convert_sys_to_user(brk_arg as usize, cageid) { - Ok(u) => u as i32, - Err(e) => return syscall_error(e, "brk", "addr outside cage"), - } - } else { - sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid) + // A grate forwarding the call via make_threei_call goes through glibc's + // TRANSLATE_ARG_TO_HOST which produces a host sysaddr — convert back to a + // cage uaddr before proceeding. 
`sc_convert_host_to_uaddr` returns Err + // when the arg is below the cage's base (i.e. already a small uaddr); in + // that case fall back to the standard u64→i32 cast. + let brk = match sc_convert_host_to_uaddr(brk_arg as usize, cageid) { + Ok(u) => u as i32, + Err(_) => sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid), }; // would sometimes check, sometimes be a no-op depending on the compiler settings if !(sc_unusedarg(arg2, arg2_cageid) diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index dead5043b6..7c634296e4 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -260,69 +260,9 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { arg as *mut u8 } -/// Resolve a (uaddr, cageid) pair to a host system address, honoring -/// `GRATE_MEMORY_FLAG`. -/// -/// For path-style buffer args the runtime can just dereference the address as -/// a host pointer (see `get_cstr`) — bytes are bytes. For address args that -/// the runtime *interprets* rather than dereferences (mmap, munmap, mprotect, -/// brk, shmat, shmdt), we need the actual host system address. -/// -/// Distinguishes which form of address `arg` carries: -/// -/// - **u32 range (`arg <= u32::MAX`)**: a uaddr in the calling cage's linear -/// memory. This is what cage-side glibc wrappers (e.g. `mmap.c`'s -/// `(uintptr_t) addr`) pass — wasm32 uaddrs fit in u32. We translate via -/// the calling cage's vmmap base. -/// - **Above u32 range**: a host system address already, produced by glibc's -/// `TRANSLATE_ARG_TO_HOST` macro inside `make_threei_call` (e.g. when a -/// grate forwards with `GRATE_MEMORY_FLAG` set). By the time the runtime -/// sees the call, the FLAG bit has been stripped from `arg_cageid` and the -/// arg is the resolved host pointer. Use as-is. 
-/// -/// The ranges don't overlap: cage linear memory occupies `[base, base + 4GB]` -/// on the host, and host bases are typically far above 4GB. -/// -/// arg=0 is special-cased to "start of the calling cage's memory" — a NULL -/// host pointer would be meaningless. -/// -/// ## Returns -/// - `Ok(sysaddr)` host system address. -/// - `Err(Errno::EINVAL)` if the calling cage can't be looked up or its vmmap -/// has no base address yet (only on the uaddr branch). -pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result { - #[cfg(feature = "secure")] - { - if !validate_cageid(arg_cageid, cageid) { - return Err(Errno::EINVAL); - } - } - let _ = arg_cageid; // FLAG is consumed glibc-side; runtime distinguishes by value range. - - // arg=0 has cage-relative "start of cage memory" meaning — a NULL host - // pointer would be meaningless. Anchor to the calling cage's base so - // early_init_stack-style mmaps (addr=0, MAP_FIXED) work. - if arg == 0 { - let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; - let vmmap = cage.vmmap.read(); - let base = vmmap.base_address.ok_or(Errno::EINVAL)?; - return Ok(base); - } - - // Distinguish uaddr (≤ u32::MAX) from host sysaddr (above). See doc above. - if arg <= u32::MAX as u64 { - let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; - let vmmap = cage.vmmap.read(); - let base = vmmap.base_address.ok_or(Errno::EINVAL)?; - return Ok(base + arg as usize); - } - - Ok(arg as usize) -} - -/// Inverse of `sc_convert_addr_to_sys` — translate a host system address back -/// to a uaddr in the named cage's linear memory. Used for return values of -/// mmap-family syscalls and for bookkeeping into the cage's vmmap. +/// Inverse of `sc_convert_uaddr_to_host` — translate a host system address +/// back to a uaddr in the named cage's linear memory. Used for return values +/// of mmap-family syscalls and for bookkeeping into the cage's vmmap. /// /// ## Arguments /// - `sysaddr`: the host system address. 
@@ -333,7 +273,7 @@ pub fn sc_convert_addr_to_sys(arg: u64, arg_cageid: u64, cageid: u64) -> Result< /// wasm32 lind). /// - `Err(Errno::EINVAL)` if the cage can't be looked up, its vmmap has no /// base, or `sysaddr` is below the cage's base. -pub fn sc_convert_sys_to_user(sysaddr: usize, cageid: u64) -> Result { +pub fn sc_convert_host_to_uaddr(sysaddr: usize, cageid: u64) -> Result { let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; let vmmap = cage.vmmap.read(); let base = vmmap.base_address.ok_or(Errno::EINVAL)?; @@ -356,15 +296,22 @@ pub fn sc_convert_buf(buf_arg: u64, _arg_cageid: u64, _cageid: u64) -> *const u8 buf_arg as *const u8 } -// TODO: This function can be removed/revamped significantly -// Leaving it in for now since it is used threei/ -/// ## Arguments: -/// - `uaddr`: The user address to convert (u64). -/// - `addr_cageid`: The cage ID associated with the address. -/// - `cageid`: The calling cage ID (used for validation in secure mode). +/// Resolve a `(uaddr, addr_cageid)` syscall arg to a host system address. /// -/// ## Returns: -/// - The host address as u64, or 0 if the address is null. +/// Robust to either input form, distinguished by comparing against the named +/// cage's base: +/// +/// - `uaddr < base_addr`: a cage-relative address in `addr_cageid`'s linear +/// memory. Translate by adding the base. `uaddr == 0` falls in this branch +/// and resolves to the cage's base — that's what `early_init_stack`-style +/// `MAP_FIXED at 0` mmaps need. +/// - `uaddr >= base_addr`: a pre-translated host pointer (e.g. glibc's +/// `TRANSLATE_*_TO_HOST` already ran inside a grate-forwarded call with +/// `GRATE_MEMORY_FLAG` set). Pass through unchanged. +/// +/// `addr_cageid` picks the right cage's base: for a cage-userland call it's +/// the calling cage; for a grate-forwarded call (FLAG stripped by glibc) it's +/// the grate's id, which is what produced the host pointer in the first place. 
pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u64 { #[cfg(feature = "secure")] { @@ -373,21 +320,14 @@ pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u } } - // Do not convert on NULL. - if uaddr == 0 { - return uaddr; - } - let cage = get_cage(addr_cageid).unwrap(); let vmmap = cage.vmmap.read(); let base_addr = vmmap.base_address.unwrap() as u64; - // Only convert to host if not already converted. if uaddr >= base_addr { - panic!( - "sc_convert_uaddr_to_host: invalid uaddr {:#x} - expected a cage-relative address", - uaddr - ); + // Already a host pointer — caller (e.g. glibc's TRANSLATE_*_TO_HOST) + // resolved it. Pass through. + return uaddr; } uaddr + base_addr From 236833153971d709c0779ad2ad23bb65680d83c3 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 23:49:48 -0400 Subject: [PATCH 16/24] vmmap: handle cage uaddr in calculate_page_range (issue #1186 part 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_addr_read / check_addr_rw assume addr is a pre-translated host pointer and do 'addr - base_addr' inside calculate_page_range. When a grate calls copy_data_between_cages(..., cage_uaddr, cage_id, ...) with a uaddr returned from a previous mmap (POSIX doesn't translate return values, and glibc's TRANSLATE_UADDR_TO_HOST only fires when arg_cageid == __lind_cageid — i.e. when the addr is in the caller's own cage), the uaddr arrives untranslated, addr - base_addr underflows in u64, the page_num is garbage, and validation reports 'range invalid'. End-effect for imfs's mmap interposition: copy_data_between_cages fails the validation when seeding a freshly-mmap'd cage region with file contents, so the cage reads zeros. Fix mirrors the mmap_syscall / sc_convert_uaddr_to_host change: if 'addr >= base_addr' treat as host pointer (subtract base); otherwise treat as cage uaddr (use directly). Same passthrough disambiguation. 
--- src/cage/src/memory/vmmap.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cage/src/memory/vmmap.rs b/src/cage/src/memory/vmmap.rs index 5f74c7603c..e39888c7c9 100644 --- a/src/cage/src/memory/vmmap.rs +++ b/src/cage/src/memory/vmmap.rs @@ -380,6 +380,15 @@ impl Vmmap { /// This helper function converts a byte address range into page numbers, /// handling overflow safely with checked arithmetic. /// + /// `addr` may arrive as either: + /// - a pre-translated host pointer (`addr >= base_addr`, e.g. a buffer + /// that already went through glibc's `TRANSLATE_GUEST_POINTER_TO_HOST`), + /// in which case the cage uaddr is `addr - base_addr`, or + /// - a raw cage uaddr (`addr < base_addr`, e.g. a value returned from a + /// previous mmap syscall that the caller is passing back in — POSIX + /// doesn't translate return values), in which case `addr` is already + /// the cage uaddr. + /// /// # Arguments /// * `addr` - Virtual memory address (in bytes) /// * `length` - Length of the memory region in bytes @@ -389,7 +398,11 @@ impl Vmmap { /// * `None` - If the calculation would overflow fn calculate_page_range(&self, addr: u64, length: usize) -> Option<(u32, u32)> { let base_addr = self.base_address.unwrap() as u64; - let uaddr = addr - base_addr; + let uaddr = if addr >= base_addr { + addr - base_addr + } else { + addr + }; let page_num = (uaddr >> PAGESHIFT) as u32; let end_addr = uaddr From 01927899c11bfb633e15617663f5e61444f2f4ff Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Sun, 10 May 2026 23:55:51 -0400 Subject: [PATCH 17/24] copy_data_between_cages: translate src/dest before validate + memcpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #1186 part 2. copy_data_between_cages did 'let host_src_addr = srcaddr' without actually translating — it assumed glibc's TRANSLATE_UADDR_TO_HOST had already converted the addr to a host pointer. 
That only happens for args owned by the calling cage; cross- cage args (e.g. a uaddr returned from a previous mmap that the caller is passing back in for someone else's cage) arrive untranslated. End-effect: when imfs's grate calls copy_data_between_cages(..., cage_uaddr, cage_id, ...) to seed a freshly-mmap'd cage region with file contents, the cage_uaddr reached the runtime as a small integer. On main, _validate_range_* underflowed and returned 'range invalid'; the copy never ran. Loosening calculate_page_range to accept the small value let validation pass, but the subsequent std::ptr::copy_nonoverlapping at the same small value segfaulted. Fix: translate srcaddr and destaddr at the top of copy_data_between_cages via sc_convert_uaddr_to_host, which handles both forms — host-pointer passthrough, cage uaddr gets the named cage's base added. After that, _validate_range_* and the actual copy_nonoverlapping use translated host pointers and work normally. Reverts the calculate_page_range loosening from the previous commit; that was the wrong layer to fix this at. --- src/cage/src/memory/vmmap.rs | 15 +-------------- src/threei/src/threei.rs | 9 +++++++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/cage/src/memory/vmmap.rs b/src/cage/src/memory/vmmap.rs index e39888c7c9..5f74c7603c 100644 --- a/src/cage/src/memory/vmmap.rs +++ b/src/cage/src/memory/vmmap.rs @@ -380,15 +380,6 @@ impl Vmmap { /// This helper function converts a byte address range into page numbers, /// handling overflow safely with checked arithmetic. /// - /// `addr` may arrive as either: - /// - a pre-translated host pointer (`addr >= base_addr`, e.g. a buffer - /// that already went through glibc's `TRANSLATE_GUEST_POINTER_TO_HOST`), - /// in which case the cage uaddr is `addr - base_addr`, or - /// - a raw cage uaddr (`addr < base_addr`, e.g. 
a value returned from a - /// previous mmap syscall that the caller is passing back in — POSIX - /// doesn't translate return values), in which case `addr` is already - /// the cage uaddr. - /// /// # Arguments /// * `addr` - Virtual memory address (in bytes) /// * `length` - Length of the memory region in bytes @@ -398,11 +389,7 @@ impl Vmmap { /// * `None` - If the calculation would overflow fn calculate_page_range(&self, addr: u64, length: usize) -> Option<(u32, u32)> { let base_addr = self.base_address.unwrap() as u64; - let uaddr = if addr >= base_addr { - addr - base_addr - } else { - addr - }; + let uaddr = addr - base_addr; let page_num = (uaddr >> PAGESHIFT) as u32; let end_addr = uaddr diff --git a/src/threei/src/threei.rs b/src/threei/src/threei.rs index 838146fbd3..8cf744fe26 100644 --- a/src/threei/src/threei.rs +++ b/src/threei/src/threei.rs @@ -890,6 +890,15 @@ pub fn copy_data_between_cages( panic!("Dynamic allocation not yet supported in copy_data_between_cages"); } + // Resolve src/dest to host pointers. glibc's TRANSLATE_UADDR_TO_HOST only + // fires when the arg is in the calling cage; cross-cage args (e.g. a uaddr + // returned from a previous mmap that the caller is passing back in for + // someone else's cage) arrive untranslated. `sc_convert_uaddr_to_host` + // handles both forms — already-host passthrough, cage uaddr gets the + // named cage's base added. + let srcaddr = sc_convert_uaddr_to_host(srcaddr, srccage, _thiscage); + let destaddr = sc_convert_uaddr_to_host(destaddr, destcage, _thiscage); + // Decide the actual number of bytes to copy depending on CopyType. 
// // `RawMemcpy`: From d2ec5488a9089c6afeec3d0efd89f1e23d3fd2cb Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 01:10:38 -0400 Subject: [PATCH 18/24] Trace popen handler table inheritance --- src/rawposix/src/sys_calls.rs | 12 +++++++++++- src/threei/src/handler_table/dashmap_impl.rs | 12 ++++++++++++ src/threei/src/threei.rs | 14 +++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/rawposix/src/sys_calls.rs b/src/rawposix/src/sys_calls.rs index e9ca2331d5..df72135467 100644 --- a/src/rawposix/src/sys_calls.rs +++ b/src/rawposix/src/sys_calls.rs @@ -108,6 +108,12 @@ pub extern "C" fn fork_syscall( // otherwise, we are creating a process (i.e. fork) let flags = args.flags; let isthread = flags & (sys_const::CLONE_VM); + eprintln!( + "[popen-trace|rawposix fork_syscall] parent={} flags=0x{:x} isthread_by_clone_vm={}", + cageid, + flags, + isthread != 0 + ); // Effective parent cage ID. let parent_cageid = cageid; @@ -161,7 +167,7 @@ pub extern "C" fn fork_syscall( // This ensures that the child process inherits all syscall // interposition and routing behavior, including RawPOSIX's // syscall implementation - threei::copy_handler_table_to_cage( + let copy_ret = threei::copy_handler_table_to_cage( UNUSED_ARG, child_cageid, parent_cageid, @@ -177,6 +183,10 @@ pub extern "C" fn fork_syscall( UNUSED_ARG, UNUSED_ID, ); + eprintln!( + "[popen-trace|rawposix fork_syscall] copied handlers parent={} child={} ret={}", + parent_cageid, child_cageid, copy_ret + ); } // Delegate execution back to binary runtime (currently only support Wasmtime, diff --git a/src/threei/src/handler_table/dashmap_impl.rs b/src/threei/src/handler_table/dashmap_impl.rs index 486523965f..49331f9b5c 100644 --- a/src/threei/src/handler_table/dashmap_impl.rs +++ b/src/threei/src/handler_table/dashmap_impl.rs @@ -247,9 +247,11 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { ); return threei_const::ELINDAPIABORTED; }; + 
let src_has_mmap = src_snapshot.contains_key(&9); let dst_call_map_ref = HANDLERTABLE.entry(targetcage).or_insert_with(DashMap::new); let dst_call_map: &CallnumMap = &*dst_call_map_ref; + let dst_had_mmap = dst_call_map.contains_key(&9); // Copy without overwriting existing destination handlers. for src_call_entry in src_snapshot.iter() { @@ -267,6 +269,16 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { dst_target_map.entry(handlefunccage).or_insert(addr); } } + let dst_has_mmap = dst_call_map.contains_key(&9); + eprintln!( + "[popen-trace|3i copy_impl] source={} target={} src_calls={} src_has_mmap={} dst_had_mmap={} dst_has_mmap={}", + srccage, + targetcage, + src_snapshot.len(), + src_has_mmap, + dst_had_mmap, + dst_has_mmap + ); 0 } diff --git a/src/threei/src/threei.rs b/src/threei/src/threei.rs index 8cf744fe26..573d52836d 100644 --- a/src/threei/src/threei.rs +++ b/src/threei/src/threei.rs @@ -355,11 +355,23 @@ pub fn copy_handler_table_to_cage( // Verifies that neither srccage nor targetcage are in the EXITING state to avoid // copying from or to a cage that may be invalid. if EXITING_TABLE.contains(&targetcage) || EXITING_TABLE.contains(&srccage) { + eprintln!( + "[popen-trace|3i copy_handler_table] rejected source={} target={} source_exiting={} target_exiting={}", + srccage, + targetcage, + EXITING_TABLE.contains(&srccage), + EXITING_TABLE.contains(&targetcage) + ); return threei_const::ELINDESRCH as u64; } // Actual implementation is in handler_table module according to feature flag - copy_handler_table_to_cage_impl(srccage, targetcage) + let ret = copy_handler_table_to_cage_impl(srccage, targetcage); + eprintln!( + "[popen-trace|3i copy_handler_table] source={} target={} ret={}", + srccage, targetcage, ret + ); + ret } /// actually performs a call. 
Not interposable From 310c803382ff1bdb7410fe748c67539aac136395 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 01:18:52 -0400 Subject: [PATCH 19/24] Trace dashmap handler table dup2 state --- src/threei/src/handler_table/dashmap_impl.rs | 53 +++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/src/threei/src/handler_table/dashmap_impl.rs b/src/threei/src/handler_table/dashmap_impl.rs index 49331f9b5c..45df84a5c7 100644 --- a/src/threei/src/handler_table/dashmap_impl.rs +++ b/src/threei/src/handler_table/dashmap_impl.rs @@ -104,6 +104,10 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O return Some((gid, addr)); } + eprintln!( + "[popen-trace|3i dashmap get_handler] empty target map self={} syscall={} target={}", + self_cageid, syscall_num, target_cageid + ); panic!( "No handlers for self_cageid={} syscall_num={} (target_cageid={})", self_cageid, syscall_num, target_cageid @@ -124,10 +128,20 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O /// todo: a more efficient way to do clean up pub fn _rm_grate_from_handler(grateid: u64) { for self_entry in HANDLERTABLE.iter() { + let self_cageid = *self_entry.key(); let call_map: &CallnumMap = self_entry.value(); for call_entry in call_map.iter() { + let callnum = *call_entry.key(); let target_map: &TargetCageMap = call_entry.value(); + let before = target_map.len(); target_map.retain(|dest_grateid, _| *dest_grateid != grateid); + let after = target_map.len(); + if before != after || callnum == 33 { + eprintln!( + "[popen-trace|3i dashmap rm_grate] grate={} self={} syscall={} before={} after={}", + grateid, self_cageid, callnum, before, after + ); + } } } } @@ -210,7 +224,15 @@ pub fn register_handler_impl( if handlefunccage == threei_const::THREEI_DEREGISTER { if let Some(self_entry) = HANDLERTABLE.get(&srccage) { let call_map: &CallnumMap = self_entry.value(); + let before = call_map + 
.get(&targetcallnum) + .map(|target_map| target_map.value().len()) + .unwrap_or(0); call_map.remove(&targetcallnum); + eprintln!( + "[popen-trace|3i dashmap register] deregister self={} syscall={} before={}", + srccage, targetcallnum, before + ); } return 0; } @@ -224,9 +246,21 @@ pub fn register_handler_impl( // Each (srccage, targetcallnum) pair keeps only one handler entry, // so we clear any existing mapping and replace it directly. + let before = target_map.len(); target_map.clear(); target_map.insert(handlefunccage, in_grate_fn_ptr_u64); + if targetcallnum == 33 { + eprintln!( + "[popen-trace|3i dashmap register] self={} syscall={} handler_cage={} before={} after={} fn=0x{:x}", + srccage, + targetcallnum, + handlefunccage, + before, + target_map.len(), + in_grate_fn_ptr_u64 + ); + } 0 } @@ -248,10 +282,18 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { return threei_const::ELINDAPIABORTED; }; let src_has_mmap = src_snapshot.contains_key(&9); + let src_dup2_size = src_snapshot + .get(&33) + .map(|target_map| target_map.value().len()) + .unwrap_or(0); let dst_call_map_ref = HANDLERTABLE.entry(targetcage).or_insert_with(DashMap::new); let dst_call_map: &CallnumMap = &*dst_call_map_ref; let dst_had_mmap = dst_call_map.contains_key(&9); + let dst_dup2_size_before = dst_call_map + .get(&33) + .map(|target_map| target_map.value().len()) + .unwrap_or(0); // Copy without overwriting existing destination handlers. 
for src_call_entry in src_snapshot.iter() { @@ -270,14 +312,21 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { } } let dst_has_mmap = dst_call_map.contains_key(&9); + let dst_dup2_size_after = dst_call_map + .get(&33) + .map(|target_map| target_map.value().len()) + .unwrap_or(0); eprintln!( - "[popen-trace|3i copy_impl] source={} target={} src_calls={} src_has_mmap={} dst_had_mmap={} dst_has_mmap={}", + "[popen-trace|3i copy_impl] source={} target={} src_calls={} src_has_mmap={} dst_had_mmap={} dst_has_mmap={} src_dup2_size={} dst_dup2_before={} dst_dup2_after={}", srccage, targetcage, src_snapshot.len(), src_has_mmap, dst_had_mmap, - dst_has_mmap + dst_has_mmap, + src_dup2_size, + dst_dup2_size_before, + dst_dup2_size_after ); 0 From de012db6d3a13fe18a0dfd4499051204ebe46fde Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 01:21:21 -0400 Subject: [PATCH 20/24] Trace threei handler table backend for popen --- src/threei/src/handler_table/hashmap_impl.rs | 60 +++++++++++++++++++- src/threei/src/threei.rs | 8 ++- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/threei/src/handler_table/hashmap_impl.rs b/src/threei/src/handler_table/hashmap_impl.rs index 553e80a67e..c9701199fe 100644 --- a/src/threei/src/handler_table/hashmap_impl.rs +++ b/src/threei/src/handler_table/hashmap_impl.rs @@ -91,6 +91,15 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O let grateid = target_map.keys().next().copied()?; let addr = target_map.values().next().copied()?; + if syscall_num == 33 { + eprintln!( + "[popen-trace|3i hashmap get_handler] self={} syscall={} target={} handlers={}", + self_cageid, + syscall_num, + target_cageid, + target_map.len() + ); + } // Otherwise fallback to any registered handler return Some((grateid, addr)); } @@ -109,9 +118,17 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O /// todo: a more efficient way to do clean up 
pub fn _rm_grate_from_handler(grateid: u64) { let mut table = HANDLERTABLE.lock().unwrap(); - for (_, callmap) in table.iter_mut() { - for (_, target_map) in callmap.iter_mut() { + for (self_cageid, callmap) in table.iter_mut() { + for (callnum, target_map) in callmap.iter_mut() { + let before = target_map.len(); target_map.retain(|dest_grateid, _| *dest_grateid != grateid); + let after = target_map.len(); + if before != after || *callnum == 33 { + eprintln!( + "[popen-trace|3i hashmap rm_grate] grate={} self={} syscall={} before={} after={}", + grateid, self_cageid, callnum, before, after + ); + } } } } @@ -198,7 +215,15 @@ pub fn register_handler_impl( // for the given (targetcage, targetcallnum). if handlefunccage == threei_const::THREEI_DEREGISTER { if let Some(call_map) = table.get_mut(&srccage) { + let before = call_map + .get(&targetcallnum) + .map(|target_map| target_map.len()) + .unwrap_or(0); call_map.remove(&targetcallnum); + eprintln!( + "[popen-trace|3i hashmap register] deregister self={} syscall={} before={}", + srccage, targetcallnum, before + ); if call_map.is_empty() { table.remove(&srccage); @@ -213,9 +238,21 @@ pub fn register_handler_impl( // Each (srccage, targetcallnum) pair keeps only one handler entry, // so we clear any existing mapping and replace it directly. + let before = target_map.len(); target_map.clear(); target_map.insert(handlefunccage, in_grate_fn_ptr_u64); + if targetcallnum == 33 { + eprintln!( + "[popen-trace|3i hashmap register] self={} syscall={} handler_cage={} before={} after={} fn=0x{:x}", + srccage, + targetcallnum, + handlefunccage, + before, + target_map.len(), + in_grate_fn_ptr_u64 + ); + } 0 } @@ -229,6 +266,12 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { // Does not overwrite any existing handlers in the target. 
if let Some(src_entry) = handler_table.get(&srccage).cloned() { let target_entry = handler_table.entry(targetcage).or_insert_with(HashMap::new); + let src_dup2_size = src_entry.get(&33).map(|target_map| target_map.len()).unwrap_or(0); + let dst_dup2_size_before = target_entry + .get(&33) + .map(|target_map| target_map.len()) + .unwrap_or(0); + let src_calls = src_entry.len(); for (callnum, callnum_map) in src_entry { let target_callnum_map = target_entry.entry(callnum).or_insert_with(HashMap::new); for (handlefunc, handlefunccage) in callnum_map { @@ -238,6 +281,19 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { .or_insert(handlefunccage); } } + let dst_dup2_size_after = target_entry + .get(&33) + .map(|target_map| target_map.len()) + .unwrap_or(0); + eprintln!( + "[popen-trace|3i hashmap copy_impl] source={} target={} src_calls={} src_dup2_size={} dst_dup2_before={} dst_dup2_after={}", + srccage, + targetcage, + src_calls, + src_dup2_size, + dst_dup2_size_before, + dst_dup2_size_after + ); 0 } else { eprintln!( diff --git a/src/threei/src/threei.rs b/src/threei/src/threei.rs index 573d52836d..5f44982063 100644 --- a/src/threei/src/threei.rs +++ b/src/threei/src/threei.rs @@ -368,8 +368,12 @@ pub fn copy_handler_table_to_cage( // Actual implementation is in handler_table module according to feature flag let ret = copy_handler_table_to_cage_impl(srccage, targetcage); eprintln!( - "[popen-trace|3i copy_handler_table] source={} target={} ret={}", - srccage, targetcage, ret + "[popen-trace|3i copy_handler_table] source={} target={} ret={} feature_hashmap={} feature_dashmap={}", + srccage, + targetcage, + ret, + cfg!(feature = "hashmap"), + cfg!(feature = "dashmap") ); ret } From d64f8445e60969ddc20c5d378f23e000cdc14428 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 01:37:40 -0400 Subject: [PATCH 21/24] Remove popen trace instrumentation --- src/rawposix/src/sys_calls.rs | 12 +--- 
src/threei/src/handler_table/dashmap_impl.rs | 61 ----------------- src/threei/src/handler_table/hashmap_impl.rs | 69 +++----------------- src/threei/src/threei.rs | 18 +---- 4 files changed, 10 insertions(+), 150 deletions(-) diff --git a/src/rawposix/src/sys_calls.rs b/src/rawposix/src/sys_calls.rs index df72135467..e9ca2331d5 100644 --- a/src/rawposix/src/sys_calls.rs +++ b/src/rawposix/src/sys_calls.rs @@ -108,12 +108,6 @@ pub extern "C" fn fork_syscall( // otherwise, we are creating a process (i.e. fork) let flags = args.flags; let isthread = flags & (sys_const::CLONE_VM); - eprintln!( - "[popen-trace|rawposix fork_syscall] parent={} flags=0x{:x} isthread_by_clone_vm={}", - cageid, - flags, - isthread != 0 - ); // Effective parent cage ID. let parent_cageid = cageid; @@ -167,7 +161,7 @@ pub extern "C" fn fork_syscall( // This ensures that the child process inherits all syscall // interposition and routing behavior, including RawPOSIX's // syscall implementation - let copy_ret = threei::copy_handler_table_to_cage( + threei::copy_handler_table_to_cage( UNUSED_ARG, child_cageid, parent_cageid, @@ -183,10 +177,6 @@ pub extern "C" fn fork_syscall( UNUSED_ARG, UNUSED_ID, ); - eprintln!( - "[popen-trace|rawposix fork_syscall] copied handlers parent={} child={} ret={}", - parent_cageid, child_cageid, copy_ret - ); } // Delegate execution back to binary runtime (currently only support Wasmtime, diff --git a/src/threei/src/handler_table/dashmap_impl.rs b/src/threei/src/handler_table/dashmap_impl.rs index 45df84a5c7..486523965f 100644 --- a/src/threei/src/handler_table/dashmap_impl.rs +++ b/src/threei/src/handler_table/dashmap_impl.rs @@ -104,10 +104,6 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O return Some((gid, addr)); } - eprintln!( - "[popen-trace|3i dashmap get_handler] empty target map self={} syscall={} target={}", - self_cageid, syscall_num, target_cageid - ); panic!( "No handlers for self_cageid={} syscall_num={} 
(target_cageid={})", self_cageid, syscall_num, target_cageid @@ -128,20 +124,10 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O /// todo: a more efficient way to do clean up pub fn _rm_grate_from_handler(grateid: u64) { for self_entry in HANDLERTABLE.iter() { - let self_cageid = *self_entry.key(); let call_map: &CallnumMap = self_entry.value(); for call_entry in call_map.iter() { - let callnum = *call_entry.key(); let target_map: &TargetCageMap = call_entry.value(); - let before = target_map.len(); target_map.retain(|dest_grateid, _| *dest_grateid != grateid); - let after = target_map.len(); - if before != after || callnum == 33 { - eprintln!( - "[popen-trace|3i dashmap rm_grate] grate={} self={} syscall={} before={} after={}", - grateid, self_cageid, callnum, before, after - ); - } } } } @@ -224,15 +210,7 @@ pub fn register_handler_impl( if handlefunccage == threei_const::THREEI_DEREGISTER { if let Some(self_entry) = HANDLERTABLE.get(&srccage) { let call_map: &CallnumMap = self_entry.value(); - let before = call_map - .get(&targetcallnum) - .map(|target_map| target_map.value().len()) - .unwrap_or(0); call_map.remove(&targetcallnum); - eprintln!( - "[popen-trace|3i dashmap register] deregister self={} syscall={} before={}", - srccage, targetcallnum, before - ); } return 0; } @@ -246,21 +224,9 @@ pub fn register_handler_impl( // Each (srccage, targetcallnum) pair keeps only one handler entry, // so we clear any existing mapping and replace it directly. 
- let before = target_map.len(); target_map.clear(); target_map.insert(handlefunccage, in_grate_fn_ptr_u64); - if targetcallnum == 33 { - eprintln!( - "[popen-trace|3i dashmap register] self={} syscall={} handler_cage={} before={} after={} fn=0x{:x}", - srccage, - targetcallnum, - handlefunccage, - before, - target_map.len(), - in_grate_fn_ptr_u64 - ); - } 0 } @@ -281,19 +247,9 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { ); return threei_const::ELINDAPIABORTED; }; - let src_has_mmap = src_snapshot.contains_key(&9); - let src_dup2_size = src_snapshot - .get(&33) - .map(|target_map| target_map.value().len()) - .unwrap_or(0); let dst_call_map_ref = HANDLERTABLE.entry(targetcage).or_insert_with(DashMap::new); let dst_call_map: &CallnumMap = &*dst_call_map_ref; - let dst_had_mmap = dst_call_map.contains_key(&9); - let dst_dup2_size_before = dst_call_map - .get(&33) - .map(|target_map| target_map.value().len()) - .unwrap_or(0); // Copy without overwriting existing destination handlers. 
for src_call_entry in src_snapshot.iter() { @@ -311,23 +267,6 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { dst_target_map.entry(handlefunccage).or_insert(addr); } } - let dst_has_mmap = dst_call_map.contains_key(&9); - let dst_dup2_size_after = dst_call_map - .get(&33) - .map(|target_map| target_map.value().len()) - .unwrap_or(0); - eprintln!( - "[popen-trace|3i copy_impl] source={} target={} src_calls={} src_has_mmap={} dst_had_mmap={} dst_has_mmap={} src_dup2_size={} dst_dup2_before={} dst_dup2_after={}", - srccage, - targetcage, - src_snapshot.len(), - src_has_mmap, - dst_had_mmap, - dst_has_mmap, - src_dup2_size, - dst_dup2_size_before, - dst_dup2_size_after - ); 0 } diff --git a/src/threei/src/handler_table/hashmap_impl.rs b/src/threei/src/handler_table/hashmap_impl.rs index c9701199fe..ed57a9f537 100644 --- a/src/threei/src/handler_table/hashmap_impl.rs +++ b/src/threei/src/handler_table/hashmap_impl.rs @@ -1,7 +1,6 @@ use crate::threei_const; -use std::collections::{hash_map::Entry, HashMap}; +use std::collections::HashMap; use std::sync::Mutex; -use sysdefs::constants::lind_platform_const; /// HANDLERTABLE: /// A nested hash map used to define fine-grained per-syscall interposition rules. @@ -73,7 +72,11 @@ pub fn _check_cage_handler_exists(cageid: u64) -> bool { /// ## Panics: /// - If no entry exists for `self_cageid`. /// - If no entry exists for `syscall_num`. 
-pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> Option<(u64, u64)> { +pub fn _get_handler( + self_cageid: u64, + syscall_num: u64, + _target_cageid: u64, +) -> Option<(u64, u64)> { let handler_table = HANDLERTABLE.lock().unwrap(); let call_map = handler_table.get(&self_cageid).unwrap_or_else(|| { @@ -91,15 +94,6 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O let grateid = target_map.keys().next().copied()?; let addr = target_map.values().next().copied()?; - if syscall_num == 33 { - eprintln!( - "[popen-trace|3i hashmap get_handler] self={} syscall={} target={} handlers={}", - self_cageid, - syscall_num, - target_cageid, - target_map.len() - ); - } // Otherwise fallback to any registered handler return Some((grateid, addr)); } @@ -118,17 +112,9 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> O /// todo: a more efficient way to do clean up pub fn _rm_grate_from_handler(grateid: u64) { let mut table = HANDLERTABLE.lock().unwrap(); - for (self_cageid, callmap) in table.iter_mut() { - for (callnum, target_map) in callmap.iter_mut() { - let before = target_map.len(); + for (_self_cageid, callmap) in table.iter_mut() { + for (_callnum, target_map) in callmap.iter_mut() { target_map.retain(|dest_grateid, _| *dest_grateid != grateid); - let after = target_map.len(); - if before != after || *callnum == 33 { - eprintln!( - "[popen-trace|3i hashmap rm_grate] grate={} self={} syscall={} before={} after={}", - grateid, self_cageid, callnum, before, after - ); - } } } } @@ -215,15 +201,7 @@ pub fn register_handler_impl( // for the given (targetcage, targetcallnum). 
if handlefunccage == threei_const::THREEI_DEREGISTER { if let Some(call_map) = table.get_mut(&srccage) { - let before = call_map - .get(&targetcallnum) - .map(|target_map| target_map.len()) - .unwrap_or(0); call_map.remove(&targetcallnum); - eprintln!( - "[popen-trace|3i hashmap register] deregister self={} syscall={} before={}", - srccage, targetcallnum, before - ); if call_map.is_empty() { table.remove(&srccage); @@ -238,21 +216,9 @@ pub fn register_handler_impl( // Each (srccage, targetcallnum) pair keeps only one handler entry, // so we clear any existing mapping and replace it directly. - let before = target_map.len(); target_map.clear(); target_map.insert(handlefunccage, in_grate_fn_ptr_u64); - if targetcallnum == 33 { - eprintln!( - "[popen-trace|3i hashmap register] self={} syscall={} handler_cage={} before={} after={} fn=0x{:x}", - srccage, - targetcallnum, - handlefunccage, - before, - target_map.len(), - in_grate_fn_ptr_u64 - ); - } 0 } @@ -266,12 +232,6 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { // Does not overwrite any existing handlers in the target. 
if let Some(src_entry) = handler_table.get(&srccage).cloned() { let target_entry = handler_table.entry(targetcage).or_insert_with(HashMap::new); - let src_dup2_size = src_entry.get(&33).map(|target_map| target_map.len()).unwrap_or(0); - let dst_dup2_size_before = target_entry - .get(&33) - .map(|target_map| target_map.len()) - .unwrap_or(0); - let src_calls = src_entry.len(); for (callnum, callnum_map) in src_entry { let target_callnum_map = target_entry.entry(callnum).or_insert_with(HashMap::new); for (handlefunc, handlefunccage) in callnum_map { @@ -281,19 +241,6 @@ pub fn copy_handler_table_to_cage_impl(srccage: u64, targetcage: u64) -> u64 { .or_insert(handlefunccage); } } - let dst_dup2_size_after = target_entry - .get(&33) - .map(|target_map| target_map.len()) - .unwrap_or(0); - eprintln!( - "[popen-trace|3i hashmap copy_impl] source={} target={} src_calls={} src_dup2_size={} dst_dup2_before={} dst_dup2_after={}", - srccage, - targetcage, - src_calls, - src_dup2_size, - dst_dup2_size_before, - dst_dup2_size_after - ); 0 } else { eprintln!( diff --git a/src/threei/src/threei.rs b/src/threei/src/threei.rs index 5f44982063..8cf744fe26 100644 --- a/src/threei/src/threei.rs +++ b/src/threei/src/threei.rs @@ -355,27 +355,11 @@ pub fn copy_handler_table_to_cage( // Verifies that neither srccage nor targetcage are in the EXITING state to avoid // copying from or to a cage that may be invalid. 
if EXITING_TABLE.contains(&targetcage) || EXITING_TABLE.contains(&srccage) { - eprintln!( - "[popen-trace|3i copy_handler_table] rejected source={} target={} source_exiting={} target_exiting={}", - srccage, - targetcage, - EXITING_TABLE.contains(&srccage), - EXITING_TABLE.contains(&targetcage) - ); return threei_const::ELINDESRCH as u64; } // Actual implementation is in handler_table module according to feature flag - let ret = copy_handler_table_to_cage_impl(srccage, targetcage); - eprintln!( - "[popen-trace|3i copy_handler_table] source={} target={} ret={} feature_hashmap={} feature_dashmap={}", - srccage, - targetcage, - ret, - cfg!(feature = "hashmap"), - cfg!(feature = "dashmap") - ); - ret + copy_handler_table_to_cage_impl(srccage, targetcage) } /// actually performs a call. Not interposable From f706ed6ff61263141483df51415c3f3b9a0a87c5 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 01:58:42 -0400 Subject: [PATCH 22/24] Fix grate memory runtime formatting --- src/threei/src/handler_table/hashmap_impl.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/threei/src/handler_table/hashmap_impl.rs b/src/threei/src/handler_table/hashmap_impl.rs index ed57a9f537..94e97dbf4e 100644 --- a/src/threei/src/handler_table/hashmap_impl.rs +++ b/src/threei/src/handler_table/hashmap_impl.rs @@ -72,11 +72,7 @@ pub fn _check_cage_handler_exists(cageid: u64) -> bool { /// ## Panics: /// - If no entry exists for `self_cageid`. /// - If no entry exists for `syscall_num`. 
-pub fn _get_handler( - self_cageid: u64, - syscall_num: u64, - _target_cageid: u64, -) -> Option<(u64, u64)> { +pub fn _get_handler(self_cageid: u64, syscall_num: u64, _target_cageid: u64) -> Option<(u64, u64)> { let handler_table = HANDLERTABLE.lock().unwrap(); let call_map = handler_table.get(&self_cageid).unwrap_or_else(|| { From 619179da900736a7eb3ba60af9ec8027dbadedb9 Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 12:54:16 -0400 Subject: [PATCH 23/24] Leave hashmap handler table unchanged --- src/threei/src/handler_table/hashmap_impl.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/threei/src/handler_table/hashmap_impl.rs b/src/threei/src/handler_table/hashmap_impl.rs index 94e97dbf4e..553e80a67e 100644 --- a/src/threei/src/handler_table/hashmap_impl.rs +++ b/src/threei/src/handler_table/hashmap_impl.rs @@ -1,6 +1,7 @@ use crate::threei_const; -use std::collections::HashMap; +use std::collections::{hash_map::Entry, HashMap}; use std::sync::Mutex; +use sysdefs::constants::lind_platform_const; /// HANDLERTABLE: /// A nested hash map used to define fine-grained per-syscall interposition rules. @@ -72,7 +73,7 @@ pub fn _check_cage_handler_exists(cageid: u64) -> bool { /// ## Panics: /// - If no entry exists for `self_cageid`. /// - If no entry exists for `syscall_num`. 
-pub fn _get_handler(self_cageid: u64, syscall_num: u64, _target_cageid: u64) -> Option<(u64, u64)> { +pub fn _get_handler(self_cageid: u64, syscall_num: u64, target_cageid: u64) -> Option<(u64, u64)> { let handler_table = HANDLERTABLE.lock().unwrap(); let call_map = handler_table.get(&self_cageid).unwrap_or_else(|| { @@ -108,8 +109,8 @@ pub fn _get_handler(self_cageid: u64, syscall_num: u64, _target_cageid: u64) -> /// todo: a more efficient way to do clean up pub fn _rm_grate_from_handler(grateid: u64) { let mut table = HANDLERTABLE.lock().unwrap(); - for (_self_cageid, callmap) in table.iter_mut() { - for (_callnum, target_map) in callmap.iter_mut() { + for (_, callmap) in table.iter_mut() { + for (_, target_map) in callmap.iter_mut() { target_map.retain(|dest_grateid, _| *dest_grateid != grateid); } } From 7b65359a59defc10d43b0e818c4e0536d19db84d Mon Sep 17 00:00:00 2001 From: Nicholas Renner Date: Wed, 13 May 2026 13:15:26 -0400 Subject: [PATCH 24/24] Address grate memory flag review comments --- src/rawposix/src/fs_calls.rs | 10 ++-- .../src/constants/lind_platform_const.rs | 20 +++++-- src/typemap/src/datatype_conversion.rs | 52 +++++++++++++------ 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 9e6098a3bc..aa849a897f 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -886,11 +886,11 @@ pub extern "C" fn mmap_syscall( // Resolve (useraddr in calling cage, sysaddr host pointer). The addr // arrives in one of two forms, distinguished inside - // `sc_convert_uaddr_to_host` by comparison against `addr_cageid`'s base: - // - cage uaddr (small, < base): translated via the cage's base. - // - host pointer (>= base, e.g. glibc's TRANSLATE_*_TO_HOST already ran - // inside a grate-forwarded call with GRATE_MEMORY_FLAG): passthrough. 
- let mut useraddr: u32; + // `sc_convert_uaddr_to_host` by checking whether the address is already in + // `addr_cageid`'s host linear-memory reservation: + // - cage uaddr outside that host reservation: translated via the cage's base. + // - host pointer already inside that reservation: passthrough. + let useraddr: u32; let sysaddr: usize; if flags & MAP_FIXED as i32 == 0 { diff --git a/src/sysdefs/src/constants/lind_platform_const.rs b/src/sysdefs/src/constants/lind_platform_const.rs index 7c7686ee05..0eba133963 100644 --- a/src/sysdefs/src/constants/lind_platform_const.rs +++ b/src/sysdefs/src/constants/lind_platform_const.rs @@ -32,13 +32,27 @@ pub const MAXFD: usize = 1024; // Maximum file descriptors per cage pub const MAX_LINEAR_MEMORY_SIZE: u64 = 0xFFFF_FFFF; /// Placeholder for unused syscall argument pub const UNUSED_ARG: u64 = 0xDEADBEEF_DEADBEEF; -/// MSB of a syscall arg's cageid: signals that the arg should be treated as a -/// host-side reference into the named cage's linear memory, not as a uaddr -/// in the calling cage's memory. Mirrors `LIND_ARG_TRANSLATE_FLAG` in +/// MSB of a syscall arg's cageid: requests guest-to-host address translation +/// for that arg. Mirrors `LIND_ARG_TRANSLATE_FLAG` in /// `src/glibc/lind_syscall/addr_translation.h`. pub const GRATE_MEMORY_FLAG: u64 = 1u64 << 63; /// Mask to recover the actual cageid by clearing `GRATE_MEMORY_FLAG`. pub const LIND_ARG_CAGEID_MASK: u64 = !GRATE_MEMORY_FLAG; + +/// Return the actual cageid after clearing any syscall address-translation bit. +pub const fn lind_arg_cageid(cageid: u64) -> u64 { + cageid & LIND_ARG_CAGEID_MASK +} + +/// Return true when a syscall argument cageid requests address translation. +pub const fn lind_arg_needs_translation(cageid: u64) -> bool { + cageid & GRATE_MEMORY_FLAG != 0 +} + +/// Mark a syscall argument cageid as requiring address translation. 
+pub const fn lind_arg_with_translation(cageid: u64) -> u64 { + cageid | GRATE_MEMORY_FLAG +} /// Placeholder for unused cage/grate ID pub const UNUSED_ID: u64 = 0xCAFEBABE_CAFEBABE; /// Placeholder for unused syscall name diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index 7c634296e4..495108db77 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -10,7 +10,9 @@ use crate::cage_helpers::validate_cageid; use cage::get_cage; use std::error::Error; use std::os::raw::c_char; -use sysdefs::constants::lind_platform_const::{MAX_CAGEID, PATH_MAX}; +use sysdefs::constants::lind_platform_const::{ + lind_arg_cageid, MAX_CAGEID, MAX_LINEAR_MEMORY_SIZE, PATH_MAX, +}; use sysdefs::constants::lind_platform_const::{UNUSED_ARG, UNUSED_ID, UNUSED_NAME}; use sysdefs::constants::Errno; use sysdefs::data::fs_struct::{ @@ -260,6 +262,26 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { arg as *mut u8 } +#[inline] +fn host_addr_offset(sysaddr: usize, base_addr: usize) -> Option<u32> { + let offset = (sysaddr as u64).checked_sub(base_addr as u64)?; + if offset <= MAX_LINEAR_MEMORY_SIZE { + Some(offset as u32) + } else { + None + } +} + +/// Return true when `sysaddr` is inside the host reservation for the cage +/// whose linear-memory base is `base_addr`. +/// +/// For today's 4 GiB-aligned reservations this is equivalent to matching the +/// high bits against `base_addr`; this range form keeps the check correct if +/// the runtime stops requiring 4 GiB alignment. +pub fn sc_is_host_addr_in_cage_memory(sysaddr: usize, base_addr: usize) -> bool { + host_addr_offset(sysaddr, base_addr).is_some() +} + /// Inverse of `sc_convert_uaddr_to_host` — translate a host system address /// back to a uaddr in the named cage's linear memory. Used for return values /// of mmap-family syscalls and for bookkeeping into the cage's vmmap.
@@ -272,15 +294,12 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { /// - `Ok(uaddr)` truncated to u32 (cage user addresses fit in 32 bits on /// wasm32 lind). /// - `Err(Errno::EINVAL)` if the cage can't be looked up, its vmmap has no -/// base, or `sysaddr` is below the cage's base. +/// base, or `sysaddr` is outside the cage's linear memory reservation. pub fn sc_convert_host_to_uaddr(sysaddr: usize, cageid: u64) -> Result<u32, Errno> { let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; let vmmap = cage.vmmap.read(); let base = vmmap.base_address.ok_or(Errno::EINVAL)?; - if sysaddr < base { - return Err(Errno::EINVAL); - } - Ok((sysaddr - base) as u32) + host_addr_offset(sysaddr, base).ok_or(Errno::EINVAL) } /// This function translates the buffer pointer from user buffer address to system address, because we are @@ -298,21 +317,22 @@ pub fn sc_convert_buf(buf_arg: u64, _arg_cageid: u64, _cageid: u64) -> *const u8 /// Resolve a `(uaddr, addr_cageid)` syscall arg to a host system address. /// -/// Robust to either input form, distinguished by comparing against the named -/// cage's base: +/// Robust to either input form, distinguished by checking whether the address +/// is already inside the named cage's host linear-memory reservation: /// -/// - `uaddr < base_addr`: a cage-relative address in `addr_cageid`'s linear -/// memory. Translate by adding the base. `uaddr == 0` falls in this branch -/// and resolves to the cage's base — that's what `early_init_stack`-style -/// `MAP_FIXED at 0` mmaps need. -/// - `uaddr >= base_addr`: a pre-translated host pointer (e.g. glibc's -/// `TRANSLATE_*_TO_HOST` already ran inside a grate-forwarded call with -/// `GRATE_MEMORY_FLAG` set). Pass through unchanged. +/// - inside cage host memory: a pre-translated host pointer. Pass through +/// unchanged. +/// - outside cage host memory: a cage-relative address in `addr_cageid`'s +/// linear memory. Translate by adding the base.
`uaddr == 0` falls in this +/// branch and resolves to the cage's base — that's what +/// `early_init_stack`-style `MAP_FIXED at 0` mmaps need. /// /// `addr_cageid` picks the right cage's base: for a cage-userland call it's /// the calling cage; for a grate-forwarded call (FLAG stripped by glibc) it's /// the grate's id, which is what produced the host pointer in the first place. pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u64 { + let addr_cageid = lind_arg_cageid(addr_cageid); + #[cfg(feature = "secure")] { if !validate_cageid(addr_cageid, _cageid) { @@ -324,7 +344,7 @@ pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u let vmmap = cage.vmmap.read(); let base_addr = vmmap.base_address.unwrap() as u64; - if uaddr >= base_addr { + if sc_is_host_addr_in_cage_memory(uaddr as usize, base_addr as usize) { // Already a host pointer — caller (e.g. glibc's TRANSLATE_*_TO_HOST) // resolved it. Pass through. return uaddr;