diff --git a/src/rawposix/src/fs_calls.rs b/src/rawposix/src/fs_calls.rs index 84fed7977..aa849a897 100644 --- a/src/rawposix/src/fs_calls.rs +++ b/src/rawposix/src/fs_calls.rs @@ -829,13 +829,6 @@ pub extern "C" fn mmap_syscall( off_arg: u64, off_cageid: u64, ) -> i32 { - let addr = { - if addr_arg == 0 { - 0 as *mut u8 - } else { - sc_convert_to_u8_mut(addr_arg, addr_cageid, cageid) - } - }; let len = sc_convert_sysarg_to_usize(len_arg, len_cageid, cageid); let prot = sc_convert_sysarg_to_i32(prot_arg, prot_cageid, cageid); let mut flags = sc_convert_sysarg_to_i32(flags_arg, flags_cageid, cageid); @@ -871,9 +864,11 @@ pub extern "C" fn mmap_syscall( lind_debug_panic("mmap protection flag PROT_EXEC is not allowed in Lind"); } - // check if the provided address is multiple of pages - let rounded_addr = round_up_page(addr as u64); - if rounded_addr != addr as u64 { + // Page-align check on the low bits of addr_arg. Page alignment is a + // numeric property of the low bits regardless of which cage's base + // address gets added, since base_address is itself page-aligned. + let rounded_addr = round_up_page(addr_arg); + if rounded_addr != addr_arg { return syscall_error(Errno::EINVAL, "mmap", "address it not aligned"); } @@ -889,31 +884,50 @@ pub extern "C" fn mmap_syscall( // round up length to be multiple of pages let rounded_length = round_up_page(len as u64); - let mut useraddr = addr as u32; - // if MAP_FIXED is not set, then we need to find an address for the user + // Resolve (useraddr in calling cage, sysaddr host pointer). The addr + // arrives in one of two forms, distinguished inside + // `sc_convert_uaddr_to_host` by checking whether the address is already in + // `addr_cageid`'s host linear-memory reservation: + // - cage uaddr outside that host reservation: translated via the cage's base. + // - host pointer already inside that reservation: passthrough. 
+ let useraddr: u32; + let sysaddr: usize; + if flags & MAP_FIXED as i32 == 0 { - let vmmap = cage.vmmap.write(); - let result; + // No fixed address — runtime picks via the calling cage's vmmap. + // Treat addr_arg as a hint only if it is a host pointer inside the + // calling cage's reservation; otherwise ignore it (a raw uaddr below the + // cage base fails the conversion, and a grate-supplied host pointer isn't meaningful in the calling cage's address space). + let hint_useraddr = sc_convert_host_to_uaddr(addr_arg as usize, cageid).unwrap_or(0); - // pick an address of appropriate size, anywhere - if useraddr == 0 { - result = vmmap.find_map_space(rounded_length as u32 >> PAGESHIFT, 1); + let vmmap = cage.vmmap.write(); + let result = if hint_useraddr == 0 { + vmmap.find_map_space(rounded_length as u32 >> PAGESHIFT, 1) } else { - // use address user provided as hint to find address - result = vmmap.find_map_space_with_hint( + vmmap.find_map_space_with_hint( rounded_length as u32 >> PAGESHIFT, 1, - addr as u32 >> PAGESHIFT, - ); - } + hint_useraddr >> PAGESHIFT, + ) + }; - // did not find desired memory region if result.is_none() { return syscall_error(Errno::ENOMEM, "mmap", "no memory"); } - let space = result.unwrap(); - useraddr = (space.start() << PAGESHIFT) as u32; + useraddr = (result.unwrap().start() << PAGESHIFT) as u32; + sysaddr = vmmap.user_to_sys(useraddr); + drop(vmmap); + } else { + // Caller specified an exact address. + sysaddr = sc_convert_uaddr_to_host(addr_arg, addr_cageid, cageid) as usize; + // Derive the calling cage's uaddr for the return value + vmmap + // bookkeeping. Errors here mean the sysaddr is outside the cage's + // linear memory range — invalid for MAP_FIXED in this cage. 
+ useraddr = match sc_convert_host_to_uaddr(sysaddr, cageid) { + Ok(u) => u, + Err(e) => return syscall_error(e, "mmap", "addr outside cage"), + }; } flags |= MAP_FIXED as i32; @@ -923,12 +937,6 @@ pub extern "C" fn mmap_syscall( return syscall_error(Errno::EINVAL, "mmap", "invalid flags"); } - let vmmap = cage.vmmap.read(); - - let sysaddr = vmmap.user_to_sys(useraddr); - - drop(vmmap); - if rounded_length > 0 { if flags & MAP_ANONYMOUS as i32 > 0 { fildes = -1; @@ -1188,7 +1196,17 @@ pub extern "C" fn brk_syscall( arg6: u64, arg6_cageid: u64, ) -> i32 { - let brk = sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid); + // Cage-side glibc brk.c passes a raw uaddr (low 32 bits); the runtime + // page-aligns it and compares against vmmap.heap_start in user space. + // A grate forwarding the call via make_threei_call goes through glibc's + // TRANSLATE_ARG_TO_HOST which produces a host sysaddr — convert back to a + // cage uaddr before proceeding. `sc_convert_host_to_uaddr` returns Err + // when the arg is below the cage's base (i.e. already a small uaddr); in + // that case fall back to the standard u64→i32 cast. 
+ let brk = match sc_convert_host_to_uaddr(brk_arg as usize, cageid) { + Ok(u) => u as i32, + Err(_) => sc_convert_sysarg_to_i32(brk_arg, brk_cageid, cageid), + }; // would sometimes check, sometimes be a no-op depending on the compiler settings if !(sc_unusedarg(arg2, arg2_cageid) && sc_unusedarg(arg3, arg3_cageid) diff --git a/src/sysdefs/src/constants/lind_platform_const.rs b/src/sysdefs/src/constants/lind_platform_const.rs index bfe959f10..0eba13396 100644 --- a/src/sysdefs/src/constants/lind_platform_const.rs +++ b/src/sysdefs/src/constants/lind_platform_const.rs @@ -32,6 +32,27 @@ pub const MAXFD: usize = 1024; // Maximum file descriptors per cage pub const MAX_LINEAR_MEMORY_SIZE: u64 = 0xFFFF_FFFF; /// Placeholder for unused syscall argument pub const UNUSED_ARG: u64 = 0xDEADBEEF_DEADBEEF; +/// MSB of a syscall arg's cageid: requests guest-to-host address translation +/// for that arg. Mirrors `LIND_ARG_TRANSLATE_FLAG` in +/// `src/glibc/lind_syscall/addr_translation.h`. +pub const GRATE_MEMORY_FLAG: u64 = 1u64 << 63; +/// Mask to recover the actual cageid by clearing `GRATE_MEMORY_FLAG`. +pub const LIND_ARG_CAGEID_MASK: u64 = !GRATE_MEMORY_FLAG; + +/// Return the actual cageid after clearing any syscall address-translation bit. +pub const fn lind_arg_cageid(cageid: u64) -> u64 { + cageid & LIND_ARG_CAGEID_MASK +} + +/// Return true when a syscall argument cageid requests address translation. +pub const fn lind_arg_needs_translation(cageid: u64) -> bool { + cageid & GRATE_MEMORY_FLAG != 0 +} + +/// Mark a syscall argument cageid as requiring address translation. 
+pub const fn lind_arg_with_translation(cageid: u64) -> u64 { + cageid | GRATE_MEMORY_FLAG +} /// Placeholder for unused cage/grate ID pub const UNUSED_ID: u64 = 0xCAFEBABE_CAFEBABE; /// Placeholder for unused syscall name diff --git a/src/threei/src/threei.rs b/src/threei/src/threei.rs index 838146fbd..8cf744fe2 100644 --- a/src/threei/src/threei.rs +++ b/src/threei/src/threei.rs @@ -890,6 +890,15 @@ pub fn copy_data_between_cages( panic!("Dynamic allocation not yet supported in copy_data_between_cages"); } + // Resolve src/dest to host pointers. glibc's TRANSLATE_UADDR_TO_HOST only + // fires when the arg is in the calling cage; cross-cage args (e.g. a uaddr + // returned from a previous mmap that the caller is passing back in for + // someone else's cage) arrive untranslated. `sc_convert_uaddr_to_host` + // handles both forms — already-host passthrough, cage uaddr gets the + // named cage's base added. + let srcaddr = sc_convert_uaddr_to_host(srcaddr, srccage, _thiscage); + let destaddr = sc_convert_uaddr_to_host(destaddr, destcage, _thiscage); + // Decide the actual number of bytes to copy depending on CopyType. 
// // `RawMemcpy`: diff --git a/src/typemap/src/datatype_conversion.rs b/src/typemap/src/datatype_conversion.rs index ee30e0f4b..495108db7 100644 --- a/src/typemap/src/datatype_conversion.rs +++ b/src/typemap/src/datatype_conversion.rs @@ -10,7 +10,9 @@ use crate::cage_helpers::validate_cageid; use cage::get_cage; use std::error::Error; use std::os::raw::c_char; -use sysdefs::constants::lind_platform_const::{MAX_CAGEID, PATH_MAX}; +use sysdefs::constants::lind_platform_const::{ + lind_arg_cageid, MAX_CAGEID, MAX_LINEAR_MEMORY_SIZE, PATH_MAX, +}; use sysdefs::constants::lind_platform_const::{UNUSED_ARG, UNUSED_ID, UNUSED_NAME}; use sysdefs::constants::Errno; use sysdefs::data::fs_struct::{ @@ -260,6 +262,46 @@ pub fn sc_convert_to_u8_mut(arg: u64, arg_cageid: u64, cageid: u64) -> *mut u8 { arg as *mut u8 } +#[inline] +fn host_addr_offset(sysaddr: usize, base_addr: usize) -> Option { + let offset = (sysaddr as u64).checked_sub(base_addr as u64)?; + if offset <= MAX_LINEAR_MEMORY_SIZE { + Some(offset as u32) + } else { + None + } +} + +/// Return true when `sysaddr` is inside the host reservation for the cage +/// whose linear-memory base is `base_addr`. +/// +/// For today's 4 GiB-aligned reservations this is equivalent to matching the +/// high bits against `base_addr`; this range form keeps the check correct if +/// the runtime stops requiring 4 GiB alignment. +pub fn sc_is_host_addr_in_cage_memory(sysaddr: usize, base_addr: usize) -> bool { + host_addr_offset(sysaddr, base_addr).is_some() +} + +/// Inverse of `sc_convert_uaddr_to_host` — translate a host system address +/// back to a uaddr in the named cage's linear memory. Used for return values +/// of mmap-family syscalls and for bookkeeping into the cage's vmmap. +/// +/// ## Arguments +/// - `sysaddr`: the host system address. +/// - `cageid`: the cage whose user-address space we want. +/// +/// ## Returns +/// - `Ok(uaddr)` truncated to u32 (cage user addresses fit in 32 bits on +/// wasm32 lind). 
+/// - `Err(Errno::EINVAL)` if the cage can't be looked up, its vmmap has no +/// base, or `sysaddr` is outside the cage's linear memory reservation. +pub fn sc_convert_host_to_uaddr(sysaddr: usize, cageid: u64) -> Result { + let cage = get_cage(cageid).ok_or(Errno::EINVAL)?; + let vmmap = cage.vmmap.read(); + let base = vmmap.base_address.ok_or(Errno::EINVAL)?; + host_addr_offset(sysaddr, base).ok_or(Errno::EINVAL) +} + /// This function translates the buffer pointer from user buffer address to system address, because we are /// transferring between 32-bit WASM environment to 64-bit kernel /// @@ -273,16 +315,24 @@ pub fn sc_convert_buf(buf_arg: u64, _arg_cageid: u64, _cageid: u64) -> *const u8 buf_arg as *const u8 } -// TODO: This function can be removed/revamped significantly -// Leaving it in for now since it is used threei/ -/// ## Arguments: -/// - `uaddr`: The user address to convert (u64). -/// - `addr_cageid`: The cage ID associated with the address. -/// - `cageid`: The calling cage ID (used for validation in secure mode). +/// Resolve a `(uaddr, addr_cageid)` syscall arg to a host system address. /// -/// ## Returns: -/// - The host address as u64, or 0 if the address is null. +/// Robust to either input form, distinguished by checking whether the address +/// is already inside the named cage's host linear-memory reservation: +/// +/// - inside cage host memory: a pre-translated host pointer. Pass through +/// unchanged. +/// - outside cage host memory: a cage-relative address in `addr_cageid`'s +/// linear memory. Translate by adding the base. `uaddr == 0` falls in this +/// branch and resolves to the cage's base — that's what +/// `early_init_stack`-style `MAP_FIXED at 0` mmaps need. +/// +/// `addr_cageid` picks the right cage's base: for a cage-userland call it's +/// the calling cage; for a grate-forwarded call (FLAG stripped by glibc) it's +/// the grate's id, which is what produced the host pointer in the first place. 
pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u64 { + let addr_cageid = lind_arg_cageid(addr_cageid); + #[cfg(feature = "secure")] { if !validate_cageid(addr_cageid, _cageid) { @@ -290,21 +340,14 @@ pub fn sc_convert_uaddr_to_host(uaddr: u64, addr_cageid: u64, _cageid: u64) -> u } } - // Do not convert on NULL. - if uaddr == 0 { - return uaddr; - } - let cage = get_cage(addr_cageid).unwrap(); let vmmap = cage.vmmap.read(); let base_addr = vmmap.base_address.unwrap() as u64; - // Only convert to host if not already converted. - if uaddr >= base_addr { - panic!( - "sc_convert_uaddr_to_host: invalid uaddr {:#x} - expected a cage-relative address", - uaddr - ); + if sc_is_host_addr_in_cage_memory(uaddr as usize, base_addr as usize) { + // Already a host pointer — caller (e.g. glibc's TRANSLATE_*_TO_HOST) + // resolved it. Pass through. + return uaddr; } uaddr + base_addr diff --git a/tests/grate-tests/simple-tests/mmap-flag.c b/tests/grate-tests/simple-tests/mmap-flag.c new file mode 100644 index 000000000..45e1b38a7 --- /dev/null +++ b/tests/grate-tests/simple-tests/mmap-flag.c @@ -0,0 +1,51 @@ +/* Cage side of the mmap-with-GRATE_MEMORY_FLAG test. + * + * fd-backed mmap → write → read → munmap round-trip. The companion + * grate forwards this call to RawPOSIX with `arg1cage | GRATE_MEMORY_FLAG`, + * exercising the runtime's flag-aware path in mmap_syscall. + * + * Anonymous mmaps (including the runtime's own pre-main stack setup) + * are forwarded by the grate without the flag and aren't exercised here. 
+ */ + +#include +#include +#include +#include +#include + +#define FILE_PATH "mmap-flag.tmp" + +int main(void) { + const size_t size = 4096; + + int fd = open(FILE_PATH, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fd < 0) { + return 1; + } + if (ftruncate(fd, size) != 0) { + return 1; + } + + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + return 1; + } + + memset(p, 0x42, size); + for (size_t i = 0; i < size; i++) { + if (((unsigned char *)p)[i] != 0x42) { + return 1; + } + } + + if (munmap(p, size) != 0) { + return 1; + } + + close(fd); + unlink(FILE_PATH); + + printf("[Cage|mmap-flag] PASS\n"); + return 0; +} diff --git a/tests/grate-tests/simple-tests/mmap-flag_grate.c b/tests/grate-tests/simple-tests/mmap-flag_grate.c new file mode 100644 index 000000000..64273ed21 --- /dev/null +++ b/tests/grate-tests/simple-tests/mmap-flag_grate.c @@ -0,0 +1,121 @@ +/* Grate side of the mmap-with-GRATE_MEMORY_FLAG test. + * + * Registers an mmap handler that forwards the cage's mmap call to the + * runtime via make_threei_call, with `addr_cage` tagged with + * `GRATE_MEMORY_FLAG`. This exercises the runtime's flag-aware path in + * mmap_syscall (skip the truncate-and-translate-via-cage-vmmap step, treat + * the addr as a host sysaddr when non-zero). + * + * The companion cage test uses a file-backed MAP_SHARED mmap with addr=NULL; + * the runtime will pick an address. We're testing that the flag doesn't break + * the path, not MAP_FIXED placement. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Standard dispatcher used by every grate. Unchanged from the other + simple-tests grates. 
*/ +int pass_fptr_to_wt(uint64_t fn_ptr_uint, uint64_t cageid, uint64_t arg1, + uint64_t arg1cage, uint64_t arg2, uint64_t arg2cage, + uint64_t arg3, uint64_t arg3cage, uint64_t arg4, + uint64_t arg4cage, uint64_t arg5, uint64_t arg5cage, + uint64_t arg6, uint64_t arg6cage) { + if (fn_ptr_uint == 0) { + fprintf(stderr, "[Grate|mmap-flag] Invalid function ptr\n"); + assert(0); + } + + int (*fn)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t) = + (int (*)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t))(uintptr_t)fn_ptr_uint; + + return fn(cageid, arg1, arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, + arg4cage, arg5, arg5cage, arg6, arg6cage); +} + +/* mmap interception. + + Anonymous / fd == -1 mmaps (including the runtime's pre-main + early_init_stack call) are forwarded unchanged — the FLAG path is + meaningless for them and applying it would resolve addr against the + grate's base instead of the cage's, breaking those calls. + + File-backed mmaps (fd >= 0, !MAP_ANON) are forwarded with arg1cage + tagged GRATE_MEMORY_FLAG, exercising the runtime's flag-aware path + in mmap_syscall. */ +#define MAP_ANON_FLAG 0x20 + +int mmap_grate(uint64_t cageid, uint64_t arg1, uint64_t arg1cage, uint64_t arg2, + uint64_t arg2cage, uint64_t arg3, uint64_t arg3cage, + uint64_t arg4, uint64_t arg4cage, uint64_t arg5, + uint64_t arg5cage, uint64_t arg6, uint64_t arg6cage) { + (void)cageid; + int self_grate_id = getpid(); + int fd = (int)(int64_t)arg5; + int is_anonymous = (fd < 0) || ((arg4 & MAP_ANON_FLAG) != 0); + + /* The handler's first `cageid` param is the grate's own id (3i's + _call_grate_func passes grateid here). The calling cage's id is + carried in an integer arg's cage tag — use arg5cage (fd), which + isn't subject to pointer-translation rewrites. 
*/ + uint64_t calling_cage = arg5cage; + + uint64_t fwd_arg1cage = + is_anonymous ? arg1cage : (self_grate_id | GRATE_MEMORY_FLAG); + + return make_threei_call( + 9 /* MMAP_SYSCALL */, 0, self_grate_id, calling_cage, arg1, + fwd_arg1cage, arg2, arg2cage, arg3, arg3cage, arg4, arg4cage, arg5, + arg5cage, arg6, arg6cage, + 0 /* translate_errno off — propagate raw return */ + ); +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + assert(0); + } + + int grateid = getpid(); + pid_t pid = fork(); + if (pid < 0) { + perror("fork failed"); + assert(0); + } else if (pid == 0) { + int cageid = getpid(); + uint64_t fn_ptr_addr = (uint64_t)(uintptr_t)&mmap_grate; + register_handler(cageid, 9 /* MMAP_SYSCALL */, grateid, + fn_ptr_addr); + + if (execv(argv[1], &argv[1]) == -1) { + perror("execv failed"); + assert(0); + } + } + + int status; + while (wait(&status) > 0) { + if (status != 0) { + fprintf(stderr, + "[Grate|mmap-flag] FAIL: child exited with " + "status %d\n", + status); + assert(0); + } + } + + printf("[Grate|mmap-flag] PASS\n"); + return 0; +}