From ab51b8bbcf15835c13add0fbc3134c8d8bf608c8 Mon Sep 17 00:00:00 2001 From: Qianxi Chen Date: Mon, 11 May 2026 09:02:46 +0000 Subject: [PATCH 1/2] dlopen within threads: epoch-based cross-thread library replay (#1028 Task 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When one thread calls dlopen(), all other running threads in the same cage now automatically get the library instantiated into their Wasm stores via a fire-and-forget epoch interrupt (EPOCH_DLOPEN = 0xd10ad). Runtime changes: - cage/signal: add epoch_dlopen_trigger_others() — sets EPOCH_DLOPEN on every other thread whose epoch is still EPOCH_NORMAL and sends SIGUSR2 to interrupt blocked syscalls - lind-multi-process/signal: add handle_dlopen_replay() — called from signal_handler on any thread that has pending replay entries; pre-grows the table, calls module_with_child, and registers library symbols for dlsym - lind-boot/execute: drop got_guard before epoch_dlopen_trigger_others to avoid lock contention; call epoch_dlopen_trigger_others after append_module - lind-multi-process/lib: snapshot dlopen_modules at the same point as global_snapshots (before asyncify unwind) to eliminate a TOCTOU race in pthread_create_call - wasmtime/linker: skip apply_global_snapshots when snapshots is empty (the dlopen replay path; globals are set by attach_memory_base/attach_table_base) - lind-utils (LindGOT::new_entry): pre-fill duplicate GOT handlers from symbol_cache so replaying threads see correct function table indices Test (thread_dlopen_concurrent.c): Uses a two-barrier design — workers first signal via g_ready that they are registered, then main calls dlopen (EPOCH_DLOPEN fires on all workers), then main releases them via g_go. The epoch check at pthread_barrier_wait's Wasm function entry runs handle_dlopen_replay before any library call. lib.c: add add(int,int) export used by the new test. 
--- src/cage/src/signal/signal.rs | 93 ++++++++++ src/lind-boot/src/lind_wasmtime/execute.rs | 38 ++-- .../crates/lind-multi-process/src/lib.rs | 103 +++++++++-- .../crates/lind-multi-process/src/signal.rs | 123 ++++++++++++- src/wasmtime/crates/lind-utils/src/lib.rs | 15 +- .../crates/wasmtime/src/runtime/func.rs | 18 ++ .../crates/wasmtime/src/runtime/linker.rs | 13 +- .../dylink_tests/deterministic/lib.c | 4 + .../deterministic/thread_dlopen_concurrent.c | 162 ++++++++++++++++++ 9 files changed, 533 insertions(+), 36 deletions(-) create mode 100644 tests/unit-tests/dylink_tests/deterministic/thread_dlopen_concurrent.c diff --git a/src/cage/src/signal/signal.rs b/src/cage/src/signal/signal.rs index 4954d4f9e3..b3b1a809f9 100644 --- a/src/cage/src/signal/signal.rs +++ b/src/cage/src/signal/signal.rs @@ -7,6 +7,7 @@ use sysdefs::logging::lind_debug_panic; const EPOCH_NORMAL: u64 = 0; const EPOCH_SIGNAL: u64 = 0xc0ffee; const EPOCH_KILLED: u64 = 0xdead; +pub const EPOCH_DLOPEN: u64 = 0xd10ad; // switch the epoch of the main thread of the cage to "signal" state // thread safety: this function could possibly be invoked by multiple threads of the same cage @@ -139,6 +140,98 @@ pub fn epoch_kill_all(cageid: u64, caller_tid: i32) { } } +// trigger EPOCH_DLOPEN on all other threads of the cage (fire-and-forget) +// Only writes EPOCH_DLOPEN if the current epoch is EPOCH_NORMAL — if already +// non-NORMAL, the callback will fire anyway and handle all pending work. +// Also sends SIGUSR2 to interrupt threads blocked in host syscalls. 
+pub fn epoch_dlopen_trigger_others(cageid: u64, caller_tid: i32) { + #[cfg(feature = "disable_signals")] + return; + + #[cfg(not(feature = "disable_signals"))] + { + let cage = match get_cage(cageid) { + Some(c) => c, + None => return, + }; + + for entry in cage.epoch_handler.iter() { + if entry.key() == &caller_tid { + continue; + } + let epoch_handler = entry.value(); + let epoch = epoch_handler.load(Ordering::Acquire); + if epoch.is_null() { + continue; + } + // SAFETY: see comment at `signal_epoch_trigger` + unsafe { + // Only overwrite EPOCH_NORMAL. If already EPOCH_SIGNAL or EPOCH_KILLED, + // the callback will fire and handle all pending work anyway. + if *epoch == EPOCH_NORMAL { + *epoch = EPOCH_DLOPEN; + } + } + } + + // Best-effort: interrupt threads blocked in host syscalls so they + // replay the dlopen promptly. + let my_tid = unsafe { libc::syscall(libc::SYS_gettid) }; + for entry in cage.os_tid_map.iter() { + let os_tid = *entry.value(); + if os_tid != my_tid { + unsafe { + libc::syscall(libc::SYS_tkill, os_tid as i32, libc::SIGUSR2); + } + } + } + } +} + +// reset the epoch of a specific thread back to EPOCH_NORMAL +// Used after dlopen replay completes on a non-main thread. +pub fn epoch_thread_reset(cageid: u64, thread_id: i32) { + #[cfg(feature = "disable_signals")] + return; + + #[cfg(not(feature = "disable_signals"))] + { + let cage = match get_cage(cageid) { + Some(c) => c, + None => return, + }; + + let epoch_handler = match cage.epoch_handler.get(&thread_id) { + Some(h) => h, + None => return, + }; + let epoch = epoch_handler.load(Ordering::Acquire); + if epoch.is_null() { + return; + } + // SAFETY: see comment at `signal_epoch_trigger` + unsafe { + *epoch = EPOCH_NORMAL; + } + } +} + +// returns true if the cage has more than one thread registered +// Used to skip the cross-thread sync path in single-threaded cages. 
+pub fn has_other_threads(cageid: u64, caller_tid: i32) -> bool { + #[cfg(feature = "disable_signals")] + return false; + + #[cfg(not(feature = "disable_signals"))] + { + let cage = match get_cage(cageid) { + Some(c) => c, + None => return false, + }; + cage.epoch_handler.len() > 1 && cage.epoch_handler.contains_key(&caller_tid) + } +} + // get the current epoch state of the thread // thread safety: this function will only be invoked by main thread of the cage fn get_epoch_state(cageid: u64, thread_id: u64) -> u64 { diff --git a/src/lind-boot/src/lind_wasmtime/execute.rs b/src/lind-boot/src/lind_wasmtime/execute.rs index 343f6719c1..aa85d467a0 100644 --- a/src/lind-boot/src/lind_wasmtime/execute.rs +++ b/src/lind-boot/src/lind_wasmtime/execute.rs @@ -5,7 +5,9 @@ use crate::lind_wasmtime::host::{ }; use crate::{cli::CliOptions, lind_wasmtime::host::HostCtx, lind_wasmtime::trampoline::*}; use anyhow::{Context, Result, anyhow, bail}; -use cage::signal::{lind_signal_init, signal_may_trigger}; +use cage::signal::{ + epoch_dlopen_trigger_others, has_other_threads, lind_signal_init, signal_may_trigger, +}; use cfg_if::cfg_if; use std::os::unix::fs::MetadataExt; use std::path::Path; @@ -491,10 +493,6 @@ fn attach_api( let linker = lind_ctx.linker.clone().unwrap(); let got_table = lind_ctx.got_table.clone().unwrap(); - if lind_ctx.had_threads() { - lind_debug_panic("dlopen within threads is currently not supported!"); - } - load_library_module(caller, linker, got_table, cageid, library_name, mode) }); Some(dynamic_loader) @@ -832,7 +830,7 @@ fn load_library_module( // the library's function references can be relocated correctly. // // The GOT is used to patch symbol addresses/indices after instantiation. 
- let ret = match linker.module_with_caller( + let (ret, memory_base, symbol_map_clone) = match linker.module_with_caller( &mut main_module, cageid as u64, library_name, @@ -842,17 +840,35 @@ fn load_library_module( symbol_map, library_name.to_string(), ) { - Ok(handle) => handle as i32, - Err(e) => { + Ok((handle, memory_base, symbol_map_clone)) => { + (handle as i32, memory_base, symbol_map_clone) + } + Err(_) => { #[cfg(feature = "debug-dylink")] - println!("failed to process library `{}`: {:?}", library_name, e); - -(DylinkErrorCode::EINTERNAL as i32) // consider as internal error for now + println!("failed to process library `{}`", library_name); + return -(DylinkErrorCode::EINTERNAL as i32); } }; + // Release the GOT lock before notifying workers. Worker threads that receive + // EPOCH_DLOPEN will call handle_dlopen_replay, which also acquires got_arc. + // Dropping got_guard here ensures they can proceed without contention. + drop(got_guard); + + let caller_tid = main_module.data().lind_fork_ctx.as_ref().unwrap().tid; let lind_ctx = main_module.data_mut().lind_fork_ctx.as_mut().unwrap(); lind_ctx.attach_linker(linker); - lind_ctx.append_module(library_name.to_string(), lib_module); + lind_ctx.append_module( + library_name.to_string(), + lib_module, + memory_base, + symbol_map_clone, + ); + + // Fire-and-forget: notify other threads in this cage to replay the new library. 
+ if has_other_threads(cageid as u64, caller_tid) { + epoch_dlopen_trigger_others(cageid as u64, caller_tid); + } ret } diff --git a/src/wasmtime/crates/lind-multi-process/src/lib.rs b/src/wasmtime/crates/lind-multi-process/src/lib.rs index 72cfe40402..32d5cf76fe 100644 --- a/src/wasmtime/crates/lind-multi-process/src/lib.rs +++ b/src/wasmtime/crates/lind-multi-process/src/lib.rs @@ -14,7 +14,7 @@ use sysdefs::logging::lind_debug_panic; use sysdefs::{constants::sys_const, data::sys_struct}; use threei::{threei::make_syscall, threei_const}; use wasmtime_lind_3i::*; -use wasmtime_lind_utils::{LindCageManager, LindGOT}; +use wasmtime_lind_utils::{LindCageManager, LindGOT, symbol_table::SymbolMap}; use std::ffi::CStr; use std::os::raw::c_char; @@ -73,7 +73,16 @@ pub struct LindCtx { // the module associated with the ctx modules: Vec<(String, String, Module)>, - dlopen_modules: Vec<(String, String, Module)>, + // Dynamically loaded modules from dlopen(), shared across threads of the same cage. + // Per-process (fork): child gets its own Arc with a snapshot of the parent's list. + // Per-thread (pthread_create): all threads share the same Arc so that cross-thread + // dlopen visibility works via the epoch-based replay mechanism. + // Tuple: (module_name, path, module, memory_base, symbol_map) + dlopen_modules: Arc>>, + + // Per-thread cursor into dlopen_modules. Entries from this index onward have not + // yet been replayed into this thread's store. + dlopen_replay_index: usize, // cage id pub cageid: i32, @@ -191,7 +200,8 @@ impl bool { + let list = self.dlopen_modules.lock().unwrap(); + self.dlopen_replay_index < list.len() + } + + // Snapshot the entries that have not yet been replayed into this thread's store. + pub fn pending_dlopen_entries(&self) -> Vec<(String, String, Module, i32, SymbolMap)> { + let list = self.dlopen_modules.lock().unwrap(); + list[self.dlopen_replay_index..].to_vec() + } + + // Advance the per-thread replay cursor after successful replay. 
+ pub fn advance_dlopen_replay(&mut self, count: usize) { + self.dlopen_replay_index += count; } // The way multi-processing works depends on Asyncify from Binaryen. Asyncify marks the process into 3 states: @@ -431,7 +463,10 @@ impl + Clone + Send + 'static + std::marker::Sync, + U: Clone + Send + 'static + std::marker::Sync, +>( + caller: &mut Caller<'_, T>, + cageid: u64, +) { + let entries = caller.data().get_ctx().pending_dlopen_entries(); + if entries.is_empty() { + return; + } + + let mut table = match caller.get_function_table() { + Some(t) => t, + None => return, + }; + + let got_arc = caller.data().get_ctx().got_table.clone(); + + for (name, _path, module, memory_base, symbol_map) in &entries { + let dylink_info = match module.dylink_meminfo() { + Some(d) => d, + None => continue, + }; + + // Mirror the main-thread dlopen path (load_library_module): record the + // table size before the pre-grow, then grow by dylink_info.table_size. + // module_with_child → apply_GOT_relocs then appends one slot per + // exported function, producing the same absolute indices that the + // main-thread's grow_table_lib calls used. The GOT globals in this + // thread are pre-filled by LindGOT::new_entry from the symbol_cache, + // so they point to the correct indices without needing a GOT update here. 
+ let table_start = table.size(caller.as_context_mut()) as i32; + let _ = table.grow( + caller.as_context_mut(), + dylink_info.table_size as u64, + Ref::Func(None), + ); + + let mut linker = match caller.data().get_ctx().linker.clone() { + Some(l) => l, + None => continue, + }; + linker.allow_shadowing(true); + + if let Some(ref got_arc) = got_arc { + let mut got_guard = got_arc.lock().unwrap(); + let _ = linker.define_GOT_dispatcher(&mut *caller, module, &mut *got_guard); + } + + let _ = linker.module_with_child( + &mut *caller, + cageid, + name, + module, + &mut table, + table_start, + *memory_base, + ChildLibraryType::Process, + &[], + ); + linker.allow_shadowing(false); + + // Register the library's symbols so dlsym works in this thread. + let _ = caller.push_library_symbols(symbol_map.clone()); + } + + caller + .data_mut() + .get_ctx_mut() + .advance_dlopen_replay(entries.len()); +} + // handle all the epoch callback // this is where the wasm instance is directed when epoch is triggered // this function could possibly be on the callstack of the Asyncify operation @@ -31,8 +117,9 @@ pub fn signal_handler< let host = caller.data().clone(); let ctx = host.get_ctx(); let cageid = ctx.cageid as u64; + let tid = ctx.tid; - if cage::signal::thread_check_killed(cageid, ctx.tid as u64) { + if cage::signal::thread_check_killed(cageid, tid as u64) { // If asyncify is already unwinding (e.g. exit_call was already // triggered by a prior thread-only exit via syscall 60), don't // call exit_call again — a double asyncify_start_unwind corrupts @@ -60,6 +147,23 @@ pub fn signal_handler< return 0; } + // Priority: dlopen replay. + // Fires regardless of whether EPOCH_DLOPEN or EPOCH_SIGNAL triggered this + // callback. If a thread was already handling a signal when dlopen fired, + // both the replay and the signal are handled in this single callback. 
+ if caller.data().get_ctx().has_pending_dlopen_replay() { + handle_dlopen_replay(caller, cageid); + } + + // Non-main threads only handle killed (above) and dlopen replay. + // After replay, reset the thread's epoch and return. + if tid != MAIN_THREADID as i32 { + if !caller.data().get_ctx().has_pending_dlopen_replay() { + cage::signal::epoch_thread_reset(cageid, tid); + } + return 0; + } + // retrieve glibc's signal callback function, see line #87 in glibc/sysdeps/unix/sysv/linux/i386/libc_sigaction.c for more detail let signal_func = caller.get_signal_callback().unwrap(); @@ -87,9 +191,11 @@ pub fn signal_handler< break; } - // if this is the last pending (unblocked) signal in list, we should reset epoch - if cage::signal::lind_check_no_pending_signal(cageid) { - cage::signal::signal_epoch_reset(cageid); + // Reset epoch when this is the last pending signal AND no dlopen replay is outstanding. + if cage::signal::lind_check_no_pending_signal(cageid) + && !caller.data().get_ctx().has_pending_dlopen_replay() + { + cage::signal::epoch_thread_reset(cageid, tid); } let (signo, signal_handler, restorer) = signal.unwrap(); @@ -181,5 +287,12 @@ pub fn signal_handler< } } + // If the main thread had only a dlopen replay (no signals), reset the epoch here. + if cage::signal::lind_check_no_pending_signal(cageid) + && !caller.data().get_ctx().has_pending_dlopen_replay() + { + cage::signal::epoch_thread_reset(cageid, tid); + } + 0 } diff --git a/src/wasmtime/crates/lind-utils/src/lib.rs b/src/wasmtime/crates/lind-utils/src/lib.rs index 20883521aa..f884fde8fe 100644 --- a/src/wasmtime/crates/lind-utils/src/lib.rs +++ b/src/wasmtime/crates/lind-utils/src/lib.rs @@ -164,8 +164,19 @@ impl LindGOT { } self.global_offset_table.insert(name, handler as u64); } else { - #[cfg(feature = "debug-dylink")] - println!("[debug] Warning: ignore duplicated GOT entry {}", name); + // Symbol already registered (e.g. main thread's handler is primary). 
+ // This new handler belongs to a different thread's store (dlopen replay). + // Pre-fill it from the cache so the replaying thread's GOT cell is live. + if let Some(cached_val) = self.symbol_cache.get(&name) { + let val = *cached_val; + let cell = unsafe { &*(handler as *const AtomicU32) }; + cell.store(val, Ordering::Release); + #[cfg(feature = "debug-dylink")] + println!( + "[debug] pre-resolve duplicate GOT entry {} = {} from cache", + name, val + ); + } } } diff --git a/src/wasmtime/crates/wasmtime/src/runtime/func.rs b/src/wasmtime/crates/wasmtime/src/runtime/func.rs index 532bb9c912..b82c795d1f 100644 --- a/src/wasmtime/crates/wasmtime/src/runtime/func.rs +++ b/src/wasmtime/crates/wasmtime/src/runtime/func.rs @@ -2424,6 +2424,24 @@ impl Caller<'_, T> { /// Return the current size of the library's function table (table index 0). /// Returns 0 if the module has no indirect function table. + + /// Get the indirect function table as a `Table` usable from host code. + /// + /// Tries the export name `__indirect_function_table` first. Falls back to + /// accessing table 0 by index directly, which is correct in Lind's dylink + /// model where the imported `__indirect_function_table` is always table 0. + pub fn get_function_table(&mut self) -> Option { + if let Some(crate::Extern::Table(t)) = self.get_export("__indirect_function_table") { + return Some(t); + } + // Fallback: access table 0 by index. The table is imported (not + // exported) in worker thread stores, so the name lookup above misses it. + let store_id = self.store.0.id(); + let (instance, _registry) = self.caller.id.get_mut_and_module_registry(self.store.0); + Some(instance.get_exported_table(store_id, TableIndex::from_u32(0))) + } + + /// This directly accesses the underlying VM table structure and queries its size. 
pub fn get_table_size(&mut self) -> u32 { match self .caller diff --git a/src/wasmtime/crates/wasmtime/src/runtime/linker.rs b/src/wasmtime/crates/wasmtime/src/runtime/linker.rs index 5237723d96..330c20d13a 100644 --- a/src/wasmtime/crates/wasmtime/src/runtime/linker.rs +++ b/src/wasmtime/crates/wasmtime/src/runtime/linker.rs @@ -1380,7 +1380,11 @@ impl Linker { instance.apply_GOT_relocs(&mut store, None, table, None, fpcast_enabled)?; // clone the wasm global for the child instance - instance.apply_global_snapshots(&mut store, snapshots); + // Skip when snapshots is empty (dlopen replay path — globals + // are already set by attach_memory_base/attach_table_base). + if !snapshots.is_empty() { + instance.apply_global_snapshots(&mut store, snapshots); + } if let ChildLibraryType::Thread(stack_addr) = child_type { // if the child library is a thread, we need to initialize the TLS for the library @@ -1423,7 +1427,7 @@ impl Linker { got: &LindGOT, mut symbol_map: SymbolMap, path: String, - ) -> Result + ) -> Result<(u64, i32, SymbolMap)> where T: 'static, { @@ -1601,6 +1605,9 @@ impl Linker { let is_local = symbol_map.is_local(); + // Clone before consuming so callers can store it for cross-thread replay. + let symbol_map_clone = symbol_map.clone(); + // append the symbol mapping of this library into the global lookup table let handler = store.push_library_symbols(symbol_map).unwrap() as u64; @@ -1617,7 +1624,7 @@ impl Linker { self.instance_dylink(store, module_name, instance, vec![]); } - Ok(handler) + Ok((handler, memory_base as i32, symbol_map_clone)) } } } diff --git a/tests/unit-tests/dylink_tests/deterministic/lib.c b/tests/unit-tests/dylink_tests/deterministic/lib.c index 2570e665cb..e88b4103e3 100644 --- a/tests/unit-tests/dylink_tests/deterministic/lib.c +++ b/tests/unit-tests/dylink_tests/deterministic/lib.c @@ -3,3 +3,7 @@ void hello(const char *name) { printf("Hello, %s! 
(from shared library)\n", name); } + +int add(int a, int b) { + return a + b; +} diff --git a/tests/unit-tests/dylink_tests/deterministic/thread_dlopen_concurrent.c b/tests/unit-tests/dylink_tests/deterministic/thread_dlopen_concurrent.c new file mode 100644 index 0000000000..44ad0dcaba --- /dev/null +++ b/tests/unit-tests/dylink_tests/deterministic/thread_dlopen_concurrent.c @@ -0,0 +1,162 @@ +/* + * thread_dlopen_concurrent.c + * + * Tests Task 3 (issue #1028): dlopen called from one thread while other threads + * are already running. + * + * Scenario: + * 1. Spawn NUM_WORKERS threads. They block on g_ready, signalling to main + * that they are live and registered in the epoch system. + * 2. Main waits at g_ready (ensuring all workers are registered), then calls + * dlopen(). epoch_dlopen_trigger_others fires EPOCH_DLOPEN on every + * registered worker thread. + * 3. Main releases workers via g_go. The epoch check at pthread_barrier_wait's + * function entry delivers the pending EPOCH_DLOPEN (handle_dlopen_replay), + * so lib.cwasm is loaded into each worker's store before any library call. + * 4. Each worker calls the dlopen'd function via a function pointer that was + * passed from the main thread, AND independently via dlsym(). Both paths + * must succeed for the test to pass. + * 5. Main thread joins all workers and verifies exit codes. + * + * Two-barrier design: + * g_ready (count = NUM_WORKERS + 1): + * Workers call this first so main can confirm they are all running + * (registered) before invoking dlopen. Without this guarantee, + * epoch_dlopen_trigger_others might fire before some threads are visible. + * + * g_go (count = NUM_WORKERS + 1): + * Main calls this after dlopen. The epoch check at pthread_barrier_wait's + * Wasm function entry processes EPOCH_DLOPEN (handle_dlopen_replay) before + * workers execute any indirect call into the library. 
+ */ + +#include +#include +#include +#include + +#define NUM_WORKERS 4 + +typedef int (*add_fn)(int, int); + +/* Shared state written by main after dlopen, read by workers after g_go. */ +static void *g_handle = NULL; +static add_fn g_add_fp = NULL; + +static pthread_barrier_t g_ready; /* workers → main: "I am alive and registered" */ +static pthread_barrier_t g_go; /* main → workers: "dlopen is done, proceed" */ + +static void *worker(void *arg) +{ + long id = (long)arg; + + /* Signal to main that this thread is alive and registered in the epoch + * system. Main will not call dlopen until all workers reach here. */ + pthread_barrier_wait(&g_ready); + + /* Wait for main to complete dlopen. + * The epoch check at pthread_barrier_wait's Wasm function entry delivers + * any pending EPOCH_DLOPEN (handle_dlopen_replay), ensuring lib.cwasm is + * installed into this thread's store before we call any library symbol. */ + pthread_barrier_wait(&g_go); + + /* ---- call via inherited function pointer ---- */ + int result1 = g_add_fp(3, 4); + if (result1 != 7) { + fprintf(stderr, "worker %ld: add via fp gave %d, expected 7\n", id, result1); + return (void *)1L; + } + + /* ---- call via independent dlsym in this thread ---- */ + dlerror(); + add_fn fn2 = (add_fn)dlsym(g_handle, "add"); + char *err = dlerror(); + if (err) { + fprintf(stderr, "worker %ld: dlsym failed: %s\n", id, err); + return (void *)2L; + } + if (!fn2) { + fprintf(stderr, "worker %ld: dlsym returned NULL\n", id); + return (void *)3L; + } + + int result2 = fn2(10, 20); + if (result2 != 30) { + fprintf(stderr, "worker %ld: add via dlsym gave %d, expected 30\n", id, result2); + return (void *)4L; + } + + return (void *)0L; +} + +int main(void) +{ + if (pthread_barrier_init(&g_ready, NULL, NUM_WORKERS + 1) != 0 || + pthread_barrier_init(&g_go, NULL, NUM_WORKERS + 1) != 0) { + fprintf(stderr, "barrier_init failed\n"); + return 1; + } + + /* Spawn workers before dlopen so they are live when dlopen fires. 
*/ + pthread_t tids[NUM_WORKERS]; + for (long i = 0; i < NUM_WORKERS; i++) { + if (pthread_create(&tids[i], NULL, worker, (void *)i) != 0) { + fprintf(stderr, "pthread_create failed for worker %ld\n", i); + return 1; + } + } + + /* Wait until all workers have reached g_ready, guaranteeing they are + * registered in the epoch system before we call dlopen. */ + pthread_barrier_wait(&g_ready); + + /* dlopen while all workers are alive and registered. + * epoch_dlopen_trigger_others sets EPOCH_DLOPEN on every worker. */ + g_handle = dlopen("lib.cwasm", RTLD_LAZY | RTLD_GLOBAL); + if (!g_handle) { + fprintf(stderr, "dlopen failed: %s\n", dlerror()); + return 1; + } + + dlerror(); + g_add_fp = (add_fn)dlsym(g_handle, "add"); + char *err = dlerror(); + if (err || !g_add_fp) { + fprintf(stderr, "dlsym(add) failed: %s\n", err ? err : "(null symbol)"); + dlclose(g_handle); + return 1; + } + + /* Confirm the library works in the main thread before releasing workers. */ + int main_result = g_add_fp(1, 2); + if (main_result != 3) { + fprintf(stderr, "main: add gave %d, expected 3\n", main_result); + dlclose(g_handle); + return 1; + } + + /* Release workers. The epoch check at pthread_barrier_wait's entry in + * each worker processes EPOCH_DLOPEN before any library call. 
*/ + pthread_barrier_wait(&g_go); + + int failed = 0; + for (int i = 0; i < NUM_WORKERS; i++) { + void *ret; + pthread_join(tids[i], &ret); + if ((long)ret != 0) { + fprintf(stderr, "worker %d reported error %ld\n", i, (long)ret); + failed = 1; + } + } + + dlclose(g_handle); + pthread_barrier_destroy(&g_ready); + pthread_barrier_destroy(&g_go); + + if (failed) { + fprintf(stderr, "FAIL\n"); + return 1; + } + printf("OK\n"); + return 0; +} From 2097fc90ec2acbd3a0042393fa68bdca67965c8b Mon Sep 17 00:00:00 2001 From: Qianxi Chen Date: Mon, 25 May 2026 10:18:25 +0000 Subject: [PATCH 2/2] update skip_test_cases.txt --- skip_test_cases.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/skip_test_cases.txt b/skip_test_cases.txt index c1c56dfd3a..f647efe603 100644 --- a/skip_test_cases.txt +++ b/skip_test_cases.txt @@ -11,5 +11,6 @@ dylink_tests/deterministic/double_fork_dlopen.c dylink_tests/deterministic/fork_dlopen.c dylink_tests/deterministic/rdynamic_main.c dylink_tests/deterministic/rdynamic_lib.c +dylink_tests/deterministic/thread_dlopen_concurrent.c process_tests/deterministic/fork_max_cages.c ci/deterministic/ci_intentional_failure_tmp.c