Skip to content

Commit f1742b2

Browse files
committed
Simplify cancellation
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent 2afd1a9 commit f1742b2

File tree

7 files changed

+835
-804
lines changed

7 files changed

+835
-804
lines changed

docs/cancellation.md

Lines changed: 439 additions & 0 deletions
Large diffs are not rendered by default.

src/hyperlight_host/src/hypervisor/hyperv_linux.rs

Lines changed: 20 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extern crate mshv_bindings;
1818
extern crate mshv_ioctls;
1919

2020
use std::fmt::{Debug, Formatter};
21-
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
21+
use std::sync::atomic::{AtomicBool, AtomicU64};
2222
use std::sync::{Arc, Mutex};
2323

2424
use log::{LevelFilter, error};
@@ -51,8 +51,8 @@ use super::gdb::{
5151
use super::{HyperlightExit, Hypervisor, LinuxInterruptHandle, VirtualCPU};
5252
#[cfg(gdb)]
5353
use crate::HyperlightError;
54-
use crate::hypervisor::get_memory_access_violation;
5554
use crate::hypervisor::regs::CommonFpu;
55+
use crate::hypervisor::{InterruptHandle, InterruptHandleImpl, get_memory_access_violation};
5656
use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
5757
use crate::mem::mgr::SandboxMemoryManager;
5858
use crate::mem::ptr::{GuestPtr, RawPtr};
@@ -273,7 +273,7 @@ pub(crate) struct HypervLinuxDriver {
273273
vcpu_fd: VcpuFd,
274274
orig_rsp: GuestPtr,
275275
entrypoint: u64,
276-
interrupt_handle: Arc<LinuxInterruptHandle>,
276+
interrupt_handle: Arc<dyn InterruptHandleImpl>,
277277
mem_mgr: Option<SandboxMemoryManager<HostSharedMemory>>,
278278
host_funcs: Option<Arc<Mutex<FunctionRegistry>>>,
279279

@@ -374,10 +374,8 @@ impl HypervLinuxDriver {
374374
vm_fd.map_user_memory(mshv_region)
375375
})?;
376376

377-
let interrupt_handle = Arc::new(LinuxInterruptHandle {
378-
running: AtomicU64::new(0),
379-
cancel_requested: AtomicU64::new(0),
380-
call_active: AtomicBool::new(false),
377+
let interrupt_handle: Arc<dyn InterruptHandleImpl> = Arc::new(LinuxInterruptHandle {
378+
state: AtomicU64::new(0),
381379
#[cfg(gdb)]
382380
debug_interrupt: AtomicBool::new(false),
383381
#[cfg(all(
@@ -500,8 +498,11 @@ impl Hypervisor for HypervLinuxDriver {
500498
};
501499
self.vcpu_fd.set_regs(&regs)?;
502500

501+
let interrupt_handle = self.interrupt_handle.clone();
502+
503503
VirtualCPU::run(
504504
self.as_mut_hypervisor(),
505+
interrupt_handle,
505506
#[cfg(gdb)]
506507
dbg_mem_access_fn,
507508
)
@@ -560,14 +561,15 @@ impl Hypervisor for HypervLinuxDriver {
560561
// reset fpu state
561562
self.set_fpu(&CommonFpu::default())?;
562563

564+
let interrupt_handle = self.interrupt_handle.clone();
565+
563566
// run
564567
VirtualCPU::run(
565568
self.as_mut_hypervisor(),
569+
interrupt_handle,
566570
#[cfg(gdb)]
567571
dbg_mem_access_fn,
568-
)?;
569-
570-
Ok(())
572+
)
571573
}
572574

573575
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
@@ -643,76 +645,11 @@ impl Hypervisor for HypervLinuxDriver {
643645
#[cfg(gdb)]
644646
const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT;
645647

646-
self.interrupt_handle
647-
.tid
648-
.store(unsafe { libc::pthread_self() as u64 }, Ordering::Release);
649-
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
650-
// Cast to internal trait for access to internal methods
651-
let interrupt_handle_internal =
652-
self.interrupt_handle.as_ref() as &dyn super::InterruptHandleInternal;
653-
654-
// (after set_running_bit but before checking cancel_requested):
655-
// - kill() will stamp cancel_requested with the current generation
656-
// - We will check cancel_requested below and skip the VcpuFd::run() call
657-
// - This is the desired behavior - the kill takes effect immediately
658-
let generation = interrupt_handle_internal.set_running_bit();
659-
660-
#[cfg(not(gdb))]
661-
let debug_interrupt = false;
662-
#[cfg(gdb)]
663-
let debug_interrupt = self
664-
.interrupt_handle
665-
.debug_interrupt
666-
.load(Ordering::Relaxed);
667-
668-
// Don't run the vcpu if `cancel_requested` is set for our generation
669-
//
670-
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
671-
// (after checking cancel_requested but before vcpu.run()):
672-
// - kill() will stamp cancel_requested with the current generation
673-
// - We will proceed with vcpu.run(), but signals will be sent to interrupt it
674-
// - The vcpu will be interrupted and return EINTR (handled below)
675-
let exit_reason = if interrupt_handle_internal
676-
.is_cancel_requested_for_generation(generation)
677-
|| debug_interrupt
678-
{
679-
Err(mshv_ioctls::MshvError::from(libc::EINTR))
680-
} else {
681-
#[cfg(feature = "trace_guest")]
682-
tc.setup_guest_trace(Span::current().context());
683-
684-
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
685-
// Then the vcpu will run, but we will keep sending signals to this thread
686-
// to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
687-
// return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error,
688-
// both of which are fine.
689-
self.vcpu_fd.run()
690-
};
691-
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
692-
// (after vcpu.run() returns but before clear_running_bit):
693-
// - kill() continues sending signals to this thread (running bit is still set)
694-
// - The signals are harmless (no-op handler), we just need to check cancel_requested
695-
// - We load cancel_requested below to determine if this run was cancelled
696-
let cancel_requested =
697-
interrupt_handle_internal.is_cancel_requested_for_generation(generation);
698-
#[cfg(gdb)]
699-
let debug_interrupt = self
700-
.interrupt_handle
701-
.debug_interrupt
702-
.load(Ordering::Relaxed);
703-
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
704-
// (after loading cancel_requested but before clear_running_bit):
705-
// - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded)
706-
// - kill() continues sending signals until running bit is cleared
707-
// - The newly stamped cancel_requested will affect the NEXT vcpu.run() call
708-
// - Signals sent now are harmless (no-op handler)
709-
interrupt_handle_internal.clear_running_bit();
710-
// At this point, running bit is clear so kill() will stop sending signals.
711-
// However, we may still receive delayed signals that were sent before clear_running_bit.
712-
// These stale signals are harmless because:
713-
// - The signal handler is a no-op
714-
// - We check generation matching in cancel_requested before treating EINTR as cancellation
715-
// - If generation doesn't match, we return Retry instead of Cancelled
648+
#[cfg(feature = "trace_guest")]
649+
tc.setup_guest_trace(Span::current().context());
650+
651+
let exit_reason = self.vcpu_fd.run();
652+
716653
let result = match exit_reason {
717654
Ok(m) => match m.header.message_type {
718655
HALT_MESSAGE => {
@@ -793,35 +730,7 @@ impl Hypervisor for HypervLinuxDriver {
793730
},
794731
Err(e) => match e.errno() {
795732
// We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR
796-
libc::EINTR => {
797-
// Check if cancellation was requested for THIS specific generation.
798-
// If not, the EINTR came from:
799-
// - A debug interrupt (if GDB is enabled)
800-
// - A stale signal from a previous guest call (generation mismatch)
801-
// - A signal meant for a different sandbox on the same thread
802-
// In these cases, we return Retry to continue execution.
803-
if cancel_requested {
804-
interrupt_handle_internal.clear_cancel_requested();
805-
HyperlightExit::Cancelled()
806-
} else {
807-
#[cfg(gdb)]
808-
if debug_interrupt {
809-
self.interrupt_handle
810-
.debug_interrupt
811-
.store(false, Ordering::Relaxed);
812-
813-
// If the vCPU was stopped because of an interrupt, we need to
814-
// return a special exit reason so that the gdb thread can handle it
815-
// and resume execution
816-
HyperlightExit::Debug(VcpuStopReason::Interrupt)
817-
} else {
818-
HyperlightExit::Retry()
819-
}
820-
821-
#[cfg(not(gdb))]
822-
HyperlightExit::Retry()
823-
}
824-
}
733+
libc::EINTR => HyperlightExit::Cancelled(),
825734
libc::EAGAIN => HyperlightExit::Retry(),
826735
_ => {
827736
crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self);
@@ -870,7 +779,7 @@ impl Hypervisor for HypervLinuxDriver {
870779
self as &mut dyn Hypervisor
871780
}
872781

873-
fn interrupt_handle(&self) -> Arc<dyn super::InterruptHandleInternal> {
782+
fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> {
874783
self.interrupt_handle.clone()
875784
}
876785

@@ -1102,7 +1011,7 @@ impl Hypervisor for HypervLinuxDriver {
11021011
impl Drop for HypervLinuxDriver {
11031012
#[instrument(skip_all, parent = Span::current(), level = "Trace")]
11041013
fn drop(&mut self) {
1105-
self.interrupt_handle.dropped.store(true, Ordering::Relaxed);
1014+
self.interrupt_handle.set_dropped();
11061015
for region in self.sandbox_regions.iter().chain(self.mmap_regions.iter()) {
11071016
let mshv_region: mshv_user_mem_region = region.to_owned().into();
11081017
match self.vm_fd.unmap_user_memory(mshv_region) {

0 commit comments

Comments
 (0)