@@ -2,8 +2,9 @@ use std::cell::RefCell;
22use std:: collections:: BTreeMap ;
33use std:: io;
44use std:: rc:: { Rc , Weak } ;
5+ use std:: time:: Duration ;
56
6- use crate :: shims:: unix:: fd:: { FdId , FileDescriptionRef } ;
7+ use crate :: shims:: unix:: fd:: { FdId , FileDescriptionRef , WeakFileDescriptionRef } ;
78use crate :: shims:: unix:: * ;
89use crate :: * ;
910
@@ -19,6 +20,8 @@ struct Epoll {
1920 // This is an Rc because EpollInterest need to hold a reference to update
2021 // it.
2122 ready_list : Rc < RefCell < BTreeMap < ( FdId , i32 ) , EpollEventInstance > > > ,
23+ /// A list of thread ids blocked on this epoll instance.
24+ thread_id : RefCell < Vec < ThreadId > > ,
2225}
2326
2427/// EpollEventInstance contains information that will be returned by epoll_wait.
@@ -58,6 +61,8 @@ pub struct EpollEventInterest {
5861 data : u64 ,
5962 /// Ready list of the epoll instance under which this EpollEventInterest is registered.
6063 ready_list : Rc < RefCell < BTreeMap < ( FdId , i32 ) , EpollEventInstance > > > ,
64+ /// The file descriptor value that this EpollEventInterest is registered under.
65+ epfd : i32 ,
6166}
6267
6368/// EpollReadyEvents reflects the readiness of a file description.
@@ -338,6 +343,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
338343 events,
339344 data,
340345 ready_list : Rc :: clone ( ready_list) ,
346+ epfd : epfd_value,
341347 } ) ) ;
342348
343349 if op == epoll_ctl_add {
@@ -395,7 +401,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
395401
396402 /// The `timeout` argument specifies the number of milliseconds that
397403 /// `epoll_wait()` will block. Time is measured against the
398- /// CLOCK_MONOTONIC clock.
404+ /// CLOCK_MONOTONIC clock. If the timeout is zero, the function will not block,
405+ /// while if the timeout is -1, the function will block
406+ /// until at least one event has been retrieved (or an error
407+ /// occurred).
399408
400409 /// A call to `epoll_wait()` will block until either:
401410 /// • a file descriptor delivers an event;
@@ -421,59 +430,100 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
421430 events_op : & OpTy < ' tcx > ,
422431 maxevents : & OpTy < ' tcx > ,
423432 timeout : & OpTy < ' tcx > ,
424- ) -> InterpResult < ' tcx , Scalar > {
433+ dest : & MPlaceTy < ' tcx > ,
434+ ) -> InterpResult < ' tcx > {
425435 let this = self . eval_context_mut ( ) ;
426436
427- let epfd = this. read_scalar ( epfd) ?. to_i32 ( ) ?;
437+ let epfd_value = this. read_scalar ( epfd) ?. to_i32 ( ) ?;
428438 let events = this. read_immediate ( events_op) ?;
429439 let maxevents = this. read_scalar ( maxevents) ?. to_i32 ( ) ?;
430440 let timeout = this. read_scalar ( timeout) ?. to_i32 ( ) ?;
431441
432- if epfd <= 0 || maxevents <= 0 {
442+ if epfd_value <= 0 || maxevents <= 0 {
433443 let einval = this. eval_libc ( "EINVAL" ) ;
434444 this. set_last_error ( einval) ?;
435- return Ok ( Scalar :: from_i32 ( -1 ) ) ;
445+ this. write_int ( -1 , dest) ?;
446+ return Ok ( ( ) ) ;
436447 }
437448
438449 // This needs to come after the maxevents value check, or else maxevents.try_into().unwrap()
439450 // will fail.
440- let events = this. deref_pointer_as (
451+ let event = this. deref_pointer_as (
441452 & events,
442453 this. libc_array_ty_layout ( "epoll_event" , maxevents. try_into ( ) . unwrap ( ) ) ,
443454 ) ?;
444455
445- // FIXME: Implement blocking support
446- if timeout != 0 {
447- throw_unsup_format ! ( "epoll_wait: timeout value can only be 0" ) ;
448- }
449-
450- let Some ( epfd) = this. machine . fds . get ( epfd) else {
451- return Ok ( Scalar :: from_i32 ( this. fd_not_found ( ) ?) ) ;
456+ let Some ( epfd) = this. machine . fds . get ( epfd_value) else {
457+ let result_value: i32 = this. fd_not_found ( ) ?;
458+ this. write_int ( result_value, dest) ?;
459+ return Ok ( ( ) ) ;
452460 } ;
453- let epoll_file_description = epfd
454- . downcast :: < Epoll > ( )
455- . ok_or_else ( || err_unsup_format ! ( "non-epoll FD passed to `epoll_wait`" ) ) ?;
456-
457- let ready_list = epoll_file_description. get_ready_list ( ) ;
458- let mut ready_list = ready_list. borrow_mut ( ) ;
459- let mut num_of_events: i32 = 0 ;
460- let mut array_iter = this. project_array_fields ( & events) ?;
461-
462- while let Some ( des) = array_iter. next ( this) ? {
463- if let Some ( epoll_event_instance) = ready_list_next ( this, & mut ready_list) {
464- this. write_int_fields_named (
465- & [
466- ( "events" , epoll_event_instance. events . into ( ) ) ,
467- ( "u64" , epoll_event_instance. data . into ( ) ) ,
468- ] ,
469- & des. 1 ,
470- ) ?;
471- num_of_events = num_of_events. strict_add ( 1 ) ;
472- } else {
473- break ;
474- }
461+ // Create a weak ref of epfd and pass it to callback so we will make sure that epfd
462+ // is not close after the thread unblocks.
463+ let weak_epfd = epfd. downgrade ( ) ;
464+
465+ // We just need to know if the ready list is empty and borrow the thread_ids out.
466+ // The whole logic is wrapped inside a block so we don't need to manually drop epfd later.
467+ let ready_list_empty;
468+ let mut thread_ids;
469+ {
470+ let epoll_file_description = epfd
471+ . downcast :: < Epoll > ( )
472+ . ok_or_else ( || err_unsup_format ! ( "non-epoll FD passed to `epoll_wait`" ) ) ?;
473+ let binding = epoll_file_description. get_ready_list ( ) ;
474+ ready_list_empty = binding. borrow_mut ( ) . is_empty ( ) ;
475+ thread_ids = epoll_file_description. thread_id . borrow_mut ( ) ;
475476 }
476- Ok ( Scalar :: from_i32 ( num_of_events) )
477+ if timeout == 0 || !ready_list_empty {
478+ // If the ready list is not empty, or the timeout is 0, we can return immediately.
479+ blocking_epoll_callback ( epfd_value, weak_epfd, dest, & event, this) ?;
480+ } else {
481+ // Blocking
482+ let timeout = match timeout {
483+ 0 .. => {
484+ let duration = Duration :: from_millis ( timeout. try_into ( ) . unwrap ( ) ) ;
485+ Some ( ( TimeoutClock :: Monotonic , TimeoutAnchor :: Relative , duration) )
486+ }
487+ -1 => None ,
488+ ..-1 => {
489+ throw_unsup_format ! (
490+ "epoll_wait: Only timeout values greater than or equal to -1 are supported."
491+ ) ;
492+ }
493+ } ;
494+ thread_ids. push ( this. active_thread ( ) ) ;
495+ let dest = dest. clone ( ) ;
496+ this. block_thread (
497+ BlockReason :: Epoll ,
498+ timeout,
499+ callback ! (
500+ @capture<' tcx> {
501+ epfd_value: i32 ,
502+ weak_epfd: WeakFileDescriptionRef ,
503+ dest: MPlaceTy <' tcx>,
504+ event: MPlaceTy <' tcx>,
505+ }
506+ @unblock = |this| {
507+ blocking_epoll_callback( epfd_value, weak_epfd, & dest, & event, this) ?;
508+ Ok ( ( ) )
509+ }
510+ @timeout = |this| {
511+ // No notification after blocking timeout.
512+ let Some ( epfd) = weak_epfd. upgrade( ) else {
513+ throw_unsup_format!( "epoll FD {epfd_value} got closed while blocking." )
514+ } ;
515+ // Remove the current active thread_id from the blocked thread_id list.
516+ epfd. downcast:: <Epoll >( )
517+ . ok_or_else( || err_unsup_format!( "non-epoll FD passed to `epoll_wait`" ) ) ?
518+ . thread_id. borrow_mut( )
519+ . retain( |& id| id != this. active_thread( ) ) ;
520+ this. write_int( 0 , & dest) ?;
521+ Ok ( ( ) )
522+ }
523+ ) ,
524+ ) ;
525+ }
526+ Ok ( ( ) )
477527 }
478528
479529 /// For a specific file description, get its ready events and update the corresponding ready
@@ -483,17 +533,47 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
483533 ///
484534 /// This *will* report an event if anyone is subscribed to it, without any further filtering, so
485535 /// do not call this function when an FD didn't have anything happen to it!
486- fn check_and_update_readiness ( & self , fd_ref : & FileDescriptionRef ) -> InterpResult < ' tcx , ( ) > {
487- let this = self . eval_context_ref ( ) ;
536+ fn check_and_update_readiness (
537+ & mut self ,
538+ fd_ref : & FileDescriptionRef ,
539+ ) -> InterpResult < ' tcx , ( ) > {
540+ let this = self . eval_context_mut ( ) ;
488541 let id = fd_ref. get_id ( ) ;
542+ let mut waiter = Vec :: new ( ) ;
489543 // Get a list of EpollEventInterest that is associated to a specific file description.
490544 if let Some ( epoll_interests) = this. machine . epoll_interests . get_epoll_interest ( id) {
491545 for weak_epoll_interest in epoll_interests {
492546 if let Some ( epoll_interest) = weak_epoll_interest. upgrade ( ) {
493- check_and_update_one_event_interest ( fd_ref, epoll_interest, id, this) ?;
547+ let is_updated = check_and_update_one_event_interest (
548+ fd_ref,
549+ epoll_interest. clone ( ) ,
550+ id,
551+ this,
552+ ) ?;
553+ if is_updated {
554+ // Edge-triggered notification only notify one thread even if there are
555+ // multiple threads block on the same epfd.
556+ let epfd = this. machine . fds . get ( epoll_interest. borrow ( ) . epfd ) . unwrap ( ) ;
557+
558+ // This unwrap can never fail because if the current epoll instance were
559+ // closed and its epfd value reused, the upgrade of weak_epoll_interest
560+ // above would fail. This guarantee holds because only the epoll instance
561+ // holds a strong ref to epoll_interest.
562+ // FIXME: We can randomly pick a thread to unblock.
563+ if let Some ( thread_id) =
564+ epfd. downcast :: < Epoll > ( ) . unwrap ( ) . thread_id . borrow_mut ( ) . pop ( )
565+ {
566+ waiter. push ( thread_id) ;
567+ } ;
568+ }
494569 }
495570 }
496571 }
572+ waiter. sort ( ) ;
573+ waiter. dedup ( ) ;
574+ for thread_id in waiter {
575+ this. unblock_thread ( thread_id, BlockReason :: Epoll ) ?;
576+ }
497577 Ok ( ( ) )
498578 }
499579}
@@ -517,14 +597,15 @@ fn ready_list_next(
517597}
518598
519599/// This helper function checks whether an epoll notification should be triggered for a specific
520- /// epoll_interest and, if necessary, triggers the notification. Unlike check_and_update_readiness,
521- /// this function sends a notification to only one epoll instance.
600+ /// epoll_interest and, if necessary, triggers the notification, and returns whether the
601+ /// notification was added/updated. Unlike check_and_update_readiness, this function sends a
602+ /// notification to only one epoll instance.
522603fn check_and_update_one_event_interest < ' tcx > (
523604 fd_ref : & FileDescriptionRef ,
524605 interest : Rc < RefCell < EpollEventInterest > > ,
525606 id : FdId ,
526607 ecx : & MiriInterpCx < ' tcx > ,
527- ) -> InterpResult < ' tcx > {
608+ ) -> InterpResult < ' tcx , bool > {
528609 // Get the bitmask of ready events for a file description.
529610 let ready_events_bitmask = fd_ref. get_epoll_ready_events ( ) ?. get_event_bitmask ( ecx) ;
530611 let epoll_event_interest = interest. borrow ( ) ;
@@ -539,6 +620,46 @@ fn check_and_update_one_event_interest<'tcx>(
539620 let event_instance = EpollEventInstance :: new ( flags, epoll_event_interest. data ) ;
540621 // Triggers the notification by inserting it to the ready list.
541622 ready_list. insert ( epoll_key, event_instance) ;
623+ return Ok ( true ) ;
624+ }
625+ return Ok ( false ) ;
626+ }
627+
628+ /// Callback function after epoll_wait unblocks
629+ fn blocking_epoll_callback < ' tcx > (
630+ epfd_value : i32 ,
631+ weak_epfd : WeakFileDescriptionRef ,
632+ dest : & MPlaceTy < ' tcx > ,
633+ events : & MPlaceTy < ' tcx > ,
634+ ecx : & mut MiriInterpCx < ' tcx > ,
635+ ) -> InterpResult < ' tcx > {
636+ let Some ( epfd) = weak_epfd. upgrade ( ) else {
637+ throw_unsup_format ! ( "epoll FD {epfd_value} got closed while blocking." )
638+ } ;
639+
640+ let epoll_file_description = epfd
641+ . downcast :: < Epoll > ( )
642+ . ok_or_else ( || err_unsup_format ! ( "non-epoll FD passed to `epoll_wait`" ) ) ?;
643+
644+ let ready_list = epoll_file_description. get_ready_list ( ) ;
645+ let mut ready_list = ready_list. borrow_mut ( ) ;
646+ let mut num_of_events: i32 = 0 ;
647+ let mut array_iter = ecx. project_array_fields ( events) ?;
648+
649+ while let Some ( des) = array_iter. next ( ecx) ? {
650+ if let Some ( epoll_event_instance) = ready_list_next ( ecx, & mut ready_list) {
651+ ecx. write_int_fields_named (
652+ & [
653+ ( "events" , epoll_event_instance. events . into ( ) ) ,
654+ ( "u64" , epoll_event_instance. data . into ( ) ) ,
655+ ] ,
656+ & des. 1 ,
657+ ) ?;
658+ num_of_events = num_of_events. strict_add ( 1 ) ;
659+ } else {
660+ break ;
661+ }
542662 }
663+ ecx. write_int ( num_of_events, dest) ?;
543664 Ok ( ( ) )
544665}
0 commit comments