@@ -769,8 +769,8 @@ ReplicaActive::~ReplicaActive()
769769
770770/*
771771 * Note: we are expected to be in the initial internal state (Idle) when
772- * receiving any registration request. Our other internal states, the
773- * active ones, have their own handler for this event, and will treat it
772+ * receiving any registration request. ReplicaActiveOp, our other internal
773+ * state, has its own handler for this event, and will treat it
774774 * as an abort request.
775775 *
776776 * Process:
@@ -787,75 +787,101 @@ ReplicaActive::~ReplicaActive()
787787 * implementation note: sc::result objects cannot be copied or moved. Thus,
788788 * we've resorted to returning a code indicating the next action.
789789 */
790- ReplicaReactCode ReplicaActive::on_reserve_request (
791- const ReplicaReserveReq& ev,
792- bool async_request)
790+ sc::result ReplicaActive::react (const ReplicaReserveReq& ev)
793791{
794792 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
795- const auto m = ev.m_op ->get_req <MOSDScrubReserve>();
793+ const auto & m = *(ev.m_op ->get_req <MOSDScrubReserve>());
794+
795+ // should we handle the request asynchronously, using the reserver?
796+ const auto async_disabled = scrbr->get_pg_cct ()->_conf .get_val <bool >(
797+ " osd_scrub_disable_reservation_queuing" );
798+ const bool async_request = !async_disabled && m.wait_for_resources ;
796799 dout (10 ) << fmt::format (
797- " ReplicaActive::on_reserve_req() request:{} async_request:{} "
798- " reservation_nonce: {}" ,
799- ev, async_request, m-> reservation_nonce )
800+ " ReplicaActive::react(const ReplicaReserveReq&): async "
801+ " request?:{} disabled?:{} -> async? {}" ,
802+ m. wait_for_resources , async_disabled, async_request )
800803 << dendl;
801804
802- ceph_assert (!reservation_granted);
803- ceph_assert (!pending_reservation_nonce);
804- ReplicaReactCode next_action{ReplicaReactCode::discard};
805- AsyncScrubResData request_details{
806- pg_id, ev.m_from , ev.m_op ->sent_epoch , m->reservation_nonce };
805+ if (m_reservation_status != reservation_status_t ::unreserved) {
806+ // we are not expected to be in this state when a new request arrives.
807+ // Exit-then-reenter ReplicaActive, causing the existing reservation - be
808+ // it granted or pending - to be cleared. This maneuver also aborts
809+ // any on-going chunk request.
810+ dout (1 ) << fmt::format (
811+ " {}: unexpected request (granted:{} request:{})" , __func__,
812+ reservation_granted, m)
813+ << dendl;
814+ post_event (ev);
815+ return transit<ReplicaActive>();
816+ }
817+
807818 auto & reserver = m_osds->get_scrub_reserver ();
808819
809820 if (async_request) {
810821 // the request is to be handled asynchronously
811- dout (20 ) << fmt::format (
812- " {}: async request: {} details:{}" , __func__, ev,
813- request_details)
822+ AsyncScrubResData request_details{
823+ pg_id, ev.m_from , ev.m_op ->sent_epoch , m.reservation_nonce };
824+ dout (15 ) << fmt::format (
825+ " ReplicaActive::react(const ReplicaReserveReq& ev): async "
826+ " request: {} details:{}" ,
827+ ev, request_details)
814828 << dendl;
815- pending_reservation_nonce = m->reservation_nonce ;
829+
830+ pending_reservation_nonce = m.reservation_nonce ;
816831 const auto reservation_cb = new RtReservationCB (m_pg, request_details);
817- reserver.request_reservation (pg_id, reservation_cb, 0 , nullptr );
818- next_action = ReplicaReactCode::goto_waiting_reservation ;
832+ reserver.request_reservation (pg_id, reservation_cb, /* prio= */ 0 , nullptr );
833+ m_reservation_status = reservation_status_t ::requested_or_granted ;
819834
820835 } else {
821836 // an immediate yes/no is required
822837 Message* reply{nullptr };
823838 reservation_granted = reserver.request_reservation_or_fail (pg_id);
824839 if (reservation_granted) {
825840 dout (10 ) << fmt::format (" {}: reserved? yes" , __func__) << dendl;
841+ m_reservation_status = reservation_status_t ::requested_or_granted;
826842 reply = new MOSDScrubReserve (
827843 spg_t (pg_id.pgid , m_pg->get_primary ().shard ), ev.m_op ->sent_epoch ,
828- MOSDScrubReserve::GRANT, m_pg->pg_whoami , m->reservation_nonce );
829- next_action = ReplicaReactCode::goto_replica_reserved;
844+ MOSDScrubReserve::GRANT, m_pg->pg_whoami , m.reservation_nonce );
830845
831846 } else {
832847 dout (10 ) << fmt::format (" {}: reserved? no" , __func__) << dendl;
833848 reply = new MOSDScrubReserve (
834849 spg_t (pg_id.pgid , m_pg->get_primary ().shard ), ev.m_op ->sent_epoch ,
835- MOSDScrubReserve::REJECT, m_pg->pg_whoami , m->reservation_nonce );
836- // the event is discarded
837- next_action = ReplicaReactCode::discard;
850+ MOSDScrubReserve::REJECT, m_pg->pg_whoami , m.reservation_nonce );
838851 }
839852
840853 m_osds->send_message_osd_cluster (
841854 reply, ev.m_op ->get_req ()->get_connection ());
842855 }
843-
844- return next_action;
856+ return discard_event ();
845857}
846858
847- bool ReplicaActive::granted_by_reserver (const AsyncScrubResData& reservation)
859+
860+ sc::result ReplicaActive::react (const ReserverGranted& ev)
848861{
849862 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
850- dout (10 ) << fmt::format (" {}: reservation granted: {}" , __func__, reservation)
851- << dendl;
863+ const AsyncScrubResData& reservation = ev.value ;
864+ dout (10 )
865+ << fmt::format (
866+ " ReplicaActive::react(const ReserverGranted&). Reservation:{}" ,
867+ reservation)
868+ << dendl;
852869
853- // / verify that the granted reservation is the one we were waiting for
870+ /* *
871+ * discard (and log) unexpected 'reservation granted' messages
872+ * from the async reserver. 'Unexpected' here - either not carrying the
873+ * ID of our last request from the reserver, or arriving when there is
874+ * no 'open request' made to the reserver.
875+ * As canceled reservations may still be triggered, this is not
876+ * necessarily a bug.
877+ */
854878 if (reservation.nonce != pending_reservation_nonce) {
855879 dout (5 ) << fmt::format (
856- " {}: reservation_nonce mismatch: {} != {}" , __func__, reservation.nonce ,
857- pending_reservation_nonce) << dendl;
858- return false ;
880+ " ReplicaActive::react(const ReserverGranted&): "
881+ " reservation_nonce mismatch: {} != {}" ,
882+ reservation.nonce , pending_reservation_nonce)
883+ << dendl;
884+ return discard_event ();
859885 }
860886
861887 reservation_granted = true ;
@@ -867,9 +893,10 @@ bool ReplicaActive::granted_by_reserver(const AsyncScrubResData& reservation)
867893 MOSDScrubReserve::GRANT, m_pg->pg_whoami , pending_reservation_nonce);
868894 m_pg->send_cluster_message (
869895 m_pg->get_primary ().osd , grant_msg, reservation.request_epoch , false );
870- return true ;
896+ return discard_event () ;
871897}
872898
899+
873900void ReplicaActive::on_release (const ReplicaRelease& ev)
874901{
875902 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -899,12 +926,6 @@ void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
899926 }
900927}
901928
902- void ReplicaActive::ignore_unhandled_grant (const ReserverGranted&)
903- {
904- dout (10 ) << " ReplicaActive::react(const ReserverGranted&): ignored"
905- << dendl;
906- }
907-
908929
909930// ---------------- ReplicaActive/ReplicaIdle ---------------------------
910931
@@ -934,34 +955,6 @@ ReplicaUnreserved::ReplicaUnreserved(my_context ctx)
934955 << dendl;
935956}
936957
937- sc::result ReplicaUnreserved::react (const ReplicaReserveReq& ev)
938- {
939- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
940- dout (10 ) << " ReplicaUnreserved::react(const ReplicaReserveReq&)" << dendl;
941-
942- const auto & m = *(ev.m_op ->get_req <MOSDScrubReserve>());
943- const auto async_disabled = scrbr->get_pg_cct ()->_conf .get_val <bool >(
944- " osd_scrub_disable_reservation_queuing" );
945- const bool async_request = !async_disabled && m.wait_for_resources ;
946- dout (15 ) << fmt::format (
947- " ReplicaUnreserved::react(const ReplicaReserveReq&): "
948- " request:{} disabled?:{} -> async? {}" , m.wait_for_resources ,
949- async_disabled, async_request)
950- << dendl;
951-
952- switch (context<ReplicaActive>().on_reserve_request (ev, async_request)) {
953- case ReplicaReactCode::discard:
954- return discard_event ();
955- case ReplicaReactCode::goto_waiting_reservation:
956- return transit<ReplicaWaitingReservation>();
957- case ReplicaReactCode::goto_replica_reserved:
958- return transit<ReplicaReserved>();
959- default :
960- ceph_abort_msg (" unexpected return value" );
961- }
962- // can't happen, but some compilers complain:
963- return transit<ReplicaReserved>();
964- }
965958
966959sc::result ReplicaUnreserved::react (const StartReplica& ev)
967960{
@@ -983,20 +976,6 @@ sc::result ReplicaUnreserved::react(const ReplicaRelease&)
983976 return discard_event ();
984977}
985978
986- sc::result ReplicaUnreserved::react (const ReserverGranted&)
987- {
988- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
989- dout (10 ) << " ReplicaUnreserved::react(const ReserverGranted&)" << dendl;
990- // shouldn't happen. Might be a result of a cancelled reservation
991- // that was still delivered.
992- // must unreserve
993- dout (5 ) << " ReplicaUnreserved::react(const ReserverGranted&): reservation "
994- " granted while not being waited for"
995- << dendl;
996- context<ReplicaActive>().clear_remote_reservation (false );
997- return discard_event ();
998- }
999-
1000979
1001980// ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
1002981
@@ -1011,20 +990,6 @@ ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx)
1011990 << dendl;
1012991}
1013992
1014- sc::result ReplicaWaitingReservation::react (const ReserverGranted& ev)
1015- {
1016- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
1017- dout (10 ) << fmt::format (
1018- " ReplicaWaitingReservation::react(const ReserverGranted&): "
1019- " event:{}" ,
1020- ev)
1021- << dendl;
1022- if (context<ReplicaActive>().granted_by_reserver (ev.value )) {
1023- return transit<ReplicaReserved>();
1024- }
1025- return discard_event ();
1026- }
1027-
1028993sc::result ReplicaWaitingReservation::react (const ReplicaRelease& ev)
1029994{
1030995 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -1050,21 +1015,6 @@ sc::result ReplicaWaitingReservation::react(const StartReplica& ev)
10501015 return transit<ReplicaActiveOp>();
10511016}
10521017
1053- sc::result ReplicaWaitingReservation::react (const ReplicaReserveReq& ev)
1054- {
1055- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
1056- dout (10 ) << " ReplicaWaitingReservation::react(const ReplicaReserveReq&)"
1057- << dendl;
1058- // this shouldn't happen. We will handle it, but will also log an error.
1059- scrbr->get_clog ()->error () << fmt::format (
1060- " osd.{} pg[{}]: reservation requested while previous is pending" ,
1061- scrbr->get_whoami (), scrbr->get_spgid ());
1062- // cancel the existing reservation, and re-request
1063- context<ReplicaActive>().clear_remote_reservation (true );
1064- post_event (ev);
1065- return transit<ReplicaUnreserved>();
1066- }
1067-
10681018
10691019// ---------------- ReplicaIdle/ReplicaReserved ---------------------------
10701020
@@ -1086,22 +1036,6 @@ sc::result ReplicaReserved::react(const ReplicaRelease& ev)
10861036 return transit<ReplicaUnreserved>();
10871037}
10881038
1089- sc::result ReplicaReserved::react (const ReplicaReserveReq& ev)
1090- {
1091- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
1092- dout (10 ) << " ReplicaReserved::react(const ReplicaReserveReq&)" << dendl;
1093- scrbr->get_clog ()->error () << fmt::format (
1094- " osd.{} pg[{}]: reservation requested while still reserved" ,
1095- scrbr->get_whoami (), scrbr->get_spgid ());
1096- // This is a bug. We should never receive a new request unless the
1097- // previous one was cancelled - either by the primary, or on interval
1098- // change.
1099- // cancel the existing reservation, and re-request
1100- context<ReplicaActive>().clear_remote_reservation (true );
1101- post_event (ev);
1102- return transit<ReplicaUnreserved>();
1103- }
1104-
11051039sc::result ReplicaReserved::react (const StartReplica& ev)
11061040{
11071041 DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -1146,6 +1080,21 @@ sc::result ReplicaActiveOp::react(const StartReplica&)
11461080 return transit<ReplicaActiveOp>();
11471081}
11481082
1083+
1084+ sc::result ReplicaActiveOp::react (const ReplicaReserveReq& ev)
1085+ {
1086+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
1087+ dout (10 ) << " ReplicaActiveOp::react(const ReplicaReserveReq&)" << dendl;
1088+ scrbr->get_clog ()->warn () << fmt::format (
1089+ " osd.{} pg[{}]: reservation requested while processing a chunk" ,
1090+ scrbr->get_whoami (), scrbr->get_spgid ());
1091+ // exit-n-renter ReplicaActive (aborting the chunk & freeing the reservation
1092+ // in the process)
1093+ post_event (ev);
1094+ return transit<ReplicaActive>();
1095+ }
1096+
1097+
11491098sc::result ReplicaActiveOp::react (const ReplicaRelease& ev)
11501099{
11511100 dout (10 ) << " ReplicaActiveOp::react(const ReplicaRelease&)" << dendl;
0 commit comments