Skip to content

Commit 765d475

Browse files
committed
osd/scrub: handle Release messages directly in ReplicaIdle
The expected path is that the scrubber will be in the ReplicaIdle sub-state. If, however, we are processing a chunk (ReplicaActiveOp) - the scrub is aborted, and the scrubber is returned to the ReplicaIdle. Signed-off-by: Ronen Friedman <[email protected]>
1 parent ec381fa commit 765d475

File tree

2 files changed

+29
-69
lines changed

2 files changed

+29
-69
lines changed

src/osd/scrubber/scrub_machine.cc

Lines changed: 17 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -897,14 +897,6 @@ sc::result ReplicaActive::react(const ReserverGranted& ev)
897897
}
898898

899899

900-
void ReplicaActive::on_release(const ReplicaRelease& ev)
901-
{
902-
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
903-
dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
904-
<< dendl;
905-
clear_remote_reservation(true);
906-
}
907-
908900
void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
909901
{
910902
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -917,16 +909,31 @@ void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
917909
m_osds->get_scrub_reserver().cancel_reservation(pg_id);
918910
reservation_granted = false;
919911
pending_reservation_nonce = 0;
912+
ceph_assert(m_reservation_status != reservation_status_t::unreserved);
913+
m_reservation_status = reservation_status_t::unreserved;
914+
920915
} else if (warn_if_no_reservation) {
921916
const auto msg =
922917
"ReplicaActive::clear_remote_reservation(): "
923918
"not reserved!";
924919
dout(5) << msg << dendl;
925-
scrbr->get_clog()->warn() << msg;
920+
scrbr->get_clog()->info() << msg;
926921
}
927922
}
928923

929924

925+
sc::result ReplicaActive::react(const ReplicaRelease& ev)
926+
{
927+
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
928+
dout(10) << fmt::format(
929+
"ReplicaActive::react(const ReplicaRelease&) from {}",
930+
ev.m_from)
931+
<< dendl;
932+
clear_remote_reservation(true);
933+
return discard_event();
934+
}
935+
936+
930937
// ---------------- ReplicaActive/ReplicaIdle ---------------------------
931938

932939
ReplicaIdle::ReplicaIdle(my_context ctx)
@@ -964,18 +971,6 @@ sc::result ReplicaUnreserved::react(const StartReplica& ev)
964971
return transit<ReplicaActiveOp>();
965972
}
966973

967-
sc::result ReplicaUnreserved::react(const ReplicaRelease&)
968-
{
969-
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
970-
dout(10) << "ReplicaUnreserved::react(const ReplicaRelease&)" << dendl;
971-
// this is a bug. We should never receive a release request unless we
972-
// are reserved or have a pending reservation.
973-
scrbr->get_clog()->error() << fmt::format(
974-
"osd.{} pg[{}]: reservation released while not reserved",
975-
scrbr->get_whoami(), scrbr->get_spgid());
976-
return discard_event();
977-
}
978-
979974

980975
// ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
981976

@@ -990,16 +985,6 @@ ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx)
990985
<< dendl;
991986
}
992987

993-
sc::result ReplicaWaitingReservation::react(const ReplicaRelease& ev)
994-
{
995-
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
996-
dout(10) << "ReplicaWaitingReservation::react(const ReplicaRelease&)"
997-
<< dendl;
998-
// must cancel the queued reservation request
999-
context<ReplicaActive>().on_release(ev);
1000-
return transit<ReplicaUnreserved>();
1001-
}
1002-
1003988
sc::result ReplicaWaitingReservation::react(const StartReplica& ev)
1004989
{
1005990
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -1028,14 +1013,6 @@ ReplicaReserved::ReplicaReserved(my_context ctx)
10281013
<< dendl;
10291014
}
10301015

1031-
sc::result ReplicaReserved::react(const ReplicaRelease& ev)
1032-
{
1033-
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
1034-
dout(10) << "ReplicaReserved::react(const ReplicaRelease&)" << dendl;
1035-
context<ReplicaActive>().on_release(ev);
1036-
return transit<ReplicaUnreserved>();
1037-
}
1038-
10391016
sc::result ReplicaReserved::react(const StartReplica& ev)
10401017
{
10411018
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
@@ -1098,8 +1075,7 @@ sc::result ReplicaActiveOp::react(const ReplicaReserveReq& ev)
10981075
sc::result ReplicaActiveOp::react(const ReplicaRelease& ev)
10991076
{
11001077
dout(10) << "ReplicaActiveOp::react(const ReplicaRelease&)" << dendl;
1101-
post_event(ev);
1102-
return transit<ReplicaReserved>();
1078+
return transit<ReplicaActive>();
11031079
}
11041080

11051081

src/osd/scrubber/scrub_machine.h

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -769,16 +769,6 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
769769
* - No scrubbing is performed in this state, but reservation-related
770770
* events are handled.
771771
*
772-
* - sub-states:
773-
* * ReplicaUnreserved - not reserved by a primary. In this state we
774-
* are waiting for either a reservation request, or a chunk scrub op.
775-
*
776-
* * ReplicaWaitingReservation - a reservation request was received from
777-
* our primary. We expect a ' go ahead' from the reserver, or a
778-
* cancellation command from the primary (or an interval change).
779-
*
780-
* * ReplicaReserved - we are reserved by a primary.
781-
*
782772
* - ReplicaActiveOp - handling a single map request op
783773
* * ReplicaWaitUpdates
784774
* * ReplicaBuildingMap
@@ -826,9 +816,6 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
826816
explicit ReplicaActive(my_context ctx);
827817
~ReplicaActive();
828818

829-
/// handle a 'release' from a primary
830-
void on_release(const ReplicaRelease& ev);
831-
832819
/**
833820
* cancel a granted or pending reservation
834821
*
@@ -842,11 +829,18 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
842829
using reactions = mpl::list<
843830
sc::transition<IntervalChanged, NotActive>,
844831
sc::custom_reaction<ReserverGranted>,
845-
sc::custom_reaction<ReplicaReserveReq>>;
832+
sc::custom_reaction<ReplicaReserveReq>,
833+
sc::custom_reaction<ReplicaRelease>>;
846834

847835
/// handle a reservation request from a primary
848836
sc::result react(const ReplicaReserveReq& ev);
849837

838+
/*
839+
* the Primary released the reservation.
840+
* Note: The ActiveReplicaOp state will handle this event as well.
841+
*/
842+
sc::result react(const ReplicaRelease&);
843+
850844
/**
851845
* the queued reservation request was granted by the async reserver.
852846
* Notify the Primary.
@@ -942,12 +936,9 @@ struct ReplicaUnreserved : sc::state<ReplicaUnreserved, ReplicaIdle>,
942936
explicit ReplicaUnreserved(my_context ctx);
943937

944938
using reactions = mpl::list<
945-
sc::custom_reaction<StartReplica>,
946-
// unexpected (bug-induced) events:
947-
sc::custom_reaction<ReplicaRelease>>;
939+
sc::custom_reaction<StartReplica>>;
948940

949941
sc::result react(const StartReplica& ev);
950-
sc::result react(const ReplicaRelease&);
951942
};
952943

953944
/**
@@ -968,11 +959,8 @@ struct ReplicaWaitingReservation
968959
explicit ReplicaWaitingReservation(my_context ctx);
969960

970961
using reactions = mpl::list<
971-
// the 'normal' (expected) events:
972-
sc::custom_reaction<ReplicaRelease>,
973962
sc::custom_reaction<StartReplica>>;
974963

975-
sc::result react(const ReplicaRelease& ev);
976964
sc::result react(const StartReplica& ev);
977965
};
978966

@@ -990,10 +978,8 @@ struct ReplicaReserved : sc::state<ReplicaReserved, ReplicaIdle>, NamedSimply {
990978
explicit ReplicaReserved(my_context ctx);
991979

992980
using reactions = mpl::list<
993-
sc::custom_reaction<StartReplica>,
994-
sc::custom_reaction<ReplicaRelease>>;
981+
sc::custom_reaction<StartReplica>>;
995982

996-
sc::result react(const ReplicaRelease&);
997983
sc::result react(const StartReplica& eq);
998984
};
999985

@@ -1029,10 +1015,8 @@ struct ReplicaActiveOp
10291015

10301016
/**
10311017
* a 'release' was send by the primary. Possible scenario: 'no-scrub'
1032-
* abort. Our two-steps reaction:
1033-
* - we exit the 'ActiveOp' state, and
1034-
* - we make sure the 'release' is remembered, to be handled by the state
1035-
* we would transition into (which should be ReplicaReserved).
1018+
* abort. We abort the current chunk handling and re-enter ReplicaActive,
1019+
* releasing the reservation on the way.
10361020
*/
10371021
sc::result react(const ReplicaRelease&);
10381022

0 commit comments

Comments
 (0)