Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ after libswsscommon deb make.
#define STATE_FDB_TABLE_NAME "FDB_TABLE"
#define STATE_WARM_RESTART_TABLE_NAME "WARM_RESTART_TABLE"
#define STATE_WARM_RESTART_ENABLE_TABLE_NAME "WARM_RESTART_ENABLE_TABLE"
/* Table in which WarmStart::registerWarmBootInfo() records an application's warm-boot registration. */
#define STATE_WARM_RESTART_REGISTRATION_TABLE_NAME "WARM_RESTART_REGISTRATION_TABLE"
#define STATE_VRF_TABLE_NAME "VRF_TABLE"
#define STATE_VRF_OBJECT_TABLE_NAME "VRF_OBJECT_TABLE"
#define STATE_MGMT_PORT_TABLE_NAME "MGMT_PORT_TABLE"
Expand Down
203 changes: 175 additions & 28 deletions common/warm_restart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,68 @@
#include <climits>
#include "logger.h"
#include "schema.h"
#include "timestamp.h"
#include "warm_restart.h"

namespace swss {

const WarmStart::WarmStartStateNameMap WarmStart::warmStartStateNameMap =
// Channel name constant. NOTE(review): presumably the channel on which NSF
// Manager publishes warm-boot notifications; it is not referenced by any
// code visible here -- confirm against its users.
const std::string WarmStart::kNsfManagerNotificationChannel =
"NSF_MANAGER_COMMON_NOTIFICATION_CHANNEL";
// Field names of the registration entry written by registerWarmBootInfo().
// The first three carry "true"/"false" for the matching wait_for_* argument;
// the last one carries the value returned by getTimestamp().
const std::string WarmStart::kRegistrationFreezeKey = "freeze";
const std::string WarmStart::kRegistrationCheckpointKey = "checkpoint";
const std::string WarmStart::kRegistrationReconciliationKey = "reconciliation";
const std::string WarmStart::kRegistrationTimestampKey = "timestamp";

/*
 * warmStartStateNameMap
 *
 * Returns: pointer to the table that maps every WarmStartState value to the
 * string stored for it in the warm-restart state table.
 *
 * The table is built on the first call and kept in a function-local static.
 * It is allocated with new and not deleted in this function, so the returned
 * pointer is the same on every call.
 */
const WarmStart::WarmStartStateNameMap* WarmStart::warmStartStateNameMap()
{
    static const WarmStartStateNameMap* const kStateNames =
        new WarmStartStateNameMap{
            {INITIALIZED,  "initialized"},
            {RESTORED,     "restored"},
            {REPLAYED,     "replayed"},
            {RECONCILED,   "reconciled"},
            {WSDISABLED,   "disabled"},
            {WSUNKNOWN,    "unknown"},
            {FROZEN,       "frozen"},
            {QUIESCENT,    "quiescent"},
            {CHECKPOINTED, "checkpointed"},
            {FAILED,       "failed"},
        };

    return kStateNames;
}

/*
 * dataCheckStateNameMap
 *
 * Returns: pointer to the table that maps every DataCheckState value to its
 * string name ("ignored", "passed", "failed").
 *
 * The table is built on the first call and kept in a function-local static.
 * It is allocated with new and not deleted in this function, so the returned
 * pointer is the same on every call.
 */
const WarmStart::DataCheckStateNameMap* WarmStart::dataCheckStateNameMap()
{
    static const DataCheckStateNameMap* const kCheckNames =
        new DataCheckStateNameMap{
            {CHECK_IGNORED, "ignored"},
            {CHECK_PASSED,  "passed"},
            {CHECK_FAILED,  "failed"},
        };

    return kCheckNames;
}

// Returns a pointer to a function-local static map from each
// WarmBootNotification value to its string name ("freeze", "unfreeze",
// "checkpoint"). The map is allocated with new and not deleted here.
const WarmStart::WarmBootNotificationNameMap* WarmStart::warmBootNotificationNameMap()
{
// NOTE(review): the lines from here down to the dataCheckStateNameMap
// definition line do not belong to this function; they look like removed
// lines of the previous revision carried over by the diff view -- confirm
// against the real file.
{INITIALIZED, "initialized"},
{RESTORED, "restored"},
{REPLAYED, "replayed"},
{RECONCILED, "reconciled"},
{WSDISABLED, "disabled"},
{WSUNKNOWN, "unknown"}
};

const WarmStart::DataCheckStateNameMap WarmStart::dataCheckStateNameMap =
// Actual body: build the map once, on first call.
static const auto* const warmBootNotificationNameMap =
new WarmBootNotificationNameMap({
{WarmBootNotification::kFreeze, "freeze"},
{WarmBootNotification::kUnfreeze, "unfreeze"},
{WarmBootNotification::kCheckpoint, "checkpoint"},
});
return warmBootNotificationNameMap;
}

// Returns a pointer to a function-local static map from a notification
// string ("freeze", "unfreeze", "checkpoint") back to its
// WarmBootNotification value. The map is allocated with new and not
// deleted here.
const WarmStart::WarmBootNotificationReverseMap* WarmStart::warmBootNotificationReverseMap()
{
// NOTE(review): the next four lines (CHECK_* entries and the closing "};")
// do not belong to this function; they look like removed lines of the
// previous revision carried over by the diff view -- confirm against the
// real file.
{CHECK_IGNORED, "ignored"},
{CHECK_PASSED, "passed"},
{CHECK_FAILED, "failed"}
};
// Actual body: build the map once, on first call.
static const auto* const warmBootNotificationReverseMap =
new WarmBootNotificationReverseMap({
{"freeze", WarmBootNotification::kFreeze},
{"unfreeze", WarmBootNotification::kUnfreeze},
{"checkpoint", WarmBootNotification::kCheckpoint},
});
return warmBootNotificationReverseMap;
}

WarmStart &WarmStart::getInstance(void)
{
Expand All @@ -44,6 +86,9 @@ void WarmStart::initialize(const std::string &app_name,
return;
}

warmStart.m_appName = app_name;
warmStart.m_dockerName = docker_name;

/* Use unix socket for db connection by default */
warmStart.m_stateDb =
std::make_shared<swss::DBConnector>("STATE_DB", db_timeout, isTcpConn);
Expand All @@ -58,6 +103,71 @@ void WarmStart::initialize(const std::string &app_name,
std::unique_ptr<Table>(new Table(warmStart.m_cfgDb.get(), CFG_WARM_RESTART_TABLE_NAME));

warmStart.m_initialized = true;
warmStart.m_warmbootState = WSUNKNOWN;
}

/*
* registerWarmBootInfo
*
* Register an application with NSF Manager.
*
* Returns: true on success, false otherwise.
*
* wait_for_freeze: if true, NSF Manager waits for application to freeze
* and become quiescent before proceeding to state
* verification and checkpointing
* wait_for_checkpoint: if true, NSF Manager waits for application to
* complete checkpointing before reboot
* wait_for_reconciliation: if true, NSF Manager waits for application to
* complete reconciliation before unfreeze
*/
bool WarmStart::registerWarmBootInfo(bool wait_for_freeze,
bool wait_for_checkpoint,
bool wait_for_reconciliation) {
auto& warmStart = getInstance();

if (!warmStart.m_initialized) {
SWSS_LOG_ERROR("registerWarmBootInfo called before initialized");
return false;
}

if (warmStart.m_dockerName.empty()) {
SWSS_LOG_ERROR("registerWarmBootInfo: m_dockerName is empty");
return false;
}

if (warmStart.m_appName.empty()) {
SWSS_LOG_ERROR("registerWarmBootInfo: m_appName is empty");
return false;
}

std::unique_ptr<Table> stateWarmRestartRegistrationTable =
std::unique_ptr<Table>(
new Table(warmStart.m_stateDb.get(),
STATE_WARM_RESTART_REGISTRATION_TABLE_NAME));

std::string separator =
TableBase::getTableSeparator(warmStart.m_stateDb->getDbId());
std::string tableName =
warmStart.m_dockerName + separator + warmStart.m_appName;

std::vector<FieldValueTuple> values;

values.push_back(swss::FieldValueTuple(WarmStart::kRegistrationFreezeKey,
wait_for_freeze ? "true" : "false"));
values.push_back(swss::FieldValueTuple(
WarmStart::kRegistrationCheckpointKey,
wait_for_checkpoint ? "true" : "false"));
values.push_back(swss::FieldValueTuple(
WarmStart::kRegistrationReconciliationKey,
wait_for_reconciliation ? "true" : "false"));
values.push_back(swss::FieldValueTuple(
WarmStart::kRegistrationTimestampKey,
getTimestamp()));

stateWarmRestartRegistrationTable->set(tableName, values);

return true;
}

/*
Expand Down Expand Up @@ -190,47 +300,64 @@ void WarmStart::getWarmStartState(const std::string &app_name, WarmStartState &s

auto& warmStart = getInstance();

state = RECONCILED;

if (!isWarmStart())
{
if (app_name == warmStart.m_appName &&
warmStart.m_warmbootState != WSUNKNOWN) {
/* Cache is up-to-date. Read state from cache. */
state = warmStart.m_warmbootState;
return;
}

warmStart.m_stateWarmRestartTable->hget(app_name, "state", statestr);

/* If warm-start is enabled, state cannot be assumed as Reconciled
* It should be set to unknown
*/
state = WSUNKNOWN;

for (auto it = warmStartStateNameMap.begin(); it != warmStartStateNameMap.end(); it++)
for (auto it = warmStartStateNameMap()->begin(); it != warmStartStateNameMap()->end(); it++)
{
if (it->second == statestr)
{
state = it->first;
break;
}
}

if (app_name == warmStart.m_appName)
{
/* Update cache. */
warmStart.m_warmbootState = state;
}

SWSS_LOG_INFO("%s warm start state get %s(%d)",
app_name.c_str(), statestr.c_str(), state);

return;
}

// Wrap getWarmStartState to return the state instead of filling a variable
// passed by reference. SWIG (for python) does not handle passing enum by
// reference cleanly.
//
// app_name: application whose warm start state is requested.
// Returns: the state produced by getWarmStartState(app_name, ...).
WarmStart::WarmStartState WarmStart::returnWarmStartState(const std::string &app_name)
{
    // Start from WSUNKNOWN so the returned value is well-defined even if
    // the callee were ever to return without assigning the out-parameter.
    WarmStartState state = WSUNKNOWN;
    getWarmStartState(app_name, state);
    return state;
}
// Set the WarmStart FSM state for a particular application.
//
// Writes the string name of `state` into the "state" field of app_name's
// entry in m_stateWarmRestartTable. When app_name equals the stored
// m_appName, the cached m_warmbootState is updated as well. The change is
// logged at NOTICE level.
//
// NOTE(review): the name lookup uses std::map::at, which throws
// std::out_of_range if `state` has no entry in the name map.
void WarmStart::setWarmStartState(const std::string &app_name, WarmStartState state)
{
auto& warmStart = getInstance();

warmStart.m_stateWarmRestartTable->hset(app_name,
"state",
// NOTE(review): the next two lines look like the before/after pair of
// the diff (member map vs. accessor function) -- only one can be real.
warmStartStateNameMap.at(state).c_str());
warmStartStateNameMap()->at(state).c_str());

if (app_name == warmStart.m_appName)
{
/* Update cache. */
warmStart.m_warmbootState = state;
}

SWSS_LOG_NOTICE("%s warm start state changed to %s",
app_name.c_str(),
// NOTE(review): same before/after diff pair as above.
warmStartStateNameMap.at(state).c_str());
warmStartStateNameMap()->at(state).c_str());
}

// Set the WarmStart data check state for a particular application.
Expand All @@ -246,12 +373,12 @@ void WarmStart::setDataCheckState(const std::string &app_name, DataCheckStage st
}
warmStart.m_stateWarmRestartTable->hset(app_name,
stageField,
dataCheckStateNameMap.at(state).c_str());
dataCheckStateNameMap()->at(state).c_str());

SWSS_LOG_NOTICE("%s %s result %s",
app_name.c_str(),
stageField.c_str(),
dataCheckStateNameMap.at(state).c_str());
dataCheckStateNameMap()->at(state).c_str());
}

WarmStart::DataCheckState WarmStart::getDataCheckState(const std::string &app_name, DataCheckStage stage)
Expand All @@ -271,7 +398,7 @@ WarmStart::DataCheckState WarmStart::getDataCheckState(const std::string &app_na

DataCheckState state = CHECK_IGNORED;

for (auto it = dataCheckStateNameMap.begin(); it != dataCheckStateNameMap.end(); it++)
for (auto it = dataCheckStateNameMap()->begin(); it != dataCheckStateNameMap()->end(); it++)
{
if (it->second == stateStr)
{
Expand All @@ -288,4 +415,24 @@ WarmStart::DataCheckState WarmStart::getDataCheckState(const std::string &app_na
return state;
}

bool WarmStart::isStateVerificationEnabled()
{
auto& warmStart = getInstance();

std::string value;
warmStart.m_stateWarmRestartEnableTable->hget("system",
"state_verification", value);
if (value == "true")
{
return true;
}
return false;
}

bool WarmStart::waitForUnfreeze()
{
// Wait for unfreeze notification only if state verification is enabled.
return isStateVerificationEnabled();
}

} // namespace swss
41 changes: 38 additions & 3 deletions common/warm_restart.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ namespace swss {
class WarmStart
{
public:
static const std::string kNsfManagerNotificationChannel;
static const std::string kRegistrationFreezeKey;
static const std::string kRegistrationCheckpointKey;
static const std::string kRegistrationReconciliationKey;
static const std::string kRegistrationTimestampKey;

enum WarmStartState
{
INITIALIZED,
Expand All @@ -21,6 +27,10 @@ class WarmStart
RECONCILED,
WSDISABLED,
WSUNKNOWN,
FROZEN,
QUIESCENT,
CHECKPOINTED,
FAILED,
};

enum DataCheckState
Expand All @@ -36,11 +46,23 @@ class WarmStart
STAGE_RESTORE,
};

// Kinds of warm-boot notification. warmBootNotificationNameMap() and
// warmBootNotificationReverseMap() convert between these values and the
// strings "freeze", "unfreeze" and "checkpoint".
enum class WarmBootNotification {
kFreeze,
kUnfreeze,
kCheckpoint,
};

typedef std::map<WarmStartState, std::string> WarmStartStateNameMap;
static const WarmStartStateNameMap warmStartStateNameMap;
static const WarmStartStateNameMap* warmStartStateNameMap();

typedef std::map<DataCheckState, std::string> DataCheckStateNameMap;
static const DataCheckStateNameMap dataCheckStateNameMap;
static const DataCheckStateNameMap* dataCheckStateNameMap();

typedef std::map<WarmBootNotification, std::string> WarmBootNotificationNameMap;
static const WarmBootNotificationNameMap* warmBootNotificationNameMap();

typedef std::map<std::string, WarmBootNotification> WarmBootNotificationReverseMap;
static const WarmBootNotificationReverseMap* warmBootNotificationReverseMap();

static WarmStart &getInstance(void);

Expand All @@ -49,6 +71,10 @@ class WarmStart
unsigned int db_timeout = 0,
bool isTcpConn = false);

static bool registerWarmBootInfo(bool wait_for_freeze,
bool wait_for_checkpoint,
bool wait_for_reconciliation);

static bool checkWarmStart(const std::string &app_name,
const std::string &docker_name,
const bool incr_restore_cnt = true);
Expand All @@ -60,6 +86,10 @@ class WarmStart
static void getWarmStartState(const std::string &app_name,
WarmStartState &state);

// For python via SWIG: return state instead of passing state
// variable by reference as a parameter.
static WarmStartState returnWarmStartState(const std::string &app_name);

static void setWarmStartState(const std::string &app_name,
WarmStartState state);

Expand All @@ -71,7 +101,9 @@ class WarmStart
DataCheckState state);

static DataCheckState getDataCheckState(const std::string &app_name,
DataCheckStage stage);
DataCheckStage stage);
static bool isStateVerificationEnabled();
static bool waitForUnfreeze();
private:
std::shared_ptr<swss::DBConnector> m_stateDb;
std::shared_ptr<swss::DBConnector> m_cfgDb;
Expand All @@ -81,6 +113,9 @@ class WarmStart
bool m_initialized;
bool m_enabled;
bool m_systemWarmRebootEnabled;
std::string m_appName;
std::string m_dockerName;
WarmStartState m_warmbootState;
};

}
Expand Down
Loading
Loading