Skip to content

Commit 6baebc9

Browse files
committed
Improve logging to include more identifiable information for agent managers
1 parent f788f82 commit 6baebc9

File tree

10 files changed

+106
-60
lines changed

10 files changed

+106
-60
lines changed

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -594,7 +594,8 @@ public void processStartupAnswer(final Answer answer, final Response response, f
594594
return;
595595
}
596596

597-
logger.info("Process agent startup answer, agent id = {}", startup.getHostId());
597+
logger.info("Process agent startup answer, agent [id: {}, name: {}] connected to the server",
598+
startup.getHostId(), startup.getHostName());
598599

599600
setId(startup.getHostId());
600601
_pingInterval = (long)startup.getPingInterval() * 1000; // change to ms.
@@ -604,7 +605,8 @@ public void processStartupAnswer(final Answer answer, final Response response, f
604605

605606
_ugentTaskPool.setKeepAliveTime(2 * _pingInterval, TimeUnit.MILLISECONDS);
606607

607-
logger.info("Startup Response Received: agent id = {}", getId());
608+
logger.info("Startup Response Received: agent [id: {}, name: {}]",
609+
getId(), startup.getHostName());
608610
}
609611

610612
protected void processRequest(final Request request, final Link link) {

core/src/main/java/com/cloud/agent/api/StartupAnswer.java

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,16 @@
2121

2222
public class StartupAnswer extends Answer {
2323
long hostId;
24+
String hostName;
2425
int pingInterval;
2526

2627
protected StartupAnswer() {
2728
}
2829

29-
public StartupAnswer(StartupCommand cmd, long hostId, int pingInterval) {
30+
public StartupAnswer(StartupCommand cmd, long hostId, String hostName, int pingInterval) {
3031
super(cmd);
3132
this.hostId = hostId;
33+
this.hostName = hostName;
3234
this.pingInterval = pingInterval;
3335
}
3436

@@ -40,6 +42,10 @@ public long getHostId() {
4042
return hostId;
4143
}
4244

45+
public String getHostName() {
46+
return hostName;
47+
}
48+
4349
public int getPingInterval() {
4450
return pingInterval;
4551
}

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 37 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,8 @@ private AgentControlAnswer handleControlCommand(final AgentAttache attache, fina
302302
}
303303
}
304304

305-
logger.warn("No handling of agent control command: {} sent from {}", cmd, attache.getId());
305+
logger.warn("No handling of agent control command: {} sent from [id: {} name: {}]",
306+
cmd, attache.getId(), attache.getName());
306307
return new AgentControlAnswer(cmd);
307308
}
308309

@@ -344,7 +345,7 @@ public Answer sendTo(final Long dcId, final HypervisorType type, final Command c
344345
answer = easySend(targetHostId, cmd);
345346
} catch (final Exception e) {
346347
String errorMsg = String.format("Error sending command %s to host %s, due to %s", cmd.getClass().getName(),
347-
host.getUuid(), e.getLocalizedMessage());
348+
host, e.getLocalizedMessage());
348349
logger.error(errorMsg);
349350
logger.debug(errorMsg, e);
350351
}
@@ -464,11 +465,11 @@ protected Status investigate(final AgentAttache agent) {
464465
final Long hostId = agent.getId();
465466
final HostVO host = _hostDao.findById(hostId);
466467
if (host != null && host.getType() != null && !host.getType().isVirtual()) {
467-
logger.debug("Checking if agent ({}) is alive", hostId);
468+
logger.debug("Checking if agent ({}) is alive", host);
468469
final Answer answer = easySend(hostId, new CheckHealthCommand());
469470
if (answer != null && answer.getResult()) {
470471
final Status status = Status.Up;
471-
logger.debug("Agent ({}) responded to checkHealthCommand, reporting that agent is {}", hostId, status);
472+
logger.debug("Agent ({}) responded to checkHealthCommand, reporting that agent is {}", host, status);
472473
return status;
473474
}
474475
return _haMgr.investigate(hostId);
@@ -493,7 +494,9 @@ protected AgentAttache getAttache(final Long hostId) throws AgentUnavailableExce
493494
public long send(final Long hostId, final Commands commands, final Listener listener) throws AgentUnavailableException {
494495
final AgentAttache agent = getAttache(hostId);
495496
if (agent.isClosed()) {
496-
throw new AgentUnavailableException("Agent " + agent.getId() + " is closed", agent.getId());
497+
throw new AgentUnavailableException(String.format(
498+
"Agent [id: %d, name: %s] is closed",
499+
agent.getId(), agent.getName()), agent.getId());
497500
}
498501

499502
final Command[] cmds = checkForCommandsAndTag(commands);
@@ -510,7 +513,7 @@ public void removeAgent(final AgentAttache attache, final Status nextState) {
510513
return;
511514
}
512515
final long hostId = attache.getId();
513-
logger.debug("Remove Agent : {}", hostId);
516+
logger.debug("Remove Agent : [id: {}, name: {}]", hostId, attache.getName());
514517
AgentAttache removed = null;
515518
boolean conflict = false;
516519
synchronized (_agents) {
@@ -522,7 +525,8 @@ public void removeAgent(final AgentAttache attache, final Status nextState) {
522525
}
523526
}
524527
if (conflict) {
525-
logger.debug("Agent for host {} is created when it is being disconnected", hostId);
528+
logger.debug("Agent for host [id: {}, name: {}] is created when it is being disconnected",
529+
hostId, attache.getName());
526530
}
527531
if (removed != null) {
528532
removed.disconnect(nextState);
@@ -565,11 +569,15 @@ protected AgentAttache notifyMonitorsOfConnection(final AgentAttache attache, fi
565569
}
566570
} else if (e instanceof HypervisorVersionChangedException) {
567571
handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true);
568-
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
572+
throw new CloudRuntimeException(String.format(
573+
"Unable to connect [id: %d, name: %s]",
574+
attache.getId(), attache.getName()), e);
569575
} else {
570576
logger.error("Monitor {} says there is an error in the connect process for {} due to {}", monitor.second().getClass().getSimpleName(), hostId, e.getMessage(), e);
571577
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
572-
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
578+
throw new CloudRuntimeException(String.format(
579+
"Unable to connect [id: %d, name: %s]",
580+
attache.getId(), attache.getName()), e);
573581
}
574582
}
575583
}
@@ -1004,21 +1012,26 @@ public void reconnect(final long hostId) throws AgentUnavailableException {
10041012
}
10051013

10061014
if (host.getRemoved() != null) {
1007-
throw new CloudRuntimeException("Host has already been removed: " + hostId);
1015+
throw new CloudRuntimeException(String.format(
1016+
"Host has already been removed: %s", host));
10081017
}
10091018

10101019
if (host.getStatus() == Status.Disconnected) {
1011-
logger.debug("Host is already disconnected, no work to be done: {}", hostId);
1020+
logger.debug("Host is already disconnected, no work to be done: {}", host);
10121021
return;
10131022
}
10141023

10151024
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) {
1016-
throw new CloudRuntimeException("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus());
1025+
throw new CloudRuntimeException(String.format(
1026+
"Unable to disconnect host because it is not in the correct state: host=%s; Status=%s",
1027+
host, host.getStatus()));
10171028
}
10181029

10191030
AgentAttache attache = findAttache(hostId);
10201031
if (attache == null) {
1021-
throw new CloudRuntimeException("Unable to disconnect host because it is not connected to this server: " + hostId);
1032+
throw new CloudRuntimeException(String.format(
1033+
"Unable to disconnect host because it is not connected to this server: %s",
1034+
host));
10221035
}
10231036
disconnectWithoutInvestigation(attache, Event.ShutdownRequested);
10241037
}
@@ -1118,7 +1131,8 @@ private AgentAttache sendReadyAndGetAttache(HostVO host, ReadyCommand ready, Lin
11181131
joinLock.unlock();
11191132
}
11201133
} else {
1121-
throw new ConnectionException(true, "Unable to acquire lock on host " + host.getUuid());
1134+
throw new ConnectionException(true,
1135+
String.format("Unable to acquire lock on host %s", host));
11221136
}
11231137
joinLock.releaseRef();
11241138
return attache;
@@ -1240,7 +1254,7 @@ protected void connectAgent(final Link link, final Command[] cmds, final Request
12401254
cmd = cmds[i];
12411255
if (cmd instanceof StartupRoutingCommand || cmd instanceof StartupProxyCommand || cmd instanceof StartupSecondaryStorageCommand ||
12421256
cmd instanceof StartupStorageCommand) {
1243-
answers[i] = new StartupAnswer((StartupCommand) cmds[i], 0, mgmtServiceConf.getPingInterval());
1257+
answers[i] = new StartupAnswer((StartupCommand) cmds[i], 0, "", mgmtServiceConf.getPingInterval());
12441258
break;
12451259
}
12461260
}
@@ -1349,16 +1363,16 @@ protected void processRequest(final Link link, final Request request) {
13491363
if (cmd instanceof StartupRoutingCommand) {
13501364
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
13511365
processStartupRoutingCommand(startup, hostId);
1352-
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
1366+
answer = new StartupAnswer(startup, attache.getId(), attache.getName(), mgmtServiceConf.getPingInterval());
13531367
} else if (cmd instanceof StartupProxyCommand) {
13541368
final StartupProxyCommand startup = (StartupProxyCommand) cmd;
1355-
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
1369+
answer = new StartupAnswer(startup, attache.getId(), attache.getName(), mgmtServiceConf.getPingInterval());
13561370
} else if (cmd instanceof StartupSecondaryStorageCommand) {
13571371
final StartupSecondaryStorageCommand startup = (StartupSecondaryStorageCommand) cmd;
1358-
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
1372+
answer = new StartupAnswer(startup, attache.getId(), attache.getName(), mgmtServiceConf.getPingInterval());
13591373
} else if (cmd instanceof StartupStorageCommand) {
13601374
final StartupStorageCommand startup = (StartupStorageCommand) cmd;
1361-
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
1375+
answer = new StartupAnswer(startup, attache.getId(), attache.getName(), mgmtServiceConf.getPingInterval());
13621376
} else if (cmd instanceof ShutdownCommand) {
13631377
final ShutdownCommand shutdown = (ShutdownCommand)cmd;
13641378
final String reason = shutdown.getReason();
@@ -1518,8 +1532,9 @@ public boolean agentStatusTransitTo(final HostVO host, final Status.Event e, fin
15181532
try {
15191533
return _statusStateMachine.transitTo(host, e, host.getId(), _hostDao);
15201534
} catch (final NoTransitionException e1) {
1521-
logger.debug("Cannot transit agent status with event {} for host {}, name={}, management server id is {}", e, host.getId(), host.getName(), msId);
1522-
throw new CloudRuntimeException("Cannot transit agent status with event " + e + " for host " + host.getId() + ", management server id is " + msId + "," + e1.getMessage());
1535+
logger.debug("Cannot transit agent status with event {} for host {}, management server id is {}", e, host, msId);
1536+
throw new CloudRuntimeException(String.format(
1537+
"Cannot transit agent status with event %s for host %s, management server id is %d, %s", e, host, msId, e1.getMessage()));
15231538
}
15241539
} finally {
15251540
_agentStatusLock.unlock();
@@ -1600,7 +1615,7 @@ public boolean handleDirectConnectAgent(final Host host, final StartupCommand[]
16001615
attache = createAttacheForDirectConnect(host, resource);
16011616
final StartupAnswer[] answers = new StartupAnswer[cmds.length];
16021617
for (int i = 0; i < answers.length; i++) {
1603-
answers[i] = new StartupAnswer(cmds[i], attache.getId(), mgmtServiceConf.getPingInterval());
1618+
answers[i] = new StartupAnswer(cmds[i], attache.getId(), attache.getName(), mgmtServiceConf.getPingInterval());
16041619
}
16051620
attache.process(answers);
16061621

0 commit comments

Comments
 (0)