Skip to content
This repository was archived by the owner on Feb 17, 2022. It is now read-only.

Commit 7d68df3

Browse files
committed
Adding more logging into MaintenanceController.
Bugs closed: AURORA-1170 Reviewed at https://reviews.apache.org/r/32014/
1 parent 128e554 commit 7d68df3

File tree

5 files changed

+47
-22
lines changed

5 files changed

+47
-22
lines changed

config/legacy_untested_classes.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@ org/apache/aurora/scheduler/log/mesos/MesosLogStreamModule$4
5555
org/apache/aurora/scheduler/log/mesos/MesosLogStreamModule$5
5656
org/apache/aurora/scheduler/mesos/DriverFactoryImpl
5757
org/apache/aurora/scheduler/mesos/LibMesosLoadingModule
58-
org/apache/aurora/scheduler/state/MaintenanceController$MaintenanceControllerImpl$4
59-
org/apache/aurora/scheduler/state/MaintenanceController$MaintenanceControllerImpl$8
6058
org/apache/aurora/scheduler/stats/AsyncStatsModule$OfferAdapter$1
6159
org/apache/aurora/scheduler/storage/log/LogStorage$RecoveryFailedException
6260
org/apache/aurora/scheduler/storage/mem/Util

src/main/java/org/apache/aurora/scheduler/state/MaintenanceController.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,9 @@ public void execute(MutableStoreProvider store) {
166166
LOG.info(String.format("Moving host %s into DRAINED", host));
167167
setMaintenanceMode(store, ImmutableSet.of(host), DRAINED);
168168
} else {
169-
LOG.info(
170-
String.format("Host %s is DRAINING with active tasks: %s", host, activeTasks));
169+
LOG.info(String.format("Host %s is DRAINING with active tasks: %s",
170+
host,
171+
Tasks.ids(activeTasks)));
171172
}
172173
}
173174
}
@@ -268,9 +269,11 @@ private Set<HostStatus> setMaintenanceMode(
268269
AttributeStore.Mutable store = storeProvider.getAttributeStore();
269270
ImmutableSet.Builder<HostStatus> statuses = ImmutableSet.builder();
270271
for (String host : hosts) {
272+
LOG.info(String.format("Setting maintenance mode to %s for host %s", mode, host));
271273
Optional<IHostAttributes> toSave = AttributeStore.Util.mergeMode(store, host, mode);
272274
if (toSave.isPresent()) {
273275
store.saveHostAttributes(toSave.get());
276+
LOG.info("Updated host attributes: " + toSave.get());
274277
HostStatus status = new HostStatus().setHost(host).setMode(mode);
275278
statuses.add(status);
276279
}

src/main/python/apache/aurora/admin/host_maintenance.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ def _drain_hosts(self, drainable_hosts):
7171
self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))
7272

7373
statuses = self.check_status(list(not_drained_hostnames))
74-
not_drained_hostnames = set([h[0] for h in statuses if h[1] != 'DRAINED'])
74+
not_drained_hostnames = set(h[0] for h in statuses if h[1] != 'DRAINED')
7575

7676
total_wait += self.STATUS_POLL_INTERVAL
7777
if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
78-
log.warning('Failed to move all hosts into DRAINED within %s' % self.MAX_STATUS_WAIT)
78+
log.warning('Failed to move all hosts into DRAINED within %s:\n%s' %
79+
(self.MAX_STATUS_WAIT,
80+
'\n'.join("\tHost:%s\tStatus:%s" % h for h in sorted(statuses) if h[1] != 'DRAINED')))
7981
break
8082

8183
return not_drained_hostnames

src/test/java/org/apache/aurora/scheduler/state/MaintenanceControllerImplTest.java

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import static org.apache.aurora.gen.MaintenanceMode.DRAINING;
4747
import static org.apache.aurora.gen.MaintenanceMode.NONE;
4848
import static org.apache.aurora.gen.MaintenanceMode.SCHEDULED;
49-
import static org.apache.aurora.gen.ScheduleStatus.FINISHED;
49+
import static org.apache.aurora.gen.ScheduleStatus.KILLED;
5050
import static org.apache.aurora.gen.ScheduleStatus.RUNNING;
5151
import static org.apache.aurora.scheduler.state.MaintenanceController.MaintenanceControllerImpl;
5252
import static org.easymock.EasyMock.expect;
@@ -97,21 +97,23 @@ private static ScheduledTask makeTask(String host, String taskId) {
9797

9898
@Test
9999
public void testMaintenanceCycle() {
100-
ScheduledTask task = makeTask(HOST_A, "taskA");
100+
ScheduledTask task1 = makeTask(HOST_A, "taskA");
101+
ScheduledTask task2 = makeTask(HOST_A, "taskB");
101102

102103
expectMaintenanceModeChange(HOST_A, SCHEDULED);
103-
expectFetchTasksByHost(HOST_A, ImmutableSet.of(task));
104-
expect(stateManager.changeState(
105-
storageUtil.mutableStoreProvider,
106-
Tasks.id(task),
107-
Optional.<ScheduleStatus>absent(),
108-
ScheduleStatus.DRAINING,
109-
MaintenanceControllerImpl.DRAINING_MESSAGE))
110-
.andReturn(true);
104+
expectFetchTasksByHost(HOST_A, ImmutableSet.of(task1, task2));
105+
expectTaskDraining(task1);
106+
expectTaskDraining(task2);
111107
expectMaintenanceModeChange(HOST_A, DRAINING);
112-
expect(storageUtil.attributeStore.getHostAttributes(HOST_A)).andReturn(Optional.of(
113-
IHostAttributes.build(new HostAttributes().setHost(HOST_A).setMode(DRAINING))));
114-
// TaskA is FINISHED and therefore no longer active
108+
IHostAttributes attributes =
109+
IHostAttributes.build(new HostAttributes().setHost(HOST_A).setMode(DRAINING));
110+
111+
expect(storageUtil.attributeStore.getHostAttributes(HOST_A))
112+
.andReturn(Optional.of(attributes)).times(2);
113+
114+
expect(storageUtil.attributeStore.getHostAttributes()).andReturn(ImmutableSet.of(attributes));
115+
expectFetchTasksByHost(HOST_A, ImmutableSet.of(task2));
116+
// TaskA is KILLED and therefore no longer active
115117
expectFetchTasksByHost(HOST_A, ImmutableSet.<ScheduledTask>of());
116118
expectMaintenanceModeChange(HOST_A, DRAINED);
117119
expectMaintenanceModeChange(HOST_A, NONE);
@@ -120,8 +122,11 @@ public void testMaintenanceCycle() {
120122

121123
assertStatus(HOST_A, SCHEDULED, maintenance.startMaintenance(A));
122124
assertStatus(HOST_A, DRAINING, maintenance.drain(A));
125+
assertStatus(HOST_A, DRAINING, maintenance.getStatus(A));
123126
eventSink.post(
124-
TaskStateChange.transition(IScheduledTask.build(task.setStatus(FINISHED)), RUNNING));
127+
TaskStateChange.transition(IScheduledTask.build(task1.setStatus(KILLED)), RUNNING));
128+
eventSink.post(
129+
TaskStateChange.transition(IScheduledTask.build(task2.setStatus(KILLED)), RUNNING));
125130
assertStatus(HOST_A, NONE, maintenance.endMaintenance(A));
126131
}
127132

@@ -165,7 +170,7 @@ public void testEndEarly() {
165170
// Make sure a later transition on the host does not cause any ill effects that could surface
166171
// from stale internal state.
167172
eventSink.post(TaskStateChange.transition(
168-
IScheduledTask.build(makeTask(HOST_A, "taskA").setStatus(FINISHED)), RUNNING));
173+
IScheduledTask.build(makeTask(HOST_A, "taskA").setStatus(KILLED)), RUNNING));
169174
}
170175

171176
@Test
@@ -181,6 +186,16 @@ public void testGetMode() {
181186
assertEquals(NONE, maintenance.getMode("unknown"));
182187
}
183188

189+
private void expectTaskDraining(ScheduledTask task) {
190+
expect(stateManager.changeState(
191+
storageUtil.mutableStoreProvider,
192+
Tasks.id(task),
193+
Optional.<ScheduleStatus>absent(),
194+
ScheduleStatus.DRAINING,
195+
MaintenanceControllerImpl.DRAINING_MESSAGE))
196+
.andReturn(true);
197+
}
198+
184199
private void expectFetchTasksByHost(String hostName, ImmutableSet<ScheduledTask> tasks) {
185200
expect(storageUtil.taskStore.fetchTasks(Query.slaveScoped(hostName).active()))
186201
.andReturn(IScheduledTask.setFromBuilders(tasks));

src/test/python/apache/aurora/admin/test_host_maintenance.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#
1414

1515
import copy
16+
import textwrap
1617
import unittest
1718
from contextlib import contextmanager
1819

@@ -103,12 +104,13 @@ def fake_maintenance_status_side_effect(hosts):
103104
(Hosts(set(TEST_HOSTNAMES))),
104105
(Hosts(set([TEST_HOSTNAMES[0]])))]
105106

107+
@mock.patch("twitter.common.log.warning", spec=log.warning)
106108
@mock.patch("apache.aurora.client.api.AuroraClientAPI.maintenance_status",
107109
spec=AuroraClientAPI.maintenance_status)
108110
@mock.patch("apache.aurora.client.api.AuroraClientAPI.drain_hosts",
109111
spec=AuroraClientAPI.drain_hosts)
110112
@mock.patch("threading._Event.wait")
111-
def test_drain_hosts_timed_out_wait(self, _, mock_drain_hosts, mock_maintenance_status):
113+
def test_drain_hosts_timed_out_wait(self, _, mock_drain_hosts, mock_maintenance_status, mock_log):
112114
fake_maintenance_status_response = Response(
113115
responseCode=ResponseCode.OK,
114116
result=Result(maintenanceStatusResult=MaintenanceStatusResult(set([
@@ -128,6 +130,11 @@ def test_drain_hosts_timed_out_wait(self, _, mock_drain_hosts, mock_maintenance_
128130
assert mock_maintenance_status.call_count == 1
129131
mock_drain_hosts.assert_called_once_with(test_hosts)
130132
mock_maintenance_status.assert_called_once_with((Hosts(set(TEST_HOSTNAMES))))
133+
assert mock_log.mock_calls == [mock.call(textwrap.dedent("""\
134+
Failed to move all hosts into DRAINED within 1 ms:
135+
\tHost:us-west-001.example.com\tStatus:SCHEDULED
136+
\tHost:us-west-002.example.com\tStatus:SCHEDULED
137+
\tHost:us-west-003.example.com\tStatus:SCHEDULED"""))]
131138

132139
@mock.patch("twitter.common.log.warning", spec=log.warning)
133140
@mock.patch("apache.aurora.client.api.AuroraClientAPI.maintenance_status",

0 commit comments

Comments
 (0)