Skip to content

Commit aead204

Browse files
authored
Do not exclude exit code -1 from restart (#5361)
* Do not exclude exit code -1 from restart * add more logging to swallowed exceptions * update with arg validation * rename exception name
1 parent d30073f commit aead204

File tree

5 files changed

+48
-17
lines changed

5 files changed

+48
-17
lines changed

src/WebJobs.Script/Workers/Http/HttpWorkerProcess.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,15 @@ internal override Process CreateWorkerProcess()
5151
return _processFactory.CreateWorkerProcess(workerContext);
5252
}
5353

54-
internal override void HandleWorkerProcessExitError(WorkerProcessExitException langExc)
54+
internal override void HandleWorkerProcessExitError(WorkerProcessExitException httpWorkerProcessExitException)
5555
{
56-
// The subscriber of WorkerErrorEvent is expected to Dispose() the errored channel
57-
if (langExc != null && langExc.ExitCode != -1)
56+
if (httpWorkerProcessExitException == null)
5857
{
59-
_workerProcessLogger.LogDebug(langExc, $"Language Worker Process exited.", _workerProcessArguments.ExecutablePath);
60-
_eventManager.Publish(new HttpWorkerErrorEvent(_workerId, langExc));
58+
throw new ArgumentNullException(nameof(httpWorkerProcessExitException));
6159
}
60+
// The subscriber of WorkerErrorEvent is expected to Dispose() the errored channel
61+
_workerProcessLogger.LogDebug(httpWorkerProcessExitException, $"Language Worker Process exited. Pid={httpWorkerProcessExitException.Pid}.", _workerProcessArguments.ExecutablePath);
62+
_eventManager.Publish(new HttpWorkerErrorEvent(_workerId, httpWorkerProcessExitException));
6263
}
6364

6465
internal override void HandleWorkerProcessRestart()

src/WebJobs.Script/Workers/ProcessManagement/WorkerProcess.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,13 @@ private void OnProcessExited(object sender, EventArgs e)
136136
{
137137
var processExitEx = new WorkerProcessExitException($"{_process.StartInfo.FileName} exited with code {_process.ExitCode}\n {exceptionMessage}");
138138
processExitEx.ExitCode = _process.ExitCode;
139+
processExitEx.Pid = _process.Id;
139140
HandleWorkerProcessExitError(processExitEx);
140141
}
141142
}
142-
catch (Exception)
143+
catch (Exception exc)
143144
{
145+
_workerProcessLogger?.LogDebug(exc, "Exception on worker process exit.");
144146
// ignore process is already disposed
145147
}
146148
}
@@ -182,8 +184,9 @@ public void Dispose()
182184
_process.Dispose();
183185
}
184186
}
185-
catch (Exception)
187+
catch (Exception exc)
186188
{
189+
_workerProcessLogger?.LogDebug(exc, "Exception on worker disposal.");
187190
//ignore
188191
}
189192
}

src/WebJobs.Script/Workers/ProcessManagement/WorkerProcessExitException.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,7 @@ internal WorkerProcessExitException(string message, Exception innerException) :
1616
}
1717

1818
internal int ExitCode { get; set; }
19+
20+
internal int Pid { get; set; }
1921
}
2022
}

src/WebJobs.Script/Workers/Rpc/FunctionRegistration/RpcFunctionInvocationDispatcher.cs

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,17 @@ public async void WorkerError(WorkerErrorEvent workerError)
278278
{
279279
if (!_disposing)
280280
{
281-
_logger.LogDebug("Handling WorkerErrorEvent for runtime:{runtime}, workerId:{workerId}", workerError.Language, workerError.WorkerId);
282-
AddOrUpdateErrorBucket(workerError);
283-
await DisposeAndRestartWorkerChannel(workerError.Language, workerError.WorkerId);
281+
if (string.Equals(_workerRuntime, workerError.Language))
282+
{
283+
_logger.LogDebug("Handling WorkerErrorEvent for runtime:{runtime}, workerId:{workerId}", workerError.Language, workerError.WorkerId);
284+
AddOrUpdateErrorBucket(workerError);
285+
await DisposeAndRestartWorkerChannel(workerError.Language, workerError.WorkerId);
286+
}
287+
else
288+
{
289+
_logger.LogDebug("Received WorkerErrorEvent for runtime:{runtime}, workerId:{workerId}", workerError.Language, workerError.WorkerId);
290+
_logger.LogDebug("WorkerErrorEvent runtime:{runtime} does not match current runtime:{currentRuntime}. Failed with: {exception}", workerError.Language, _workerRuntime, workerError.Exception);
291+
}
284292
}
285293
}
286294

@@ -297,21 +305,37 @@ private async Task DisposeAndRestartWorkerChannel(string runtime, string workerI
297305
{
298306
bool isWebHostChannel = await _webHostLanguageWorkerChannelManager.ShutdownChannelIfExistsAsync(runtime, workerId);
299307
bool isJobHostChannel = false;
300-
if (!isWebHostChannel)
308+
309+
// Dispose old worker
310+
if (isWebHostChannel)
311+
{
312+
_logger.LogDebug("Disposing WebHost channel for workerId: {channelId}, for runtime:{language}", workerId, runtime);
313+
}
314+
else
301315
{
302-
_logger.LogDebug("Disposing channel for workerId: {channelId}, for runtime:{language}", workerId, runtime);
303316
var channel = _jobHostLanguageWorkerChannelManager.GetChannels().Where(ch => ch.Id == workerId).FirstOrDefault();
304317
if (channel != null)
305318
{
319+
_logger.LogDebug("Disposing JobHost channel for workerId: {channelId}, for runtime:{language}", workerId, runtime);
306320
isJobHostChannel = true;
307321
_jobHostLanguageWorkerChannelManager.DisposeAndRemoveChannel(channel);
308322
}
323+
else
324+
{
325+
_logger.LogDebug("Did not find WebHost or JobHost channel to dispose for workerId: {channelId}, runtime:{language}", workerId, runtime);
326+
}
309327
}
328+
310329
if (ShouldRestartWorkerChannel(runtime, isWebHostChannel, isJobHostChannel))
311330
{
312331
_logger.LogDebug("Restarting worker channel for runtime:{runtime}", runtime);
313332
await RestartWorkerChannel(runtime, workerId);
314333
}
334+
else
335+
{
336+
_logger.LogDebug("Skipping worker channel restart for errored worker runtime:{runtime}, current runtime:{currentRuntime}, isWebHostChannel:{isWebHostChannel}, isJobHostChannel:{isJobHostChannel}",
337+
runtime, _workerRuntime, isWebHostChannel, isJobHostChannel);
338+
}
315339
}
316340

317341
internal bool ShouldRestartWorkerChannel(string runtime, bool isWebHostChannel, bool isJobHostChannel)

src/WebJobs.Script/Workers/Rpc/RpcWorkerProcess.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,15 @@ internal override Process CreateWorkerProcess()
4747
return _processFactory.CreateWorkerProcess(workerContext);
4848
}
4949

50-
internal override void HandleWorkerProcessExitError(WorkerProcessExitException langExc)
50+
internal override void HandleWorkerProcessExitError(WorkerProcessExitException rpcWorkerProcessExitException)
5151
{
52-
// The subscriber of WorkerErrorEvent is expected to Dispose() the errored channel
53-
if (langExc != null && langExc.ExitCode != -1)
52+
if (rpcWorkerProcessExitException == null)
5453
{
55-
_workerProcessLogger.LogDebug(langExc, $"Language Worker Process exited.", _workerProcessArguments.ExecutablePath);
56-
_eventManager.Publish(new WorkerErrorEvent(_runtime, _workerId, langExc));
54+
throw new ArgumentNullException(nameof(rpcWorkerProcessExitException));
5755
}
56+
// The subscriber of WorkerErrorEvent is expected to Dispose() the errored channel
57+
_workerProcessLogger.LogDebug(rpcWorkerProcessExitException, $"Language Worker Process exited. Pid={rpcWorkerProcessExitException.Pid}.", _workerProcessArguments.ExecutablePath);
58+
_eventManager.Publish(new WorkerErrorEvent(_runtime, _workerId, rpcWorkerProcessExitException));
5859
}
5960

6061
internal override void HandleWorkerProcessRestart()

0 commit comments

Comments
 (0)