Skip to content

Commit 032551f

Browse files
authored
[in-proc] Backport log level changes and failure handling from #11100 (#11156)
* Set errors to warnings.
* Check for _hostId first so we avoid lookups.
* Disable the service when there's any failure (even if transient) as part of flushing logs, purging events, or executing a batch.
1 parent 89d150e commit 032551f

File tree

2 files changed

+28
-8
lines changed

2 files changed

+28
-8
lines changed

src/WebJobs.Script.WebHost/Diagnostics/DiagnosticEventTableStorageRepository.Log.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,16 @@ private static class Logger
3838
LoggerMessage.Define<string>(LogLevel.Debug, new EventId(6, nameof(DeletingTableWithOutdatedEventVersion)), "Deleting table '{tableName}' as it contains records with an outdated EventVersion.");
3939

4040
private static readonly Action<ILogger, Exception> _errorPurgingDiagnosticEventVersions =
41-
LoggerMessage.Define(LogLevel.Error, new EventId(7, nameof(ErrorPurgingDiagnosticEventVersions)), "Error occurred when attempting to purge previous diagnostic event versions.");
41+
LoggerMessage.Define(LogLevel.Warning, new EventId(7, nameof(ErrorPurgingDiagnosticEventVersions)), "Error occurred when attempting to purge previous diagnostic event versions.");
4242

4343
private static readonly Action<ILogger, Exception> _unableToGetTableReference =
44-
LoggerMessage.Define(LogLevel.Error, new EventId(8, nameof(UnableToGetTableReference)), "Unable to get table reference. Aborting write operation.");
44+
LoggerMessage.Define(LogLevel.Warning, new EventId(8, nameof(UnableToGetTableReference)), "Unable to get table reference. Aborting write operation.");
4545

4646
private static readonly Action<ILogger, Exception> _unableToGetTableReferenceOrCreateTable =
47-
LoggerMessage.Define(LogLevel.Error, new EventId(9, nameof(UnableToGetTableReferenceOrCreateTable)), "Unable to get table reference or create table. Aborting write operation.");
47+
LoggerMessage.Define(LogLevel.Warning, new EventId(9, nameof(UnableToGetTableReferenceOrCreateTable)), "Unable to get table reference or create table. Aborting write operation.");
4848

4949
private static readonly Action<ILogger, Exception> _unableToWriteDiagnosticEvents =
50-
LoggerMessage.Define(LogLevel.Error, new EventId(10, nameof(UnableToWriteDiagnosticEvents)), "Unable to write diagnostic events to table storage.");
50+
LoggerMessage.Define(LogLevel.Warning, new EventId(10, nameof(UnableToWriteDiagnosticEvents)), "Unable to write diagnostic events to table storage.");
5151

5252
private static readonly Action<ILogger, Exception> _primaryHostStateProviderNotAvailable =
5353
LoggerMessage.Define(LogLevel.Debug, new EventId(11, nameof(PrimaryHostStateProviderNotAvailable)), "PrimaryHostStateProvider is not available. Skipping the check for primary host.");

src/WebJobs.Script.WebHost/Diagnostics/DiagnosticEventTableStorageRepository.cs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ internal string HostId
107107
{
108108
get
109109
{
110-
if (!_environment.IsPlaceholderModeEnabled() && string.IsNullOrEmpty(_hostId))
110+
if (string.IsNullOrEmpty(_hostId) && !_environment.IsPlaceholderModeEnabled())
111111
{
112112
_hostId = _hostIdProvider?.GetHostIdAsync(CancellationToken.None).GetAwaiter().GetResult();
113113
}
@@ -192,9 +192,23 @@ await Utility.InvokeWithRetriesAsync(async () =>
192192

193193
_purged = true;
194194
}
195+
catch (RequestFailedException ex) when (ex.Status == (int)HttpStatusCode.Forbidden)
196+
{
197+
// If we reach this point, we already checked for permissions on TableClient initialization.
198+
// It is possible that the permissions changed after initialization, that firewall/network rules were changed, or that a custom role is in use which lacks permission to query entities.
199+
// We will log the error and disable the service.
200+
Logger.ErrorPurgingDiagnosticEventVersions(_logger, ex);
201+
DisableService();
202+
Logger.ServiceDisabledUnauthorizedClient(_logger, ex);
203+
}
195204
catch (Exception ex)
196205
{
206+
// We failed to connect to the table storage account. This could be due to a transient error or a configuration issue (e.g., network problems).
207+
// To avoid repeatedly retrying in a potentially unhealthy state, we will disable the service.
208+
// The operation may succeed in a future instance if the underlying issue is resolved.
197209
Logger.ErrorPurgingDiagnosticEventVersions(_logger, ex);
210+
DisableService();
211+
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
198212
}
199213
}, maxRetries: 5, retryInterval: TimeSpan.FromSeconds(5));
200214

@@ -245,12 +259,16 @@ internal virtual async Task FlushLogs(TableClient table = null)
245259
Logger.UnableToGetTableReferenceOrCreateTable(_logger, ex);
246260
DisableService();
247261
Logger.ServiceDisabledUnauthorizedClient(_logger, ex);
262+
return;
248263
}
249264
catch (Exception ex)
250265
{
266+
// We failed to connect to the table storage account. This could be due to a transient error or a configuration issue (e.g., network problems).
267+
// To avoid repeatedly retrying in a potentially unhealthy state, we will disable the service.
268+
// The operation may succeed in a future instance if the underlying issue is resolved.
251269
Logger.UnableToGetTableReferenceOrCreateTable(_logger, ex);
252-
// Clearing the memory cache to avoid memory build up.
253-
_events.Clear();
270+
DisableService();
271+
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
254272
return;
255273
}
256274

@@ -292,12 +310,14 @@ internal async Task ExecuteBatchAsync(ConcurrentDictionary<string, DiagnosticEve
292310
catch (Exception ex)
293311
{
294312
Logger.UnableToWriteDiagnosticEvents(_logger, ex);
313+
DisableService();
314+
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
295315
}
296316
}
297317

298318
public void WriteDiagnosticEvent(DateTime timestamp, string errorCode, LogLevel level, string message, string helpLink, Exception exception)
299319
{
300-
if (TableClient is null || string.IsNullOrEmpty(HostId))
320+
if (TableClient is null || string.IsNullOrEmpty(HostId) || !IsEnabled())
301321
{
302322
return;
303323
}

0 commit comments

Comments
 (0)