77using System . Diagnostics ;
88using System . Linq ;
99using System . Threading ;
10- using System . Threading . Tasks ;
1110using Microsoft . Azure . WebJobs . Script . Configuration ;
1211using Microsoft . Azure . WebJobs . Script . Description ;
1312using Microsoft . Azure . WebJobs . Script . Diagnostics ;
@@ -45,7 +44,7 @@ public MetricsEventManager(IOptionsMonitor<AppServiceOptions> appServiceOptions,
4544 // Initialize the periodic log flush timer
4645 _metricsFlushTimer = new Timer ( TimerFlush , null , metricsFlushIntervalMS , metricsFlushIntervalMS ) ;
4746
48- _functionActivityTracker = new FunctionActivityTracker ( _appServiceOptions , _eventGenerator , metricsPublisher , linuxContainerActivityPublisher , _functionActivityFlushIntervalSeconds ) ;
47+ _functionActivityTracker = new FunctionActivityTracker ( _appServiceOptions , _eventGenerator , metricsPublisher , linuxContainerActivityPublisher , _functionActivityFlushIntervalSeconds , _logger ) ;
4948 }
5049
5150 /// <summary>
@@ -316,7 +315,7 @@ protected virtual void Dispose(bool disposing)
316315
317316 if ( _functionActivityTracker != null )
318317 {
319- _functionActivityTracker . StopEtwTaskAndRaiseFinishedEvent ( ) ;
318+ _functionActivityTracker . StopTimerAndRaiseFinishedEvent ( ) ;
320319 _functionActivityTracker . Dispose ( ) ;
321320 }
322321 }
@@ -334,24 +333,30 @@ public void Dispose()
334333
335334 private class FunctionActivityTracker : IDisposable
336335 {
336+ // this interval should stay at 1 second because the timer is also
337+ // used to emit events every Nth second
338+ private const int _activityTimerIntervalMS = 1000 ;
339+
337340 private readonly IMetricsPublisher _metricsPublisher ;
338341 private readonly ILinuxContainerActivityPublisher _linuxContainerActivityPublisher ;
339342 private readonly object _runningFunctionsSyncLock = new object ( ) ;
343+ private readonly Timer _activityTimer ;
344+ private readonly ILogger < MetricsEventManager > _logger ;
340345
341346 private ulong _totalExecutionCount = 0 ;
342347 private int _activeFunctionCount = 0 ;
343348 private int _functionActivityFlushInterval ;
344- private CancellationTokenSource _etwTaskCancellationSource = new CancellationTokenSource ( ) ;
345349 private ConcurrentQueue < FunctionMetrics > _functionMetricsQueue = new ConcurrentQueue < FunctionMetrics > ( ) ;
346350 private List < FunctionStartedEvent > _runningFunctions = new List < FunctionStartedEvent > ( ) ;
347351 private bool _disposed = false ;
348352 private IOptionsMonitor < AppServiceOptions > _appServiceOptions ;
353+ private int _activityFlushCounter ;
349354
350355 // This ID is just an event grouping mechanism that can be used by event consumers
351356 // to group events coming from the same app host.
352357 private string _executionId = Guid . NewGuid ( ) . ToString ( ) ;
353358
354- internal FunctionActivityTracker ( IOptionsMonitor < AppServiceOptions > appServiceOptions , IEventGenerator generator , IMetricsPublisher metricsPublisher , ILinuxContainerActivityPublisher linuxContainerActivityPublisher , int functionActivityFlushInterval )
359+ internal FunctionActivityTracker ( IOptionsMonitor < AppServiceOptions > appServiceOptions , IEventGenerator generator , IMetricsPublisher metricsPublisher , ILinuxContainerActivityPublisher linuxContainerActivityPublisher , int functionActivityFlushInterval , ILogger < MetricsEventManager > logger )
355360 {
356361 MetricsEventGenerator = generator ;
357362 _appServiceOptions = appServiceOptions ;
@@ -367,43 +372,37 @@ internal FunctionActivityTracker(IOptionsMonitor<AppServiceOptions> appServiceOp
367372 _metricsPublisher = metricsPublisher ;
368373 }
369374
370- StartActivityTimer ( ) ;
375+ _activityFlushCounter = _functionActivityFlushInterval ;
376+ _activityTimer = new Timer ( TimerFlush , null , _activityTimerIntervalMS , _activityTimerIntervalMS ) ;
377+
378+ _logger = logger ;
371379 }
372380
373381 internal IEventGenerator MetricsEventGenerator { get ; private set ; }
374382
/// <summary>
/// Callback for the activity timer. Every tick calls
/// <c>RaiseMetricsPerFunctionEvent</c>; <c>RaiseFunctionMetricEvents</c> is
/// called only on ticks where <c>_activityFlushCounter</c> has reached
/// <c>_functionActivityFlushInterval</c>, after which the counter restarts at zero.
/// </summary>
/// <param name="state">Timer state object; not used by this callback.</param>
private void TimerFlush(object state)
{
    try
    {
        // raised on every tick
        RaiseMetricsPerFunctionEvent();

        // raised only once the counter has caught up with the flush interval;
        // otherwise this tick just advances the counter
        if (_activityFlushCounter >= _functionActivityFlushInterval)
        {
            RaiseFunctionMetricEvents();
            _activityFlushCounter = 0;
        }
        else
        {
            _activityFlushCounter += 1;
        }
    }
    catch (Exception ex)
    {
        // Log and continue so later ticks still run. The logger is accessed
        // null-conditionally because the constructor stores it without
        // validation; a NullReferenceException thrown from this handler would
        // otherwise escape the timer callback unhandled.
        _logger?.LogError(ex, "Error occurred when logging function activity");
    }
}
408407
409408 protected virtual void Dispose ( bool disposing )
@@ -412,7 +411,7 @@ protected virtual void Dispose(bool disposing)
412411 {
413412 if ( disposing )
414413 {
415- _etwTaskCancellationSource . Dispose ( ) ;
414+ _activityTimer ? . Dispose ( ) ;
416415 }
417416 _disposed = true ;
418417 }
@@ -450,9 +449,11 @@ internal void FunctionCompleted(FunctionStartedEvent startedEvent)
450449 RaiseFunctionMetricEvent ( startedEvent , _activeFunctionCount , DateTime . UtcNow ) ;
451450 }
452451
/// <summary>
/// Halts the periodic activity timer, then calls
/// <c>RaiseMetricsPerFunctionEvent</c> one more time.
/// </summary>
internal void StopTimerAndRaiseFinishedEvent()
{
    // nothing to stop when the timer was never created
    if (_activityTimer != null)
    {
        // an infinite due time and period disables further scheduled callbacks
        _activityTimer.Change(Timeout.Infinite, Timeout.Infinite);
    }

    RaiseMetricsPerFunctionEvent();
}
458459
@@ -474,20 +475,23 @@ private void RaiseFunctionMetricEvents()
474475
475476 // We only need to raise events here for functions that aren't completed.
476477 // Events are raised immediately for completed functions elsewhere.
477- var runningFunctions = new List < FunctionStartedEvent > ( ) ;
478+ FunctionStartedEvent [ ] runningFunctionsSnapshot = null ;
478479 lock ( _runningFunctionsSyncLock )
479480 {
480481 // effectively we're pruning all the completed invocations here
481- runningFunctions = _runningFunctions = _runningFunctions . Where ( p => ! p . Completed ) . ToList ( ) ;
482+ _runningFunctions = _runningFunctions . Where ( p => ! p . Completed ) . ToList ( ) ;
483+
484+ // create a snapshot within the lock so we can enumerate below
485+ runningFunctionsSnapshot = _runningFunctions . ToArray ( ) ;
482486 }
483487
484488 // we calculate concurrency here based on count, since these events are raised
485489 // on a background thread, so we want the actual count for this interval, not
486490 // the current count.
487491 var currentTime = DateTime . UtcNow ;
488- foreach ( var runningFunction in runningFunctions )
492+ foreach ( var runningFunction in runningFunctionsSnapshot )
489493 {
490- RaiseFunctionMetricEvent ( runningFunction , runningFunctions . Count , currentTime ) ;
494+ RaiseFunctionMetricEvent ( runningFunction , runningFunctionsSnapshot . Length , currentTime ) ;
491495 }
492496 }
493497
0 commit comments