2121
2222package com .microsoft .applicationinsights .agent .internal .telemetry ;
2323
24+ import static java .util .Arrays .asList ;
2425import static java .util .Collections .singletonList ;
2526
2627import com .azure .core .http .HttpMethod ;
4950import java .io .IOException ;
5051import java .io .StringWriter ;
5152import java .net .URL ;
52- import java .net .UnknownHostException ;
5353import java .nio .ByteBuffer ;
54- import java .nio .channels .UnresolvedAddressException ;
5554import java .util .ArrayList ;
5655import java .util .HashMap ;
56+ import java .util .HashSet ;
5757import java .util .List ;
5858import java .util .Map ;
59+ import java .util .Set ;
5960import java .util .concurrent .atomic .AtomicBoolean ;
61+ import java .util .concurrent .atomic .AtomicInteger ;
6062import java .util .function .Consumer ;
6163import java .util .zip .GZIPOutputStream ;
6264import javax .annotation .Nullable ;
@@ -74,18 +76,19 @@ public class TelemetryChannel {
7476
7577 private static final AppInsightsByteBufferPool byteBufferPool = new AppInsightsByteBufferPool ();
7678
77- // TODO (heya) should we suppress logging statsbeat telemetry ingestion issues?
78- private static final OperationLogger operationLogger =
79- new OperationLogger (TelemetryChannel .class , "Sending telemetry to the ingestion service" );
80-
81- private static final OperationLogger retryOperationLogger =
82- new OperationLogger (
83- TelemetryChannel .class , "Sending telemetry to the ingestion service (retry)" );
79+ private final OperationLogger operationLogger ;
80+ private final OperationLogger retryOperationLogger ;
8481
8582 // TODO (kryalama) do we still need this AtomicBoolean, or can we use throttling built in to the
8683 // operationLogger?
8784 private final AtomicBoolean friendlyExceptionThrown = new AtomicBoolean ();
8885
86+ private final AtomicInteger statsbeatUnableToReachBreezeCounter = new AtomicInteger ();
87+ // TODO (trask) remove this boolean and shutdown the disk loader for statsbeat instead
88+ private final AtomicBoolean statsbeatHasBeenShutdown = new AtomicBoolean ();
89+
90+ private volatile boolean statsbeatHasReachedBreezeAtLeastOnce ;
91+
8992 @ SuppressWarnings ("CatchAndPrintStackTrace" )
9093 private static ObjectMapper createObjectMapper () {
9194 ObjectMapper mapper = new ObjectMapper ();
@@ -121,6 +124,10 @@ public CompletableResultCode sendRawBytes(
121124 String instrumentationKey ,
122125 Runnable onSuccess ,
123126 Consumer <Boolean > onFailure ) {
127+ if (isStatsbeat && statsbeatHasBeenShutdown .get ()) {
128+ // let it be deleted from disk so that it won't keep getting retried
129+ return CompletableResultCode .ofSuccess ();
130+ }
124131 return internalSend (
125132 singletonList (buffer ), instrumentationKey , onSuccess , onFailure , retryOperationLogger );
126133 }
@@ -137,6 +144,18 @@ public TelemetryChannel(
137144 this .localFileWriter = localFileWriter ;
138145 this .statsbeatModule = statsbeatModule ;
139146 this .isStatsbeat = isStatsbeat ;
147+
148+ if (isStatsbeat ) {
149+ // suppress all logging for statsbeat telemetry failures
150+ operationLogger = OperationLogger .NOOP ;
151+ retryOperationLogger = OperationLogger .NOOP ;
152+ } else {
153+ operationLogger =
154+ new OperationLogger (TelemetryChannel .class , "Sending telemetry to the ingestion service" );
155+ retryOperationLogger =
156+ new OperationLogger (
157+ TelemetryChannel .class , "Sending telemetry to the ingestion service (retry)" );
158+ }
140159 }
141160
142161 public CompletableResultCode send (List <TelemetryItem > telemetryItems ) {
@@ -184,7 +203,7 @@ public CompletableResultCode internalSendByInstrumentationKey(
184203
185204 List <ByteBuffer > encode (List <TelemetryItem > telemetryItems ) throws IOException {
186205
187- if (logger .isDebugEnabled ()) {
206+ if (! isStatsbeat && logger .isDebugEnabled ()) {
188207 StringWriter debug = new StringWriter ();
189208 try (JsonGenerator jg = mapper .createGenerator (debug )) {
190209 writeTelemetryItems (jg , telemetryItems );
@@ -282,6 +301,13 @@ private CompletableResultCode internalSend(
282301 return result ;
283302 }
284303
304+ // not including 401/403/503 in this list because those are commonly returned by proxy servers
305+ // when they are not configured to allow traffic for westus-0
306+ // not including 307/308 in this list because redirects only bubble up to this class if they have
307+ // reached the 10 redirect threshold, in which case they are considered non-retryable exceptions
308+ private static final Set <Integer > RESPONSE_CODES_INDICATING_REACHED_BREEZE =
309+ new HashSet <>(asList (200 , 206 , 402 , 408 , 429 , 439 , 500 ));
310+
285311 private Consumer <HttpResponse > responseHandler (
286312 String instrumentationKey ,
287313 long startTime ,
@@ -296,6 +322,13 @@ private Consumer<HttpResponse> responseHandler(
296322 .subscribe (
297323 body -> {
298324 int statusCode = response .getStatusCode ();
325+ if (isStatsbeat && !statsbeatHasReachedBreezeAtLeastOnce ) {
326+ if (RESPONSE_CODES_INDICATING_REACHED_BREEZE .contains (statusCode )) {
327+ statsbeatHasReachedBreezeAtLeastOnce = true ;
328+ } else {
329+ statsbeatDidNotReachBreeze ();
330+ }
331+ }
299332 switch (statusCode ) {
300333 case 200 : // SUCCESS
301334 operationLogger .recordSuccess ();
@@ -364,19 +397,13 @@ private Consumer<Throwable> errorHandler(
364397 String instrumentationKey , Consumer <Boolean > onFailure , OperationLogger operationLogger ) {
365398
366399 return error -> {
367- if (isStatsbeat
368- && (error instanceof UnknownHostException
369- || error instanceof UnresolvedAddressException )) {
370- // when sending a Statsbeat request and server returns an UnknownHostException, it's
371- // likely that it's using AMPLS. In that case, we use the kill-switch to turn off Statsbeat.
372- statsbeatModule .shutdown ();
373- onFailure .accept (false );
374- return ;
400+ if (isStatsbeat && !statsbeatHasReachedBreezeAtLeastOnce ) {
401+ statsbeatDidNotReachBreeze ();
375402 }
376403
377- // TODO (trask) only log one-time friendly exception if no prior successes
378- if ( !NetworkFriendlyExceptions .logSpecialOneTimeFriendlyException (
379- error , endpointUrl .toString (), friendlyExceptionThrown , logger )) {
404+ if (! isStatsbeat
405+ && !NetworkFriendlyExceptions .logSpecialOneTimeFriendlyException (
406+ error , endpointUrl .toString (), friendlyExceptionThrown , logger )) {
380407 operationLogger .recordFailure (
381408 "Error sending telemetry items: " + error .getMessage (), error );
382409 }
@@ -389,6 +416,21 @@ private Consumer<Throwable> errorHandler(
389416 };
390417 }
391418
419+ private void statsbeatDidNotReachBreeze () {
420+ if (statsbeatUnableToReachBreezeCounter .getAndIncrement () >= 10
421+ && !statsbeatHasBeenShutdown .getAndSet (true )) {
422+ // shutting down statsbeat because it's unlikely that it will ever get through at this point
423+ // some possible reasons:
424+ // * AMPLS
425+ // * proxy that has not been configured to allow westus-0
426+ // * local firewall that has not been configured to allow westus-0
427+ //
428+ // TODO need to figure out a way that statsbeat telemetry can be sent to the same endpoint as
429+ // the customer data for these cases
430+ statsbeatModule .shutdown ();
431+ }
432+ }
433+
392434 private static String getErrorMessageFromPartialSuccessResponse (String body ) {
393435 JsonNode jsonNode ;
394436 try {
0 commit comments