Skip to content

Commit 42701b8

Browse files
Agent CDN - Added Warning in Initialize Phase when new Agent CDN is not reachable - AB#2241315 (#5202)
* Agent CDN - Added Warning in Initialize Phase when new Agent CDN is not reachable * Added AgentCDNAccessStatus Telemetry Event
1 parent ce1182e commit 42701b8

File tree

4 files changed

+105
-1
lines changed

4 files changed

+105
-1
lines changed

src/Agent.Sdk/Knob/AgentKnobs.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,13 +786,21 @@ public class AgentKnobs
786786
"Timeout for channel communication between agent listener and worker processes.",
787787
new EnvironmentKnobSource("PIPELINE_ARTIFACT_ASSOCIATE_TIMEOUT"),
788788
new BuiltInDefaultKnobSource("900")); // 15 * 60 - Setting the timeout to 15 minutes to account for slowness from azure storage and retries.
789-
789+
790790
/// <summary>
/// Knob controlling whether the agent installs the default versions of TF and vstsom.
/// Can be supplied as a runtime knob, an environment variable, or a pipeline feature;
/// the built-in default is "false".
/// </summary>
public static readonly Knob RollbackToDefaultTfExe = new Knob(
    nameof(RollbackToDefaultTfExe),
    "If true, the agent will install the default versions of TF, vstsom",
    new RuntimeKnobSource("ROLLBACK_TO_DEFAULT_TF_EXE"),
    new EnvironmentKnobSource("ROLLBACK_TO_DEFAULT_TF_EXE"),
    new PipelineFeatureSource("RollbackToDefaultTfExe"),
    new BuiltInDefaultKnobSource("false"));
797+
798+
/// <summary>
/// Knob that opts in to the Agent CDN connectivity check: when it evaluates to true, a warning
/// is shown if the Agent CDN endpoint (download.agent.dev.azure.com) is not reachable.
/// Can be supplied as a runtime knob, an environment variable, or a pipeline feature;
/// the built-in default is "false".
/// </summary>
public static readonly Knob AgentCDNConnectivityFailWarning = new Knob(
    nameof(AgentCDNConnectivityFailWarning),
    "Show warning message when the Agent CDN Endpoint (download.agent.dev.azure.com) is not reachable. ",
    new RuntimeKnobSource("AGENT_CDN_CONNECTIVITY_FAIL_WARNING"),
    new EnvironmentKnobSource("AGENT_CDN_CONNECTIVITY_FAIL_WARNING"),
    new PipelineFeatureSource("AgentCDNConnectivityFailWarning"),
    new BuiltInDefaultKnobSource("false"));
797805
}
798806
}

src/Agent.Sdk/Util/PlatformUtil.cs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
using Newtonsoft.Json;
1919
using System.ServiceProcess;
2020
using Agent.Sdk.Util;
21+
using System.Net.Http;
22+
using System.Net;
2123

2224
namespace Agent.Sdk
2325
{
@@ -438,6 +440,47 @@ public static bool DetectAzureVM()
438440
}
439441
return isAzureVM;
440442
}
443+
444+
// The URL of the agent package hosted on Azure CDN
445+
private const string _agentPackageUri = "https://download.agent.dev.azure.com/agent/4.252.0/vsts-agent-win-x64-4.252.0.zip";
446+
447+
#nullable enable
448+
/// <summary>
/// Checks whether the agent CDN endpoint is reachable by sending an HTTP HEAD request
/// for the agent package at <c>_agentPackageUri</c>.
/// </summary>
/// <param name="webProxy">
/// Optional <see cref="IWebProxy"/> to route the request through. If <c>null</c>, proxy usage is
/// disabled on the handler (<c>UseProxy = false</c>), so the request is sent without a proxy.
/// </param>
/// <remarks>
/// - Certificate revocation checking is enabled on the handler.
/// - The request is cancelled after 5 seconds so an unreachable endpoint cannot stall the caller.
/// - The handler, client, request, cancellation source and response are all disposed before returning.
/// </remarks>
/// <returns>
/// <c>true</c> if the endpoint responds with a success (2xx) status code; otherwise, <c>false</c>.
/// </returns>
/// <exception cref="HttpRequestException">The request could not be completed (e.g., DNS or connection failure).</exception>
/// <exception cref="OperationCanceledException">The request did not complete within the 5-second timeout.</exception>
public static async Task<bool> IsAgentCdnAccessibleAsync(IWebProxy? webProxy = null)
{
    // Only route through a proxy when the caller supplied one.
    using HttpClientHandler handler = new()
    {
        Proxy = webProxy,
        UseProxy = webProxy is not null,
        CheckCertificateRevocationList = true // Check for certificate revocation
    };
    using HttpClient httpClient = new(handler);

    // HEAD avoids downloading the full agent package just to probe reachability.
    using HttpRequestMessage request = new(HttpMethod.Head, _agentPackageUri);

    // Apply a 5-second timeout to prevent hanging.
    using CancellationTokenSource cts = new(TimeSpan.FromSeconds(5));

    // The response is disposed too, so its connection resources are released promptly.
    using HttpResponseMessage response = await httpClient.SendAsync(request, cts.Token).ConfigureAwait(false);
    return response.IsSuccessStatusCode;
}
483+
#nullable disable
441484
}
442485

443486
#pragma warning disable CS0659 // Type overrides Object.Equals(object o) but does not override Object.GetHashCode()

src/Agent.Worker/JobExtension.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
using Agent.Sdk.Knob;
1717
using Newtonsoft.Json;
1818
using Microsoft.VisualStudio.Services.Agent.Worker.Telemetry;
19+
using Microsoft.Identity.Client.TelemetryCore.TelemetryClient;
1920

2021
namespace Microsoft.VisualStudio.Services.Agent.Worker
2122
{
@@ -230,6 +231,32 @@ public async Task<List<IStep>> InitializeJob(IExecutionContext jobContext, Pipel
230231
}
231232
}
232233

234+
// Check if the Agent CDN is accessible
235+
if (AgentKnobs.AgentCDNConnectivityFailWarning.GetValue(context).AsBoolean())
236+
{
237+
try
238+
{
239+
Trace.Verbose("Checking if the Agent CDN Endpoint (download.agent.dev.azure.com) is reachable");
240+
bool isAgentCDNAccessible = await PlatformUtil.IsAgentCdnAccessibleAsync(agentWebProxy.WebProxy);
241+
if (isAgentCDNAccessible)
242+
{
243+
context.Output("Agent CDN is accessible.");
244+
}
245+
else
246+
{
247+
context.Warning(StringUtil.Loc("AgentCdnAccessFailWarning"));
248+
}
249+
PublishAgentCDNAccessStatusTelemetry(context, isAgentCDNAccessible);
250+
}
251+
catch (Exception ex)
252+
{
253+
// Handles network-level or unexpected exceptions (DNS failure, timeout, etc.)
254+
context.Warning(StringUtil.Loc("AgentCdnAccessFailWarning"));
255+
PublishAgentCDNAccessStatusTelemetry(context, false);
256+
Trace.Error($"Exception when attempting a HEAD request to Agent CDN: {ex}");
257+
}
258+
}
259+
233260
if (PlatformUtil.RunningOnWindows)
234261
{
235262
// This is for internal testing and is not publicly supported. This will be removed from the agent at a later time.
@@ -755,6 +782,31 @@ private void PublishKnobsInfo(IExecutionContext jobContext)
755782
PublishTelemetry(jobContext, telemetryData, "KnobsStatus");
756783
}
757784

785+
/// <summary>
/// Publishes the "AgentCDNAccessStatus" telemetry event recording whether the Agent CDN
/// connectivity check succeeded. Any exception raised while publishing is logged and ignored.
/// </summary>
/// <param name="context">Execution context; supplies the job id (when available) and is passed to PublishTelemetry.</param>
/// <param name="isAgentCDNAccessible">Outcome of the CDN connectivity check to report.</param>
private void PublishAgentCDNAccessStatusTelemetry(IExecutionContext context, bool isAgentCDNAccessible)
{
    try
    {
        var telemetryData = new Dictionary<string, string>
        {
            // Falls back to an empty string when the context, its variables, or the job id is missing.
            ["JobId"] = context?.Variables?.System_JobId?.ToString() ?? string.Empty,
            ["isAgentCDNAccessible"] = isAgentCDNAccessible.ToString()
        };

        // The payload and feature name are handed to PublishTelemetry; no separate
        // Command object is needed here (the previously built one was never used).
        PublishTelemetry(context, telemetryData, "AgentCDNAccessStatus");
    }
    catch (Exception ex)
    {
        // Telemetry is best-effort: a failure here must not affect the job.
        Trace.Verbose($"Ignoring exception during 'AgentCDNAccessStatus' telemetry publish: '{ex.Message}'");
    }
}
809+
758810
private void PublishTelemetry(IExecutionContext context, Dictionary<string, string> telemetryData, string feature)
759811
{
760812
try

src/Misc/layoutbin/en-US/strings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"AddEnvironmentVMResourceTags": "Environment Virtual Machine resource tags? (Y/N)",
1111
"AgentAddedSuccessfully": "Successfully added the agent",
1212
"AgentAlreadyInsideContainer": "Container feature is not supported when agent is already running inside container. Please reference documentation (https://go.microsoft.com/fwlink/?linkid=875268)",
13+
"AgentCdnAccessFailWarning": "Action Required: Azure Pipelines Agent cannot reach the new CDN URL. Allowlist 'download.agent.dev.azure.com' now to prevent pipeline failures. Details: https://devblogs.microsoft.com/devops/cdn-domain-url-change-for-agents-in-pipelines/",
1314
"AgentDoesNotSupportContainerFeatureRhel6": "Agent does not support the container feature on Red Hat Enterprise Linux 6 or CentOS 6.",
1415
"AgentDowngrade": "Downgrading agent to a lower version. This is usually due to a rollback of the currently published agent for a bug fix. To disable this behavior, set environment variable AZP_AGENT_DOWNGRADE_DISABLED=true before launching your agent.",
1516
"AgentExit": "Agent will exit shortly for update, should back online within 10 seconds.",

0 commit comments

Comments
 (0)