Skip to content

Commit db078cf

Browse files
marcelolynch authored and semihokur committed
Merged PR 747091: Use full hostname for agents in ADO distributed builds
Hostname resolution stopped working after we removed the private subnet we were using to run distributed builds. It turns out that DNS resolution in the "default network" needs the hostnames to be qualified with a domain (https://learn.microsoft.com/en-us/azure/virtual-network/virtual-networks-name-resolution-for-vms-and-role-instances). This PR adds an option (to be set by the AdoBuildRunner) so we can inject the correct hostnames in the build, without changing how this works for CloudBuild (where Dns.GetHostName works). Related work items: #2116072
1 parent d934cd3 commit db078cf

File tree

18 files changed

+88
-54
lines changed

18 files changed

+88
-54
lines changed

.azdo/linux/job-selfhost.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,15 +139,14 @@ jobs:
139139
# - we also disable early worker release to avoid releasing a worker before attachment, which tends to happen
140140
# when the build is highly cached: the intention is to have as much of a distributed build as possible for validation purposes
141141
# Set a 60m timeout so we can catch hangs *and* get logs collected at the same time. Otherwise the whole job will timeout (check 'timeoutInMinutes' above).
142-
timeout --signal 9 60m ./bxl.sh --use-dev --use-adobuildrunner ${{ parameters.BxlCommonArgs }} /logsDirectory:"Out/Logs/${{ parameters.validationName }}" ${{ parameters.bxlExtraArgs }} "/f:tag='test'" /earlyWorkerRelease- /p:BuildXLWorkerAttachTimeoutMin=10 /logToKusto /logToKustoBlobUri:https://adomessages.blob.core.windows.net/adomessages /logToKustoIdentityId:6e0959cf-a9ba-4988-bbf1-7facd9deda51 /logToKustoTenantId:975f013f-7f24-47e8-a7d3-abc4752bf346 /historicMetadataCache-
142+
timeout --signal 9 60m ./bxl.sh --use-dev --use-adobuildrunner ${{ parameters.BxlCommonArgs }} /logsDirectory:"Out/Logs/${{ parameters.validationName }}" ${{ parameters.bxlExtraArgs }} "/f:tag='test'" /earlyWorkerRelease- /p:BuildXLWorkerAttachTimeoutMin=5 /logToKusto /logToKustoBlobUri:https://adomessages.blob.core.windows.net/adomessages /logToKustoIdentityId:6e0959cf-a9ba-4988-bbf1-7facd9deda51 /logToKustoTenantId:975f013f-7f24-47e8-a7d3-abc4752bf346 /historicMetadataCache- /p:BuildXLGrpcVerbosityEnabled=1 /p:BuildXLGrpcVerbosityLevel=1 /dynamicBuildWorkerSlots:1
143143
displayName: Test (${{ parameters.validationName }})
144144
workingDirectory: /home/subst
145145
env:
146146
PAT1esSharedAssets: $(PAT-TseBuild-AzureDevOps-1esSharedAssets-Package-Read)
147147
PATCloudBuild: $(PAT-TseBuild-AzureDevOps-CloudBuild-Packaging-Read)
148148
VSTSPERSONALACCESSTOKEN: $(PAT-TseBuild-AzureDevOps-mseng-buildcache)
149149
SYSTEM_ACCESSTOKEN: $(System.AccessToken)
150-
AdoBuildRunnerWaitForOrchestratorExit: true
151150
AdoBuildRunnerInvocationKey: LinuxSelfhostValidation_${{ parameters.validationName }}
152151
153152
- task: PublishTestResults@2

.azdo/linux/pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ extends:
4141
# Build and test selfhost with BuildXL
4242
- template: /.azdo/linux/job-selfhost.yml@self
4343
parameters:
44-
Distributed: false
44+
Distributed: true
4545
ValidationName: InternalRelease
4646
BxlExtraArgs: --internal /q:ReleaseLinux /forceAddExecutionPermission-
4747

Documentation/Wiki/Flags.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ This page lists flags that can be used to configure BuildXL.
140140
| LogToConsole | Displays the specified messages in the console. |
141141
| LogToKusto | Whether to send log events to Kusto. If enabled, a valid authentication mechanism should be available with enough permissions to write into the blob storage account where logs are piped to Kusto. Use /logToKustoBlobUri:https://{storage-account-name}/{container-name} and /logToKustoIdentityId:{Identity guid} to specify the destination of the log messages. |
142142
| LowPriority | Runs the build engine and all tools at a lower priority in order to provide better responsiveness to interactive processes on the current machine. |
143+
| MachineHostName | Specifies the host name where the machine running the build can be reached. This value should only be overridden by build runners, never by a user. In particular, we need it to be overridable because on ADO networks the machines are not reachable at the hostname that GetHostName returns, and we need a special suffix that is appended by the AdoBuildRunner. |
143144
| ManageMemoryMode | Specifies the mode to manage memory under pressure. Defaults to CancellationRam where {ShortProductName} attempts to cancel processes. EmptyWorkingSet mode will empty working set of processes instead of cancellation. Suspend mode will suspend processes to free memory. |
144145
| MaskUntrackedAccesses | When enabled, {ShortProductName} does not consider any access under untracked paths or scopes for sake of cache lookup. Defaults to on. |
145146
| MaxCacheLookup | Specifies the maximum number of cache lookups that {ShortProductName} will launch at one time. The default value is three times the number of processors in the current machine. |

Public/Src/App/Bxl/Args.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,9 @@ public bool TryParse(string[] args, PathTable pathTable, out ICommandLineConfigu
790790
OptionHandlerFactory.CreateBoolOption(
791791
"lowPriority",
792792
sign => schedulingConfiguration.LowPriority = sign),
793+
OptionHandlerFactory.CreateOption(
794+
"machineHostName",
795+
opt => distributionConfiguration.MachineHostName = opt.Value),
793796
OptionHandlerFactory.CreateOption(
794797
"manageMemoryMode",
795798
opt => schedulingConfiguration.ManageMemoryMode = CommandLineUtilities.ParseEnumOption<ManageMemoryMode>(opt)),

Public/Src/App/Bxl/HelpText.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,11 @@ public static void DisplayHelp(HelpLevel helpLevel)
956956
Strings.HelpText_DisplayHelp_DistributedBuildOrchestratorLocation,
957957
HelpLevel.Verbose);
958958

959+
hw.WriteOption(
960+
"/machineHostName:<host name>",
961+
Strings.HelpText_DisplayHelp_MachineHostName,
962+
HelpLevel.Verbose);
963+
959964
hw.WriteOption(
960965
"/enableWorkerSourceFileMaterialization[+|-]",
961966
Strings.HelpText_DisplayHelp_DistributedBuildWorkerSourceMaterialization,

Public/Src/App/Bxl/Strings.resx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -504,10 +504,13 @@
504504
<data name="HelpText_DisplayHelp_DistributedBuildBanner" xml:space="preserve">
505505
<value>DISTRIBUTED BUILD</value>
506506
</data>
507-
<data name="HelpText_DisplayHelp_DistributedBuildOrchestratorLocation" xml:space="preserve">
507+
<data name="HelpText_DisplayHelp_DistributedBuildOrchestratorLocation" xml:space="preserve">
508508
<value>Specifies the IP address or host name and TCP port of the orchestrator machine to which a worker will connect to join a build session. This argument is redundant if the orchestrator is invoked with /distributedBuildWorker specified for this worker. (short form: /dbo)</value>
509509
</data>
510-
<data name="HelpText_DisplayHelp_DistributedBuildWorker" xml:space="preserve">
510+
<data name="HelpText_DisplayHelp_MachineHostName" xml:space="preserve">
511+
<value>Specifies the host name where the machine running the build can be reached. This value should only be overridden by build runners, never by a user. In particular, we need it to be overridable because on ADO networks the machines are not reachable at the hostname that GetHostName returns, and we need a special suffix that is appended by the AdoBuildRunner.</value>
512+
</data>
513+
<data name="HelpText_DisplayHelp_DistributedBuildWorker" xml:space="preserve">
511514
<value>Specifies the IP address or host name and TCP port of remote worker build services which this process can dispatch work to during a distributed build (can specify multiple). This argument is redundant if the corresponding worker is invoked with /distributedBuildOrchestratorLocation specified. (short form: /dbw)</value>
512515
</data>
513516
<data name="HelpText_DisplayHelp_DistributedBuildServicePort" xml:space="preserve">

Public/Src/Engine/Dll/Distribution/DistributionHelpers.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,6 @@ public static ArraySegment<byte> ToArraySegmentByte(this ByteString byteString)
7474
return new ArraySegment<byte>(byteString.ToByteArray());
7575
}
7676

77-
internal static string GetServiceName(int port)
78-
{
79-
return GetServiceName(System.Net.Dns.GetHostName(), port);
80-
}
81-
8277
internal static string GetServiceName(string ipAddress, int port)
8378
{
8479
return string.Format(CultureInfo.InvariantCulture, "{0}::{1}", ipAddress, port);

Public/Src/Engine/Dll/Distribution/OrchestratorService.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ internal ExecutionResultSerializer ResultSerializer
5959
}
6060
}
6161

62+
internal readonly string Hostname;
63+
6264
private readonly RemoteWorker[] m_remoteWorkers;
6365
private readonly LoggingContext m_loggingContext;
6466

@@ -76,6 +78,8 @@ public OrchestratorService(IDistributionConfiguration config, LoggingContext log
7678
{
7779
Contract.Requires(config != null && config.BuildRole.IsOrchestrator());
7880

81+
Hostname = config.MachineHostName;
82+
7983
// Create all remote workers
8084
m_buildServicePort = config.BuildServicePort;
8185
m_remoteWorkers = new RemoteWorker[config.RemoteWorkerCount];
@@ -381,7 +385,7 @@ public bool Hello(ServiceLocation workerLocation)
381385
{
382386
lock (m_remoteWorkers)
383387
{
384-
if (m_remoteWorkers.Any(rw => rw.Location.IpAddress == workerLocation.IpAddress && rw.Location.Port == workerLocation.Port))
388+
if (m_remoteWorkers.Any(rw => rw.Location?.IpAddress == workerLocation.IpAddress && rw.Location?.Port == workerLocation.Port))
385389
{
386390
// We already know this worker (presumably, from the command line).
387391
// Just acknowledge the RPC.

Public/Src/Engine/Dll/Distribution/RemoteWorker.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ private async Task<bool> TryAttachAsync()
442442
FingerprintSalt = m_orchestratorService.Environment.ContentFingerprinter.FingerprintSalt,
443443
OrchestratorLocation = new ServiceLocation
444444
{
445-
IpAddress = Dns.GetHostName(),
445+
IpAddress = m_orchestratorService.Hostname,
446446
Port = m_orchestratorService.Port,
447447
},
448448
};

Public/Src/Engine/Dll/Distribution/WorkerService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ async Task IWorkerService.SayHelloAsync(IDistributionServiceLocation orchestrato
303303
m_orchestratorInitialized = true;
304304
m_orchestratorClient.Initialize(orchestratorLocation.IpAddress, orchestratorLocation.BuildServicePort, OnConnectionFailureAsync);
305305

306-
var helloResult = await m_orchestratorClient.SayHelloAsync(new ServiceLocation() { IpAddress = Dns.GetHostName(), Port = m_port });
306+
var helloResult = await m_orchestratorClient.SayHelloAsync(new ServiceLocation() { IpAddress = m_config.Distribution.MachineHostName, Port = m_port });
307307
if (!helloResult.Succeeded)
308308
{
309309
// If we can't say hello there is no hope for attachment

0 commit comments

Comments
 (0)