Skip to content

Commit 2e17f06

Browse files
chore(e2e): Added checks to userdata scripts to reduce setup flakiness (#5991)
* chore(e2e): Added checks to userdata scripts to reduce setup flakiness
1 parent 41e5cac commit 2e17f06

File tree

3 files changed

+237
-85
lines changed
  • enos/modules

3 files changed

+237
-85
lines changed

enos/modules/aws_rdp_member_server/main.tf

Lines changed: 108 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,56 @@ resource "aws_instance" "member_server" {
6161

6262
user_data = <<EOF
6363
<powershell>
64+
# set variables for retry loops
65+
$timeout = 300
66+
$interval = 30
67+
6468
# Set up SSH so we can remotely manage the instance
6569
## Install OpenSSH Server and Client
66-
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
67-
Set-Service -Name sshd -StartupType 'Automatic'
68-
Start-Service sshd
69-
70-
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
71-
Set-Service -Name ssh-agent -StartupType Automatic
72-
Start-Service ssh-agent
70+
# Loop to make sure that SSH installs correctly
71+
$elapsed = 0
72+
do {
73+
try {
74+
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
75+
Set-Service -Name sshd -StartupType 'Automatic'
76+
Start-Service sshd
77+
$result = Get-Process -Name "sshd" -ErrorAction SilentlyContinue
78+
if ($result) {
79+
Write-Host "Successfully added and started openSSH server"
80+
break
81+
}
82+
} catch {
83+
Write-Host "SSH server was not installed, retrying"
84+
Start-Sleep -Seconds $interval
85+
$elapsed += $interval
86+
}
87+
if ($elapsed -ge $timeout) {
88+
Write-Host "SSH server installation failed after 5 minutes. Exiting."
89+
exit 1
90+
}
91+
} while ($true)
92+
93+
$elapsed = 0
94+
do {
95+
try {
96+
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
97+
Set-Service -Name ssh-agent -StartupType Automatic
98+
Start-Service ssh-agent
99+
$result = Get-Process -Name "ssh-agent" -ErrorAction SilentlyContinue
100+
if ($result) {
101+
Write-Host "Successfully added and started openSSH agent"
102+
break
103+
}
104+
} catch {
105+
Write-Host "SSH client was not installed, retrying"
106+
Start-Sleep -Seconds $interval
107+
$elapsed += $interval
108+
}
109+
if ($elapsed -ge $timeout) {
110+
Write-Host "SSH client installation failed after 5 minutes. Exiting."
111+
exit 1
112+
}
113+
} while ($true)
73114
74115
## Set PowerShell as the default SSH shell
75116
New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value (Get-Command powershell.exe).Path -PropertyType String -Force
@@ -91,45 +132,68 @@ resource "aws_instance" "member_server" {
91132
92133
# Adds member server to the domain
93134
[int]$intix = Get-NetAdapter | % { Process { If ( $_.Status -eq "up" ) { $_.ifIndex } }}
94-
Set-DNSClientServerAddress -interfaceIndex $intix -ServerAddresses ("${var.domain_controller_ip}","127.0.0.1")
95-
$here_string_password = @'
135+
Set-DNSClientServerAddress -interfaceIndex $intix -ServerAddresses ("${var.domain_controller_ip}","127.0.0.1")
136+
$here_string_password = @'
96137
${var.domain_admin_password}
97138
'@
98-
$password = ConvertTo-SecureString $here_string_password -AsPlainText -Force
99-
$username = "${local.domain_sld}\Administrator"
100-
$credential = New-Object System.Management.Automation.PSCredential($username,$password)
101-
102-
# check that domain can be reached
103-
$timeout = 300
104-
$interval = 10
105-
$elapsed = 0
106-
107-
do {
108-
try {
109-
$result = Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction Stop
110-
if ($result) {
111-
Write-Host "DNS resolved successfully."
112-
break
113-
}
114-
} catch {
115-
Write-Host "DNS not resolved yet. Retrying in $interval seconds..."
116-
Start-Sleep -Seconds $interval
117-
$elapsed += $interval
118-
}
119-
if ($elapsed -ge $timeout) {
120-
Write-Host "DNS resolution failed after 5 minutes. Exiting."
121-
exit 1
122-
}
123-
} while ($true)
124-
125-
# add computer to domain
126-
Add-Computer -DomainName "${var.active_directory_domain}" -Credential $credential
127-
128-
# Enable audio
129-
Set-Service -Name "Audiosrv" -StartupType Automatic
130-
Start-Service -Name "Audiosrv"
131-
132-
Restart-Computer -Force
139+
$password = ConvertTo-SecureString $here_string_password -AsPlainText -Force
140+
$username = "${local.domain_sld}\Administrator"
141+
$credential = New-Object System.Management.Automation.PSCredential($username,$password)
142+
143+
# retry settings (in seconds) for the domain checks below
144+
$timeout = 300
145+
$interval = 10
146+
$elapsed = 0
147+
148+
# check that domain can be reached
149+
do {
150+
try {
151+
Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction Stop
152+
Write-Host "resolved domain successfully."
153+
break
154+
} catch {
155+
Write-Host "Could not resolve domain. Retrying in $interval seconds..."
156+
Start-Sleep -Seconds $interval
157+
$elapsed += $interval
158+
}
159+
if ($elapsed -ge $timeout) {
160+
Write-Host "Resolving domain failed after 5 minutes. Exiting."
161+
exit 1
162+
}
163+
} while ($true)
164+
165+
#logging to troubleshoot domain issues
166+
Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction SilentlyContinue
167+
Get-Service -Name LanmanWorkstation, Netlogon, RpcSs | Select-Object Name, DisplayName, Status
168+
169+
# Add computer to domain
170+
$elapsed = 0
171+
do {
172+
try {
173+
Add-Computer -DomainName "${var.active_directory_domain}" -Credential $credential
174+
$result = (Get-WmiObject Win32_ComputerSystem).Domain
175+
if ($result -ne "WORKGROUP") {
176+
Write-Host "Added to domain successfully."
177+
break
178+
}
179+
} catch {
180+
Write-Host "Could not add to domain. Retrying in $interval seconds..."
181+
Start-Sleep -Seconds $interval
182+
$elapsed += $interval
183+
}
184+
if ($elapsed -ge $timeout) {
185+
Write-Host "Adding to domain failed after 5 minutes. Exiting."
186+
exit 1
187+
}
188+
} while ($true)
189+
# Logging to determine domain and ssh state for debugging
190+
(Get-WmiObject Win32_ComputerSystem).Domain
191+
Get-Process -Name *ssh* -ErrorAction SilentlyContinue
192+
193+
# Enable audio
194+
Set-Service -Name "Audiosrv" -StartupType Automatic
195+
Start-Service -Name "Audiosrv"
196+
Restart-Computer -Force
133197
</powershell>
134198
EOF
135199

enos/modules/aws_rdp_member_server_with_worker/main.tf

Lines changed: 81 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -87,36 +87,56 @@ resource "aws_instance" "worker" {
8787

8888
user_data = <<EOF
8989
<powershell>
90+
# set variables for retry loops
91+
$timeout = 300
92+
$interval = 30
93+
9094
# Set up SSH so we can remotely manage the instance
9195
## Install OpenSSH Server and Client
92-
$timeout = 300
93-
$interval = 10
94-
# Loop to make sure that SSH installs correctly
96+
# Loop to make sure that SSH installs correctly
97+
$elapsed = 0
9598
do {
96-
try {
97-
$result = Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
99+
try {
100+
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
101+
Set-Service -Name sshd -StartupType 'Automatic'
102+
Start-Service sshd
103+
$result = Get-Process -Name "sshd" -ErrorAction SilentlyContinue
98104
if ($result) {
99-
Write-Host "Successfully added openSSH server"
105+
Write-Host "Successfully added and started openSSH server"
100106
break
101107
}
102-
} catch {
103-
Write-Host "SSH server was not installed, retrying"
104-
Start-Sleep -Seconds $interval
105-
$elapsed += $interval
106-
}
107-
if ($elapsed -ge $timeout) {
108-
Write-Host "SSH server installation failed after 5 minutes. Exiting."
109-
exit 1
110-
}
108+
} catch {
109+
Write-Host "SSH server was not installed, retrying"
110+
Start-Sleep -Seconds $interval
111+
$elapsed += $interval
112+
}
113+
if ($elapsed -ge $timeout) {
114+
Write-Host "SSH server installation failed after 5 minutes. Exiting."
115+
exit 1
116+
}
111117
} while ($true)
112118
113-
114-
Set-Service -Name sshd -StartupType 'Automatic'
115-
Start-Service sshd
116-
117-
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
118-
Set-Service -Name ssh-agent -StartupType Automatic
119-
Start-Service ssh-agent
119+
$elapsed = 0
120+
do {
121+
try {
122+
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
123+
Set-Service -Name ssh-agent -StartupType Automatic
124+
Start-Service ssh-agent
125+
$result = Get-Process -Name "ssh-agent" -ErrorAction SilentlyContinue
126+
if ($result) {
127+
Write-Host "Successfully added and started openSSH agent"
128+
break
129+
}
130+
} catch {
131+
Write-Host "SSH client was not installed, retrying"
132+
Start-Sleep -Seconds $interval
133+
$elapsed += $interval
134+
}
135+
if ($elapsed -ge $timeout) {
136+
Write-Host "SSH client installation failed after 5 minutes. Exiting."
137+
exit 1
138+
}
139+
} while ($true)
120140
121141
# Set PowerShell as the default SSH shell
122142
New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value (Get-Command powershell.exe).Path -PropertyType String -Force
@@ -152,24 +172,51 @@ ${var.domain_admin_password}
152172
$elapsed = 0
153173
do {
154174
try {
155-
$result = Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction Stop
156-
if ($result) {
157-
Write-Host "DNS resolved successfully."
175+
Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction Stop
176+
Write-Host "resolved domain successfully."
177+
break
178+
} catch {
179+
Write-Host "Could not resolve domain. Retrying in $interval seconds..."
180+
Start-Sleep -Seconds $interval
181+
$elapsed += $interval
182+
}
183+
if ($elapsed -ge $timeout) {
184+
Write-Host "Resolving domain failed after 5 minutes. Exiting."
185+
exit 1
186+
}
187+
} while ($true)
188+
189+
#logging to troubleshoot domain issues
190+
Resolve-DnsName -Name "${var.active_directory_domain}" -Server "${var.domain_controller_ip}" -ErrorAction SilentlyContinue
191+
Get-Service -Name LanmanWorkstation, Netlogon, RpcSs | Select-Object Name, DisplayName, Status
192+
193+
194+
$timeout = 900
195+
$interval = 30
196+
# Add computer to domain
197+
$elapsed = 0
198+
do {
199+
try {
200+
Add-Computer -DomainName "${var.active_directory_domain}" -Credential $credential
201+
$result = (Get-WmiObject Win32_ComputerSystem).Domain
202+
if ($result -ne "WORKGROUP") {
203+
Write-Host "Added to domain successfully."
158204
break
159205
}
160-
} catch {
161-
Write-Host "DNS not resolved yet. Retrying in $interval seconds..."
206+
} catch {
207+
Write-Host "Could not add to domain. Retrying in $interval seconds..."
162208
Start-Sleep -Seconds $interval
163209
$elapsed += $interval
164-
}
165-
if ($elapsed -ge $timeout) {
166-
Write-Host "DNS resolution failed after 5 minutes. Exiting."
167-
exit 1
168-
}
210+
}
211+
if ($elapsed -ge $timeout) {
212+
Write-Host "Adding to domain failed after 15 minutes. Exiting."
213+
exit 1
214+
}
169215
} while ($true)
170216
171-
# add computer to domain
172-
Add-Computer -DomainName "${var.active_directory_domain}" -Credential $credential
217+
# Logging to determine domain and ssh state for debugging
218+
(Get-WmiObject Win32_ComputerSystem).Domain
219+
Get-Process -Name *ssh* -ErrorAction SilentlyContinue
173220
</powershell>
174221
EOF
175222

enos/modules/aws_windows_client/main.tf

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,56 @@ resource "aws_instance" "client" {
154154

155155
user_data = <<EOF
156156
<powershell>
157+
# set variables for retry loops
158+
$timeout = 300
159+
$interval = 30
160+
157161
# Set up SSH so we can remotely manage the instance
158162
## Install OpenSSH Server and Client
159-
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
160-
Set-Service -Name sshd -StartupType 'Automatic'
161-
Start-Service sshd
162-
163-
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
164-
Set-Service -Name ssh-agent -StartupType Automatic
165-
Start-Service ssh-agent
163+
# Loop to make sure that SSH installs correctly
164+
$elapsed = 0
165+
do {
166+
try {
167+
Add-WindowsCapability -Online -Name OpenSSH.Server~~~~0.0.1.0
168+
Set-Service -Name sshd -StartupType 'Automatic'
169+
Start-Service sshd
170+
$result = Get-Process -Name "sshd" -ErrorAction SilentlyContinue
171+
if ($result) {
172+
Write-Host "Successfully added and started openSSH server"
173+
break
174+
}
175+
} catch {
176+
Write-Host "SSH server was not installed, retrying"
177+
Start-Sleep -Seconds $interval
178+
$elapsed += $interval
179+
}
180+
if ($elapsed -ge $timeout) {
181+
Write-Host "SSH server installation failed after 5 minutes. Exiting."
182+
exit 1
183+
}
184+
} while ($true)
185+
186+
$elapsed = 0
187+
do {
188+
try {
189+
Add-WindowsCapability -Online -Name OpenSSH.Client~~~~0.0.1.0
190+
Set-Service -Name ssh-agent -StartupType Automatic
191+
Start-Service ssh-agent
192+
$result = Get-Process -Name "ssh-agent" -ErrorAction SilentlyContinue
193+
if ($result) {
194+
Write-Host "Successfully added and started openSSH agent"
195+
break
196+
}
197+
} catch {
198+
Write-Host "SSH client was not installed, retrying"
199+
Start-Sleep -Seconds $interval
200+
$elapsed += $interval
201+
}
202+
if ($elapsed -ge $timeout) {
203+
Write-Host "SSH client installation failed after 5 minutes. Exiting."
204+
exit 1
205+
}
206+
} while ($true)
166207
167208
## Set PowerShell as the default SSH shell
168209
New-ItemProperty -Path "HKLM:\SOFTWARE\OpenSSH" -Name DefaultShell -Value (Get-Command powershell.exe).Path -PropertyType String -Force

0 commit comments

Comments
 (0)