Skip to content

Commit f756bad

Browse files
authored
handle empty agent config in Go client and fix build failures (#1338)
* handle empty agent config in Go client * handle empty agent config in Go client * add retrycount on task failure for build * add backup trivy db and retry * use cached buildx image * add env variables for trivy backup dbs * add env variables for trivy backup dbs * implement trivy db and javadb fallback mechanism * fix windows image flaky build failures * fix windows image flaky build failures * fix windows image flaky build failures * fix windows image flaky build failures * fix windows image flaky build failures * revert changes
1 parent 2b37fbb commit f756bad

File tree

3 files changed

+114
-19
lines changed

3 files changed

+114
-19
lines changed

.pipelines/azure_pipeline_mergedbranches.yaml

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ jobs:
7878
7979
cd $(Build.SourcesDirectory)/deployment/arc-k8s-extension/ServiceGroupRoot/Scripts
8080
tar -czvf ../artifacts.tar.gz ../../../../charts/azuremonitor-containers/ pushChartToAcr.sh
81-
81+
8282
cd $(Build.SourcesDirectory)/deployment/arc-k8s-extension-release-v2/ServiceGroupRoot/Scripts
8383
tar -czvf ../artifacts.tar.gz arcExtensionRelease.sh
8484
@@ -95,7 +95,7 @@ jobs:
9595
9696
- task: CredScan@3
9797
displayName: "SDL : Run credscan"
98-
98+
9999
- task: CopyFiles@2
100100
displayName: "Copy ev2 deployment artifacts"
101101
inputs:
@@ -140,7 +140,7 @@ jobs:
140140
inputs:
141141
pathToPublish: '$(Build.ArtifactStagingDirectory)'
142142
artifactName: drop
143-
143+
144144
- task: Armory@2
145145
displayName: 'Run ARMory'
146146
inputs:
@@ -175,6 +175,7 @@ jobs:
175175
176176
sudo apt-get update && sudo apt-get -y install qemu binfmt-support qemu-user-static
177177
docker system prune --all -f
178+
docker images -q --filter "dangling=true" | xargs docker rmi
178179
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
179180
180181
docker buildx create --name testbuilder
@@ -185,6 +186,11 @@ jobs:
185186
az account set -s ${{ variables.subscription }}
186187
az acr login -n ${{ variables.containerRegistry }}
187188
189+
# NOTE: Use the prometheus-collector team's cached buildx image because moby/buildkit:buildx-stable-1 is being throttled
190+
docker pull mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1
191+
docker buildx create --name dockerbuilder --driver docker-container --driver-opt image=mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1 --use
192+
docker buildx inspect --bootstrap
193+
188194
if [ "$(Build.Reason)" != "PullRequest" ]; then
189195
docker buildx build --platform $(BUILD_PLATFORMS) --tag ${{ variables.repoImageName }}:$(linuxImagetag) -f kubernetes/linux/Dockerfile.multiarch --metadata-file $(Build.ArtifactStagingDirectory)/linux/metadata.json --build-arg IMAGE_TAG=$(linuxTelemetryTag) --build-arg GOLANG_BASE_IMAGE=$(GOLANG_BASE_IMAGE) --build-arg CI_BASE_IMAGE=$(CI_BASE_IMAGE) --push --provenance=false .
190196
@@ -274,8 +280,54 @@ jobs:
274280
scriptLocation: inlineScript
275281
inlineScript: |
276282
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
283+
PRIMARY_TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db"
284+
SECONDARY_TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db"
285+
PRIMARY_TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db"
286+
SECONDARY_TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db"
287+
288+
# Set initial repositories to primary
289+
export TRIVY_DB_REPOSITORY=$PRIMARY_TRIVY_DB_REPOSITORY
290+
export TRIVY_JAVA_DB_REPOSITORY=$PRIMARY_TRIVY_JAVA_DB_REPOSITORY
291+
292+
# Run the Trivy scan, writing its stdout and stderr to trivy_output.log, and return Trivy's exit code
293+
run_trivy_scan() {
294+
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM "${{ variables.repoImageName }}:$(linuxImagetag)" > trivy_output.log 2>&1
295+
return $?
296+
}
277297
278-
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 ${{ variables.repoImageName }}:$(linuxImagetag)
298+
# Attempt scan up to 5 times with repository fallback
299+
for i in {1..5}; do
300+
echo "Running Trivy scan attempt $i..."
301+
302+
# Run the Trivy scan and capture exit code
303+
run_trivy_scan
304+
TRIVY_EXIT_CODE=$?
305+
306+
# Check if scan was successful
307+
if [ $TRIVY_EXIT_CODE -eq 0 ]; then
308+
echo "Trivy scan succeeded."
309+
cat trivy_output.log
310+
break
311+
fi
312+
313+
# If the first attempt fails, switch to secondary repositories
314+
if [ $i -eq 1 ]; then
315+
echo "Primary repositories failed with an error. Switching to secondary repositories."
316+
export TRIVY_DB_REPOSITORY=$SECONDARY_TRIVY_DB_REPOSITORY
317+
export TRIVY_JAVA_DB_REPOSITORY=$SECONDARY_TRIVY_JAVA_DB_REPOSITORY
318+
fi
319+
320+
# Log and wait before retrying if an error occurred
321+
echo "Error: Trivy scan attempt $i failed. Retrying... ($i/5)"
322+
cat trivy_output.log
323+
sleep 5 # Wait 5 seconds before retrying
324+
done
325+
326+
# Final check: if still failing after 5 attempts, exit with error
327+
if [ $TRIVY_EXIT_CODE -ne 0 ]; then
328+
echo "Error: Trivy scan failed after 5 retries."
329+
exit 1
330+
fi
279331
280332
# Find in cache or download a specific version of Go and add it to the PATH.
281333
- task: GoTool@0
@@ -287,7 +339,7 @@ jobs:
287339
ls
288340
make
289341
displayName: 'Execute Makefile for Linux Build'
290-
342+
291343
- task: CodeQL3000Finalize@0
292344
condition: eq(variables.IS_MAIN_BRANCH, true)
293345

@@ -355,6 +407,7 @@ jobs:
355407
azureSubscription: ${{ variables.armServiceConnectionName }}
356408
scriptType: ps
357409
scriptLocation: inlineScript
410+
retryCountOnTaskFailure: 2
358411
inlineScript: |
359412
mkdir -p $(Build.ArtifactStagingDirectory)/windows
360413
cd kubernetes/windows
@@ -398,7 +451,7 @@ jobs:
398451
399452
echo "Extract fluent-bit"
400453
docker cp signingContainer:C:\opt\fluent-bit .
401-
454+
402455
echo "Extract Ruby"
403456
docker cp signingContainer:C:\ruby31 .
404457
@@ -525,7 +578,7 @@ jobs:
525578
targetType: 'inline'
526579
script: |
527580
docker create --name pushContainer ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2019BaseImageVersion)-unsigned
528-
581+
529582
echo "Copy Signed binaries/folders back to docker image..."
530583
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.exe pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.exe
531584
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.dll pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.dll
@@ -588,6 +641,7 @@ jobs:
588641
azureSubscription: ${{ variables.armServiceConnectionName }}
589642
scriptType: ps
590643
scriptLocation: inlineScript
644+
retryCountOnTaskFailure: 2
591645
inlineScript: |
592646
mkdir -p $(Build.ArtifactStagingDirectory)/windows
593647
cd kubernetes/windows
@@ -631,7 +685,7 @@ jobs:
631685
632686
echo "Extract fluent-bit"
633687
docker cp signingContainer:C:\opt\fluent-bit .
634-
688+
635689
echo "Extract Ruby"
636690
docker cp signingContainer:C:\ruby31 .
637691
@@ -758,7 +812,7 @@ jobs:
758812
targetType: 'inline'
759813
script: |
760814
docker create --name pushContainer ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2022BaseImageVersion)-unsigned
761-
815+
762816
echo "Copy Signed binaries/folders back to docker image..."
763817
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.exe pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.exe
764818
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.dll pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.dll
@@ -907,7 +961,7 @@ jobs:
907961
inputs:
908962
pathToPublish: '$(Build.ArtifactStagingDirectory)'
909963
artifactName: drop
910-
964+
911965
- task: AntiMalware@4
912966
displayName: 'Run MpCmdRun.exe'
913967
inputs:

kubernetes/linux/Dockerfile.multiarch

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,29 @@ RUN ln -s /lib/libnssckbi.so /lib/p11-kit-trust.so
141141
FROM distroless_image AS vulnscan
142142
COPY .trivyignore .trivyignore
143143
RUN ["/bin/bash", "-c", "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.39.0"]
144+
145+
# Set up primary and secondary repository URLs
146+
ENV PRIMARY_TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db"
147+
ENV SECONDARY_TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db"
148+
ENV PRIMARY_TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db"
149+
ENV SECONDARY_TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db"
150+
151+
# Download Trivy main database with a fallback mechanism
152+
RUN export TRIVY_DB_REPOSITORY=$PRIMARY_TRIVY_DB_REPOSITORY && \
153+
trivy image --download-db-only || \
154+
(echo "Primary TRIVY_DB_REPOSITORY failed, trying secondary." && \
155+
export TRIVY_DB_REPOSITORY=$SECONDARY_TRIVY_DB_REPOSITORY && \
156+
trivy image --download-db-only) || \
157+
(echo "Both TRIVY_DB_REPOSITORY sources failed." && exit 1)
158+
159+
# Download Trivy Java database with a fallback mechanism
160+
RUN export TRIVY_JAVA_DB_REPOSITORY=$PRIMARY_TRIVY_JAVA_DB_REPOSITORY && \
161+
trivy fs --scanners vuln --vuln-type library --download-java-db-only || \
162+
(echo "Primary TRIVY_JAVA_DB_REPOSITORY failed, trying secondary." && \
163+
export TRIVY_JAVA_DB_REPOSITORY=$SECONDARY_TRIVY_JAVA_DB_REPOSITORY && \
164+
trivy fs --scanners vuln --vuln-type library --download-java-db-only) || \
165+
(echo "Both TRIVY_JAVA_DB_REPOSITORY sources failed." && exit 1)
166+
144167
RUN ["/bin/bash", "-c", "trivy rootfs --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --skip-files \"/usr/local/bin/trivy\" /"]
145168
RUN ["/bin/bash", "-c", "trivy rootfs --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM /usr/lib"]
146169
RUN ["/bin/bash", "-c", "trivy rootfs --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --skip-files \"/usr/local/bin/trivy\" / > /dev/null 2>&1 && trivy rootfs --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM /usr/lib > /dev/null 2>&1"]

source/plugins/go/src/ingestion_token_utils.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -444,17 +444,35 @@ func getAgentConfiguration(imdsAccessToken string) (configurationId string, chan
444444
return configurationId, channelId, err
445445
}
446446

447-
if len(agentConfiguration.Configurations[0].Content.Channels) == 0 {
448-
message := "getAgentConfiguration: Received empty agentConfiguration.Configurations[0].Content.Channels"
449-
Log(message)
450-
SendException(message)
451-
return configurationId, channelId, err
447+
for _, config := range agentConfiguration.Configurations {
448+
if len(config.Content.Channels) == 0 {
449+
// This is expected: AMCS returns agent config based on OS type. For example, syslog is not supported on Windows, so that config will have no channels or data sources.
450+
message := "getAgentConfiguration: Received empty config.Content.Channels"
451+
Log(message)
452+
continue
453+
}
454+
455+
configurationId = config.Configurationid
456+
for _, channel := range config.Content.Channels {
457+
if channel.ID != "" {
458+
channelId = channel.ID
459+
break
460+
}
461+
}
462+
463+
if !ContainerLogV2ConfigMap && len(config.Content.Extensionconfigurations.Containerinsights) > 0 {
464+
for _, ciExtensionInstance := range config.Content.Extensionconfigurations.Containerinsights {
465+
ContainerLogSchemaV2 = ciExtensionInstance.Extensionsettings.DataCollectionSettings.EnableContainerLogV2
466+
}
467+
}
468+
break
452469
}
453470

454-
configurationId = agentConfiguration.Configurations[0].Configurationid
455-
channelId = agentConfiguration.Configurations[0].Content.Channels[0].ID
456-
if !ContainerLogV2ConfigMap && len(agentConfiguration.Configurations[0].Content.Extensionconfigurations.Containerinsights) > 0 {
457-
ContainerLogSchemaV2 = agentConfiguration.Configurations[0].Content.Extensionconfigurations.Containerinsights[0].Extensionsettings.DataCollectionSettings.EnableContainerLogV2
471+
if configurationId == "" || channelId == "" {
472+
message := "getAgentConfiguration: Failed to obtain configurationId or channelId"
473+
Log(message)
474+
SendException(message)
475+
return configurationId, channelId, errors.New(message)
458476
}
459477
Log("getAgentConfiguration: obtained configurationId: %s, channelId: %s", configurationId, channelId)
460478
Log("Info getAgentConfiguration: end")

0 commit comments

Comments
 (0)