Skip to content

Commit 59a554d

Browse files
feat: run traces command every 5 minutes for 30 minutes (#2876)
Co-authored-by: svcAPLBot <[email protected]>
1 parent d4fab33 commit 59a554d

File tree

7 files changed

+158
-52
lines changed

7 files changed

+158
-52
lines changed

package-lock.json

Lines changed: 32 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/apply.ts

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ import { runtimeUpgrade } from '../common/runtime-upgrade'
1414
import { applyAsApps } from './apply-as-apps'
1515
import { applyTeams } from './apply-teams'
1616
import { commit } from './commit'
17-
import { collectTraces } from './traces'
1817

1918
const cmdName = getFilename(__filename)
2019
const dir = '/tmp/otomi/'
@@ -84,12 +83,6 @@ export const apply = async (): Promise<void> => {
8483
await applyAll()
8584
} catch (e) {
8685
d.error(e)
87-
// Collect traces on apply failure
88-
try {
89-
await collectTraces()
90-
} catch (traceError) {
91-
d.error('Failed to collect traces:', traceError)
92-
}
9386
d.info(`Retrying in ${retryOptions.maxTimeout} ms`)
9487
await deletePendingHelmReleases()
9588
throw e

src/cmd/install.ts

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,7 @@ import { cleanupHandler, prepareEnvironment } from 'src/common/cli'
44
import { logLevelString, terminal } from 'src/common/debug'
55
import { env } from 'src/common/envalid'
66
import { deployEssential, hf, HF_DEFAULT_SYNC_ARGS } from 'src/common/hf'
7-
import {
8-
applyServerSide,
9-
deletePendingHelmReleases,
10-
getDeploymentState,
11-
getHelmReleases,
12-
setDeploymentState,
13-
waitForCRD,
14-
} from 'src/common/k8s'
7+
import { applyServerSide, getDeploymentState, getHelmReleases, setDeploymentState, waitForCRD } from 'src/common/k8s'
158
import { getFilename, rootDir } from 'src/common/utils'
169
import { getImageTagFromValues, getPackageVersion, writeValuesToFile } from 'src/common/values'
1710
import { getParsedArgs, HelmArguments, helmOptions, setParsedArgs } from 'src/common/yargs'
@@ -24,7 +17,6 @@ import {
2417
createWelcomeConfigMap,
2518
initialSetupData,
2619
} from './commit'
27-
import { collectTraces } from './traces'
2820

2921
const cmdName = getFilename(__filename)
3022
const dir = '/tmp/otomi/'
@@ -129,13 +121,6 @@ const install = async (): Promise<void> => {
129121
await installAll()
130122
} catch (e) {
131123
d.error(e)
132-
// Collect traces on installation failure
133-
try {
134-
await collectTraces()
135-
await deletePendingHelmReleases()
136-
} catch (traceError) {
137-
d.error('Failed to collect traces:', traceError)
138-
}
139124
throw e
140125
}
141126
return

src/cmd/traces.test.ts

Lines changed: 32 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -386,6 +386,13 @@ describe('Collect Traces Command', () => {
386386
jest.clearAllMocks()
387387
})
388388

389+
// Helper function to extract report from timestamped key
390+
const extractReportFromConfigMap = (data: Record<string, string>): any => {
391+
const reportKey = Object.keys(data).find((key) => key.startsWith('report-'))
392+
if (!reportKey) throw new Error('No report key found in ConfigMap data')
393+
return JSON.parse(data[reportKey])
394+
}
395+
389396
it('should detect all types of failed resources and store in ConfigMap', async () => {
390397
// Mock various failing resources
391398
mockCoreApi.listPodForAllNamespaces.mockResolvedValue({
@@ -480,12 +487,15 @@ describe('Collect Traces Command', () => {
480487

481488
await collectTraces()
482489

483-
expect(mockCreateUpdateConfigMap).toHaveBeenCalledWith(mockCoreApi, 'apl-traces-report', 'apl-operator', {
484-
report: expect.any(String),
485-
})
490+
expect(mockCreateUpdateConfigMap).toHaveBeenCalledWith(
491+
mockCoreApi,
492+
'apl-traces-report',
493+
'apl-operator',
494+
expect.objectContaining({}),
495+
)
486496

487-
const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
488-
const reportData = JSON.parse(configMapCall[3].report)
497+
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
498+
const reportData = extractReportFromConfigMap(configMapData)
489499

490500
// Should have all resource types
491501
expect(reportData.failedResources.length).toBeGreaterThan(0)
@@ -516,8 +526,12 @@ describe('Collect Traces Command', () => {
516526

517527
await collectTraces()
518528

519-
// Should not create ConfigMap for healthy cluster
520-
expect(mockCreateUpdateConfigMap).not.toHaveBeenCalled()
529+
// Should always create ConfigMap (even when healthy, for timestamp visibility)
530+
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()
531+
532+
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
533+
const reportData = extractReportFromConfigMap(configMapData)
534+
expect(reportData.failedResources).toEqual([])
521535
})
522536

523537
it('should call createUpdateConfigMap when there are issues', async () => {
@@ -586,8 +600,8 @@ describe('Collect Traces Command', () => {
586600
// Should create ConfigMap with deployment issues
587601
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()
588602

589-
const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
590-
const reportData = JSON.parse(configMapCall[3].report)
603+
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
604+
const reportData = extractReportFromConfigMap(configMapData)
591605

592606
// Should have deployment in failed resources
593607
expect(reportData.failedResources).toEqual(
@@ -638,8 +652,8 @@ describe('Collect Traces Command', () => {
638652

639653
await collectTraces()
640654

641-
const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
642-
const reportData = JSON.parse(configMapCall[3].report)
655+
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
656+
const reportData = extractReportFromConfigMap(configMapData)
643657

644658
// Should not have errors field when all collections succeed
645659
expect(reportData.errors).toBeUndefined()
@@ -659,7 +673,12 @@ describe('Collect Traces Command', () => {
659673

660674
// Should complete without throwing despite all failures
661675
expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled()
662-
// Should not create ConfigMap when no issues found and all failed
663-
expect(mockCreateUpdateConfigMap).not.toHaveBeenCalled()
676+
// Should always create ConfigMap (even when all fail, for timestamp visibility and error reporting)
677+
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()
678+
679+
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
680+
const reportData = extractReportFromConfigMap(configMapData)
681+
expect(reportData.errors).toBeDefined()
682+
expect(reportData.errors.length).toBeGreaterThan(0)
664683
})
665684
})

0 commit comments

Comments
 (0)