Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c1b79d3
feat(traces): implement periodic trace collection and timestamp repor…
CasLubbers Jan 27, 2026
9a2297e
refactor(traces): remove trace collection on installation and apply f…
CasLubbers Jan 27, 2026
2f9b239
feat(traces): implement trace collection loop with dynamic timing bas…
CasLubbers Jan 27, 2026
a32f5bd
fix(traces): update collection interval to 5 minutes for improved tra…
CasLubbers Jan 27, 2026
9680a23
Merge branch 'main' into APL-1444
svcAPLBot Jan 27, 2026
f0924ab
Merge branch 'main' into APL-1444
svcAPLBot Jan 27, 2026
e4e8a09
Merge branch 'main' into APL-1444
svcAPLBot Jan 28, 2026
fbd7e7d
Merge branch 'main' into APL-1444
svcAPLBot Jan 28, 2026
20a874c
Merge branch 'main' into APL-1444
svcAPLBot Jan 28, 2026
725b133
fix: use seconds instead of ms
CasLubbers Jan 28, 2026
e6bd552
Merge branch 'main' into APL-1444
svcAPLBot Jan 28, 2026
807f3ae
Merge branch 'main' into APL-1444
svcAPLBot Jan 28, 2026
bc528b8
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
d46a238
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
d22eb89
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
f064022
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
a8d2b6f
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
7c8d355
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
1314b31
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
11b8faf
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
3a8888e
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
c372340
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
43039b0
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
64d61f4
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
ae7a51e
Merge branch 'main' into APL-1444
svcAPLBot Jan 29, 2026
970300d
Merge branch 'main' into APL-1444
svcAPLBot Jan 30, 2026
dbb2288
Merge branch 'main' into APL-1444
svcAPLBot Jan 30, 2026
6047dbf
fix: review comments
CasLubbers Jan 30, 2026
d0716d9
Merge branch 'main' into APL-1444
svcAPLBot Jan 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 0 additions & 7 deletions src/cmd/apply.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import { runtimeUpgrade } from '../common/runtime-upgrade'
import { applyAsApps } from './apply-as-apps'
import { applyTeams } from './apply-teams'
import { commit } from './commit'
import { collectTraces } from './traces'

const cmdName = getFilename(__filename)
const dir = '/tmp/otomi/'
Expand Down Expand Up @@ -84,12 +83,6 @@ export const apply = async (): Promise<void> => {
await applyAll()
} catch (e) {
d.error(e)
// Collect traces on apply failure
try {
await collectTraces()
} catch (traceError) {
d.error('Failed to collect traces:', traceError)
}
d.info(`Retrying in ${retryOptions.maxTimeout} ms`)
await deletePendingHelmReleases()
throw e
Expand Down
17 changes: 1 addition & 16 deletions src/cmd/install.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,7 @@ import { cleanupHandler, prepareEnvironment } from 'src/common/cli'
import { logLevelString, terminal } from 'src/common/debug'
import { env } from 'src/common/envalid'
import { deployEssential, hf, HF_DEFAULT_SYNC_ARGS } from 'src/common/hf'
import {
applyServerSide,
deletePendingHelmReleases,
getDeploymentState,
getHelmReleases,
setDeploymentState,
waitForCRD,
} from 'src/common/k8s'
import { applyServerSide, getDeploymentState, getHelmReleases, setDeploymentState, waitForCRD } from 'src/common/k8s'
import { getFilename, rootDir } from 'src/common/utils'
import { getImageTagFromValues, getPackageVersion, writeValuesToFile } from 'src/common/values'
import { getParsedArgs, HelmArguments, helmOptions, setParsedArgs } from 'src/common/yargs'
Expand All @@ -24,7 +17,6 @@ import {
createWelcomeConfigMap,
initialSetupData,
} from './commit'
import { collectTraces } from './traces'

const cmdName = getFilename(__filename)
const dir = '/tmp/otomi/'
Expand Down Expand Up @@ -129,13 +121,6 @@ const install = async (): Promise<void> => {
await installAll()
} catch (e) {
d.error(e)
// Collect traces on installation failure
try {
await collectTraces()
await deletePendingHelmReleases()
} catch (traceError) {
d.error('Failed to collect traces:', traceError)
}
throw e
}
return
Expand Down
45 changes: 32 additions & 13 deletions src/cmd/traces.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,13 @@ describe('Collect Traces Command', () => {
jest.clearAllMocks()
})

// Test helper: pulls the trace report out of ConfigMap data whose report is
// stored under a timestamped key (a key beginning with `report-`).
// Walks the entries in insertion order and returns the JSON-parsed value of
// the first matching key; throws when no key carries the `report-` prefix.
const extractReportFromConfigMap = (data: Record<string, string>): any => {
  for (const [entryName, serializedReport] of Object.entries(data)) {
    if (entryName.startsWith('report-')) {
      return JSON.parse(serializedReport)
    }
  }
  throw new Error('No report key found in ConfigMap data')
}

it('should detect all types of failed resources and store in ConfigMap', async () => {
// Mock various failing resources
mockCoreApi.listPodForAllNamespaces.mockResolvedValue({
Expand Down Expand Up @@ -480,12 +487,15 @@ describe('Collect Traces Command', () => {

await collectTraces()

expect(mockCreateUpdateConfigMap).toHaveBeenCalledWith(mockCoreApi, 'apl-traces-report', 'apl-operator', {
report: expect.any(String),
})
expect(mockCreateUpdateConfigMap).toHaveBeenCalledWith(
mockCoreApi,
'apl-traces-report',
'apl-operator',
expect.objectContaining({}),
)

const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = JSON.parse(configMapCall[3].report)
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = extractReportFromConfigMap(configMapData)

// Should have all resource types
expect(reportData.failedResources.length).toBeGreaterThan(0)
Expand Down Expand Up @@ -516,8 +526,12 @@ describe('Collect Traces Command', () => {

await collectTraces()

// Should not create ConfigMap for healthy cluster
expect(mockCreateUpdateConfigMap).not.toHaveBeenCalled()
// Should always create ConfigMap (even when healthy, for timestamp visibility)
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()

const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = extractReportFromConfigMap(configMapData)
expect(reportData.failedResources).toEqual([])
})

it('should call createUpdateConfigMap when there are issues', async () => {
Expand Down Expand Up @@ -586,8 +600,8 @@ describe('Collect Traces Command', () => {
// Should create ConfigMap with deployment issues
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()

const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = JSON.parse(configMapCall[3].report)
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = extractReportFromConfigMap(configMapData)

// Should have deployment in failed resources
expect(reportData.failedResources).toEqual(
Expand Down Expand Up @@ -638,8 +652,8 @@ describe('Collect Traces Command', () => {

await collectTraces()

const configMapCall = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = JSON.parse(configMapCall[3].report)
const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = extractReportFromConfigMap(configMapData)

// Should not have errors field when all collections succeed
expect(reportData.errors).toBeUndefined()
Expand All @@ -659,7 +673,12 @@ describe('Collect Traces Command', () => {

// Should complete without throwing despite all failures
expect(mockCoreApi.listPodForAllNamespaces).toHaveBeenCalled()
// Should not create ConfigMap when no issues found and all failed
expect(mockCreateUpdateConfigMap).not.toHaveBeenCalled()
// Should always create ConfigMap (even when all fail, for timestamp visibility and error reporting)
expect(mockCreateUpdateConfigMap).toHaveBeenCalled()

const [, , , configMapData] = mockCreateUpdateConfigMap.mock.calls[0]
const reportData = extractReportFromConfigMap(configMapData)
expect(reportData.errors).toBeDefined()
expect(reportData.errors.length).toBeGreaterThan(0)
})
})
Loading
Loading