Skip to content

Commit 44f74f6

Browse files
patrykw-splunkPatryk Wasielewski
andauthored
fix isAppAlreadyInstalled error handling (#1608)
* fix isAppAlreadyInstalled error handling --------- Co-authored-by: Patryk Wasielewski <pwasiele@splunk.com>
1 parent d7abbf5 commit 44f74f6

File tree

2 files changed

+227
-31
lines changed

2 files changed

+227
-31
lines changed

pkg/splunk/enterprise/afwscheduler.go

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -766,8 +766,15 @@ func installApp(rctx context.Context, localCtx *localScopePlaybookContext, cr sp
766766
streamOptions := splutil.NewStreamOptionsObject(command)
767767

768768
stdOut, stdErr, err := localCtx.podExecClient.RunPodExecCommand(rctx, streamOptions, []string{"/bin/sh"})
769-
// if the app was already installed previously, then just mark it for install complete
770-
if stdErr != "" || err != nil {
769+
770+
// TODO(patrykw-splunk): remove this once we have confirm that we are not using stderr for error detection at all
771+
// Log stderr content for debugging but don't use it for error detection
772+
if stdErr != "" {
773+
scopedLog.Info("App install command stderr output (informational only)", "stderr", stdErr)
774+
}
775+
776+
// Check only the actual command execution error, not stderr content
777+
if err != nil {
771778
phaseInfo.FailCount++
772779
scopedLog.Error(err, "local scoped app package install failed", "stdout", stdOut, "stderr", stdErr, "app pkg path", appPkgPathOnPod, "failCount", phaseInfo.FailCount)
773780
return fmt.Errorf("local scoped app package install failed. stdOut: %s, stdErr: %s, app pkg path: %s, failCount: %d", stdOut, stdErr, appPkgPathOnPod, phaseInfo.FailCount)
@@ -785,28 +792,51 @@ func isAppAlreadyInstalled(ctx context.Context, cr splcommon.MetaObject, podExec
785792

786793
scopedLog.Info("check app's installation state")
787794

788-
command := fmt.Sprintf("/opt/splunk/bin/splunk list app %s -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED; echo -n $?", appTopFolder)
795+
command := fmt.Sprintf("/opt/splunk/bin/splunk list app %s -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED", appTopFolder)
789796

790797
streamOptions := splutil.NewStreamOptionsObject(command)
791798

792799
stdOut, stdErr, err := podExecClient.RunPodExecCommand(ctx, streamOptions, []string{"/bin/sh"})
793800

801+
// Handle specific stderr cases first
794802
if strings.Contains(stdErr, "Could not find object") {
795803
// when app is not installed you will see something like on StdErr:
796804
// "Could not find object id=<app_name>"
797805
// which mean app is not installed (no need to check enabled at this time)
798806
return false, nil
799807
}
800808

801-
if stdErr != "" || err != nil {
802-
return false, fmt.Errorf("could not get installed app status stdOut: %s, stdErr: %s, command: %s", stdOut, stdErr, command)
809+
// Log any other stderr content for debugging but don't use it for error detection
810+
if stdErr != "" {
811+
scopedLog.Info("Command stderr output (informational only)", "stderr", stdErr)
803812
}
804813

805-
appInstallCheck, _ := strconv.Atoi(stdOut)
814+
// Now check the actual command result
815+
if err != nil {
816+
// The command pipeline ends with 'grep ENABLED', so exit codes follow grep semantics:
817+
// For grep: exit code 1 = pattern not found, exit code 2+ = actual error
818+
errMsg := err.Error()
819+
820+
// Check for grep exit code 1 (pattern not found)
821+
if strings.Contains(errMsg, "exit status 1") || strings.Contains(errMsg, "command terminated with exit code 1") {
822+
// grep exit code 1 means "ENABLED" pattern not found - app exists but is not enabled
823+
scopedLog.Info("App not enabled - grep pattern not found", "stdout", stdOut, "stderr", stdErr)
824+
return false, nil
825+
}
826+
827+
// Any other exit code indicates a real error (splunk command failed, etc.)
828+
return false, fmt.Errorf("could not get installed app status stdOut: %s, stdErr: %s, error: %v, command: %s", stdOut, stdErr, err, command)
829+
}
806830

807-
scopedLog.Info("Apps installation state", stdOut, stdOut)
831+
// If we reach here, grep found "ENABLED" (exit code 0)
832+
// stdOut should contain the app status line with "ENABLED"
833+
if stdOut == "" {
834+
// This shouldn't happen if grep succeeded, but let's be safe
835+
return false, fmt.Errorf("command succeeded but no output received, command: %s", command)
836+
}
808837

809-
return appInstallCheck == 0, nil
838+
scopedLog.Info("App installation state check successful - app is enabled", "appStatus", strings.TrimSpace(stdOut))
839+
return true, nil
810840
}
811841

812842
// get the name of top folder from the package.
@@ -1320,6 +1350,15 @@ installPhase:
13201350
phaseInfo := getPhaseInfoByPhaseType(ctx, installWorker, enterpriseApi.PhaseInstall)
13211351
if isPhaseMaxRetriesReached(ctx, phaseInfo, installWorker.afwConfig) {
13221352
phaseInfo.Status = enterpriseApi.AppPkgInstallError
1353+
1354+
// For fanout CRs, also update the main PhaseInfo to reflect the failure
1355+
if isFanOutApplicableToCR(installWorker.cr) {
1356+
scopedLog.Info("Max retries reached for fanout CR - updating main phase info", "app", installWorker.appDeployInfo.AppName, "failCount", phaseInfo.FailCount)
1357+
installWorker.appDeployInfo.PhaseInfo.Phase = enterpriseApi.PhaseInstall
1358+
installWorker.appDeployInfo.PhaseInfo.Status = enterpriseApi.AppPkgInstallError
1359+
installWorker.appDeployInfo.DeployStatus = enterpriseApi.DeployStatusError
1360+
}
1361+
13231362
ppln.deleteWorkerFromPipelinePhase(ctx, phaseInfo.Phase, installWorker)
13241363
} else if isPhaseStatusComplete(phaseInfo) {
13251364
ppln.deleteWorkerFromPipelinePhase(ctx, phaseInfo.Phase, installWorker)

pkg/splunk/enterprise/afwscheduler_test.go

Lines changed: 180 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,18 +3061,23 @@ func TestRunLocalScopedPlaybook(t *testing.T) {
30613061
t.Errorf("Failed to detect that steps to get installed app failed")
30623062
}
30633063

3064-
// Test3: get installed app name passes but getting installed app name failed
3064+
// Test3: get installed app name passes but isAppAlreadyInstalled fails with real error (not "Could not find object")
30653065
mockPodExecReturnContexts[1].StdErr = ""
3066+
mockPodExecReturnContexts[2].StdErr = "Some other real error message" // Real error, not "Could not find object"
3067+
mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 2") // Real error, not grep exit code 1
30663068
localInstallCtxt.sem <- struct{}{}
30673069
waiter.Add(1)
30683070
err = localInstallCtxt.runPlaybook(ctx)
30693071
if err == nil {
3070-
t.Errorf("Failed to detect not able to get installed app name: err")
3072+
t.Errorf("Failed to detect isAppAlreadyInstalled error")
30713073
}
30723074

3073-
// Test4: get installed app command passes but installing app fails
3074-
mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled
3075-
mockPodExecReturnContexts[2].StdErr = "" //no error thrown
3075+
// Test4: isAppAlreadyInstalled returns app not enabled (grep exit code 1), then install fails
3076+
mockPodExecReturnContexts[2].StdOut = "" // No stdout means grep didn't find ENABLED
3077+
mockPodExecReturnContexts[2].StdErr = "" // No stderr
3078+
mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 1") // grep exit code 1 = pattern not found
3079+
mockPodExecReturnContexts[3].StdErr = "real installation error" // This is just logged now
3080+
mockPodExecReturnContexts[3].Err = fmt.Errorf("install command failed") // This causes the actual failure
30763081

30773082
localInstallCtxt.sem <- struct{}{}
30783083
waiter.Add(1)
@@ -3081,28 +3086,35 @@ func TestRunLocalScopedPlaybook(t *testing.T) {
30813086
t.Errorf("Expected app install failed")
30823087
}
30833088

3084-
mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled
3085-
mockPodExecReturnContexts[2].StdErr = "Could not find object"
3089+
// Test5: App not found scenario (Could not find object) - should proceed to install but install fails
3090+
mockPodExecReturnContexts[2].StdOut = ""
3091+
mockPodExecReturnContexts[2].StdErr = "Could not find object id=app1"
3092+
mockPodExecReturnContexts[2].Err = nil // This should return false, nil (app not installed)
3093+
// Keep the installation error from previous test to make install fail
3094+
mockPodExecReturnContexts[3].StdErr = "real installation error" // Install should fail
3095+
mockPodExecReturnContexts[3].Err = fmt.Errorf("install failed")
30863096

30873097
localInstallCtxt.sem <- struct{}{}
30883098
waiter.Add(1)
30893099
err = localInstallCtxt.runPlaybook(ctx)
30903100
if err == nil {
3091-
t.Errorf("Expected app install failed")
3101+
t.Errorf("Expected app install failed due to installation error")
30923102
}
30933103

3094-
// Test5: install app should be successful
3095-
3096-
mockPodExecReturnContexts[3].StdErr = "" //no error for app install
3104+
// Test6: Install succeeds with stderr content (should be ignored), but cleanup fails
3105+
mockPodExecReturnContexts[3].StdErr = "Some informational message in stderr" // Stderr content should be ignored
3106+
mockPodExecReturnContexts[3].Err = nil // No actual error for install
3107+
// Keep cleanup failure from previous test setup to make overall test fail
3108+
// mockPodExecReturnContexts[4] still has error from earlier
30973109

30983110
localInstallCtxt.sem <- struct{}{}
30993111
waiter.Add(1)
31003112
err = localInstallCtxt.runPlaybook(ctx)
31013113
if err == nil {
3102-
t.Errorf("Expected app install succeeded but app arhive deletion failed")
3114+
t.Errorf("Expected app install succeeded but app archive deletion failed")
31033115
}
31043116

3105-
// Test6: successful scenario where everything succeeds
3117+
// Test7: successful scenario where everything succeeds
31063118
mockPodExecReturnContexts[4].StdErr = ""
31073119
localInstallCtxt.sem <- struct{}{}
31083120
waiter.Add(1)
@@ -3301,19 +3313,24 @@ func TestPremiumAppScopedPlaybook(t *testing.T) {
33013313
t.Errorf("Failed to detect that steps to get installed app failed")
33023314
}
33033315

3304-
// Test3: get installed app name passes but getting installed app name failed
3316+
// Test3: get installed app name passes but isAppAlreadyInstalled fails with real error (not "Could not find object")
33053317
mockPodExecReturnContexts[1].StdErr = ""
3318+
mockPodExecReturnContexts[2].StdErr = "Some other real error message" // Real error, not "Could not find object"
3319+
mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 2") // Real error, not grep exit code 1
33063320
localInstallCtxt.sem <- struct{}{}
33073321
waiter.Add(1)
33083322
err = pCtx.runPlaybook(ctx)
33093323
if err == nil {
3310-
t.Errorf("Failed to detect not able to get installed app name: err")
3324+
t.Errorf("Failed to detect isAppAlreadyInstalled error")
33113325
}
33123326

3313-
// Test4: get installed app command passes, it returns app is not enabled
3314-
// so app install will run and it should fail
3315-
mockPodExecReturnContexts[2].StdOut = "1" //app is not yet installed or it is not enabled
3316-
mockPodExecReturnContexts[2].StdErr = "" //no error thrown
3327+
// Test4: isAppAlreadyInstalled returns app is not enabled (grep exit code 1)
3328+
// so app install will run and it should fail with real error
3329+
mockPodExecReturnContexts[2].StdOut = "" // No stdout means grep didn't find ENABLED
3330+
mockPodExecReturnContexts[2].StdErr = "" // No stderr
3331+
mockPodExecReturnContexts[2].Err = fmt.Errorf("exit status 1") // grep exit code 1 = pattern not found
3332+
mockPodExecReturnContexts[3].StdErr = "real installation error" // This is just logged now
3333+
mockPodExecReturnContexts[3].Err = fmt.Errorf("install command failed") // This causes the actual failure
33173334

33183335
localInstallCtxt.sem <- struct{}{}
33193336
waiter.Add(1)
@@ -3322,9 +3339,9 @@ func TestPremiumAppScopedPlaybook(t *testing.T) {
33223339
t.Errorf("Expected app install failed")
33233340
}
33243341

3325-
// Test5: install app should be successful but es post install fails
3326-
3327-
mockPodExecReturnContexts[3].StdErr = "" //no error for app install
3342+
// Test5: Install succeeds with stderr content (should be ignored), but post install fails
3343+
mockPodExecReturnContexts[3].StdErr = "Some informational message in stderr" // Stderr content should be ignored
3344+
mockPodExecReturnContexts[3].Err = nil // No actual error for install
33283345

33293346
localInstallCtxt.sem <- struct{}{}
33303347
waiter.Add(1)
@@ -3343,7 +3360,24 @@ func TestPremiumAppScopedPlaybook(t *testing.T) {
33433360
t.Errorf("Expected es post install succeeded but app arhive deletion failed")
33443361
}
33453362

3346-
// Test7: successful scenario where everything succeeds
3363+
// Test7: App already installed with stderr content - should skip installation
3364+
// Reset all mock contexts for this test
3365+
mockPodExecReturnContexts[0].Err = nil // File exists check passes
3366+
mockPodExecReturnContexts[1].StdErr = "" // Get app name passes
3367+
mockPodExecReturnContexts[1].StdOut = "app1" // App name is found
3368+
mockPodExecReturnContexts[2].StdOut = "app1 CONFIGURED ENABLED VISIBLE" // App is already enabled
3369+
mockPodExecReturnContexts[2].StdErr = "Some informational message in stderr"
3370+
mockPodExecReturnContexts[2].Err = nil // No error - app is found and enabled
3371+
// Install step should be skipped, but cleanup should still work
3372+
mockPodExecReturnContexts[5].StdErr = "" // Cleanup should succeed
3373+
localInstallCtxt.sem <- struct{}{}
3374+
waiter.Add(1)
3375+
err = pCtx.runPlaybook(ctx)
3376+
if err != nil {
3377+
t.Errorf("runPlayBook should not have returned error when app is already installed with stderr content. err=%s", err.Error())
3378+
}
3379+
3380+
// Test8: successful scenario where everything succeeds
33473381
mockPodExecReturnContexts[5].StdErr = ""
33483382
localInstallCtxt.sem <- struct{}{}
33493383
waiter.Add(1)
@@ -4462,3 +4496,126 @@ func TestAddTelAppCManager(t *testing.T) {
44624496
// Negative testing
44634497
addTelApp(ctx, mockPodExecClient, 2, &crNew)
44644498
}
4499+
4500+
func TestIsAppAlreadyInstalled(t *testing.T) {
4501+
ctx := context.TODO()
4502+
4503+
tests := []struct {
4504+
name string
4505+
stdOut string
4506+
stdErr string
4507+
err error
4508+
expectedResult bool
4509+
expectedError bool
4510+
description string
4511+
}{
4512+
{
4513+
name: "App is enabled - success case",
4514+
stdOut: "myapp CONFIGURED ENABLED VISIBLE",
4515+
stdErr: "",
4516+
err: nil,
4517+
expectedResult: true,
4518+
expectedError: false,
4519+
description: "App is found and enabled",
4520+
},
4521+
{
4522+
name: "App not found - grep exit code 1",
4523+
stdOut: "",
4524+
stdErr: "",
4525+
err: fmt.Errorf("command terminated with exit code 1"),
4526+
expectedResult: false,
4527+
expectedError: false,
4528+
description: "App not enabled - grep pattern not found",
4529+
},
4530+
{
4531+
name: "App not found - Could not find object",
4532+
stdOut: "",
4533+
stdErr: "Could not find object id=myapp",
4534+
err: nil,
4535+
expectedResult: false,
4536+
expectedError: false,
4537+
description: "App not installed at all",
4538+
},
4539+
{
4540+
name: "App enabled with stderr content",
4541+
stdOut: "myapp CONFIGURED ENABLED VISIBLE",
4542+
stdErr: "Some informational message in stderr",
4543+
err: nil,
4544+
expectedResult: true,
4545+
expectedError: false,
4546+
description: "Stderr content should be ignored when app is enabled",
4547+
},
4548+
{
4549+
name: "App not enabled with stderr content",
4550+
stdOut: "",
4551+
stdErr: "Some informational message in stderr",
4552+
err: fmt.Errorf("exit status 1"),
4553+
expectedResult: false,
4554+
expectedError: false,
4555+
description: "Stderr content should be ignored, grep exit code 1 means not enabled",
4556+
},
4557+
{
4558+
name: "Real error - exit code 2",
4559+
stdOut: "",
4560+
stdErr: "Some real error occurred",
4561+
err: fmt.Errorf("exit status 2"),
4562+
expectedResult: false,
4563+
expectedError: true,
4564+
description: "Real error should be returned",
4565+
},
4566+
{
4567+
name: "Command succeeded but no output",
4568+
stdOut: "",
4569+
stdErr: "",
4570+
err: nil,
4571+
expectedResult: false,
4572+
expectedError: true,
4573+
description: "Should error if command succeeds but no output",
4574+
},
4575+
}
4576+
4577+
for _, tt := range tests {
4578+
t.Run(tt.name, func(t *testing.T) {
4579+
// Create a test CR
4580+
cr := &enterpriseApi.Standalone{
4581+
ObjectMeta: metav1.ObjectMeta{
4582+
Name: "test-standalone",
4583+
Namespace: "test",
4584+
},
4585+
}
4586+
4587+
// Create mock pod exec client with CR
4588+
mockPodExecClient := &spltest.MockPodExecClient{Cr: cr}
4589+
mockPodExecClient.SetTargetPodName(ctx, "test-pod")
4590+
4591+
// Set up the mock return context
4592+
mockReturnContext := &spltest.MockPodExecReturnContext{
4593+
StdOut: tt.stdOut,
4594+
StdErr: tt.stdErr,
4595+
Err: tt.err,
4596+
}
4597+
4598+
// Add the mock command and return context - use the exact command pattern
4599+
command := "/opt/splunk/bin/splunk list app testapp -auth admin:`cat /mnt/splunk-secrets/password`| grep ENABLED"
4600+
mockPodExecClient.AddMockPodExecReturnContexts(ctx, []string{command}, mockReturnContext)
4601+
4602+
// Call the function
4603+
result, err := isAppAlreadyInstalled(ctx, cr, mockPodExecClient, "testapp")
4604+
4605+
// Check results
4606+
if tt.expectedError {
4607+
if err == nil {
4608+
t.Errorf("Expected error but got none for test: %s", tt.description)
4609+
}
4610+
} else {
4611+
if err != nil {
4612+
t.Errorf("Unexpected error for test '%s': %v", tt.description, err)
4613+
}
4614+
}
4615+
4616+
if result != tt.expectedResult {
4617+
t.Errorf("Expected result %v but got %v for test: %s", tt.expectedResult, result, tt.description)
4618+
}
4619+
})
4620+
}
4621+
}

0 commit comments

Comments
 (0)