Skip to content

Commit 67d7250

Browse files
authored
surfacing the error from provision-wait mode (#7382)
1 parent c91a875 commit 67d7250

File tree

2 files changed

+154
-41
lines changed

2 files changed

+154
-41
lines changed

aks-node-controller/app.go

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package main
33
import (
44
"bytes"
55
"context"
6+
"encoding/json"
67
"errors"
78
"flag"
89
"fmt"
@@ -11,6 +12,7 @@ import (
1112
"os"
1213
"os/exec"
1314
"path/filepath"
15+
"strconv"
1416

1517
"github.com/Azure/agentbaker/aks-node-controller/parser"
1618
"github.com/Azure/agentbaker/aks-node-controller/pkg/nodeconfigutils"
@@ -157,14 +159,6 @@ func (a *App) writeCompleteFileOnError(err error) {
157159
}
158160

159161
func (a *App) ProvisionWait(ctx context.Context, filepaths ProvisionStatusFiles) (string, error) {
160-
if _, err := os.Stat(filepaths.ProvisionCompleteFile); err == nil {
161-
data, err := os.ReadFile(filepaths.ProvisionJSONFile)
162-
if err != nil {
163-
return "", fmt.Errorf("failed to read provision.json: %w. One reason could be that AKSNodeConfig is not properly set", err)
164-
}
165-
return string(data), nil
166-
}
167-
168162
watcher, err := fsnotify.NewWatcher()
169163
if err != nil {
170164
return "", fmt.Errorf("failed to create watcher: %w", err)
@@ -179,15 +173,19 @@ func (a *App) ProvisionWait(ctx context.Context, filepaths ProvisionStatusFiles)
179173
return "", fmt.Errorf("failed to watch directory: %w", err)
180174
}
181175

176+
if _, statErr := os.Stat(filepaths.ProvisionCompleteFile); statErr == nil {
177+
// Fast path: provision.complete already exists when we enter. Avoid watcher overhead.
178+
// We read and evaluate once and return immediately. Only this branch executes in this scenario.
179+
return readAndEvaluateProvision(filepaths.ProvisionJSONFile)
180+
}
181+
182182
for {
183183
select {
184184
case event := <-watcher.Events:
185185
if event.Op&fsnotify.Create == fsnotify.Create && event.Name == filepaths.ProvisionCompleteFile {
186-
data, err := os.ReadFile(filepaths.ProvisionJSONFile)
187-
if err != nil {
188-
return "", fmt.Errorf("failed to read provision.json: %w. One reason could be that AKSNodeConfig is not properly set", err)
189-
}
190-
return string(data), nil
186+
// Event path: provision.complete was created after we started watching. Read and evaluate now.
187+
// This is mutually exclusive with the fast path above; only one of these calls runs per invocation.
188+
return readAndEvaluateProvision(filepaths.ProvisionJSONFile)
191189
}
192190

193191
case err := <-watcher.Errors:
@@ -198,6 +196,45 @@ func (a *App) ProvisionWait(ctx context.Context, filepaths ProvisionStatusFiles)
198196
}
199197
}
200198

199+
// evaluateProvisionStatus inspects the serialized CSEStatus (provision.json contents).
200+
// If ExitCode is non-zero we return an error so that provision-wait exits with a failure code.
201+
// We still surface the full JSON on stdout (handled by caller) for diagnostics.
202+
func evaluateProvisionStatus(data []byte) error {
203+
// provision.json values are emitted as strings by the shell jq invocation.
204+
// We only care about ExitCode + Error + Output (snippet) for failure detection.
205+
type provisionResult struct {
206+
ExitCode string `json:"ExitCode"`
207+
Error string `json:"Error"`
208+
Output string `json:"Output"`
209+
}
210+
var result provisionResult
211+
if err := json.Unmarshal(data, &result); err != nil {
212+
return fmt.Errorf("parse provision.json: %w", err)
213+
}
214+
if result.ExitCode == "" {
215+
return fmt.Errorf("missing ExitCode in provision.json")
216+
}
217+
code, err := strconv.Atoi(result.ExitCode)
218+
if err != nil {
219+
return fmt.Errorf("invalid ExitCode in provision.json: %s", result.ExitCode)
220+
}
221+
if code != 0 {
222+
outSnippet := result.Output
223+
return fmt.Errorf("provision failed: exitCode=%d error=%s output=%q", code, result.Error, outSnippet)
224+
}
225+
return nil
226+
}
227+
228+
// readAndEvaluateProvision reads provision.json content from the given path and evaluates its status.
229+
// It returns the raw JSON string plus any error derived from its parsed ExitCode.
230+
func readAndEvaluateProvision(path string) (string, error) {
231+
data, err := os.ReadFile(path)
232+
if err != nil {
233+
return "", fmt.Errorf("failed to read provision.json: %w. One reason could be that AKSNodeConfig is not properly set", err)
234+
}
235+
return string(data), evaluateProvisionStatus(data)
236+
}
237+
201238
var _ ExitCoder = &exec.ExitError{}
202239

203240
type ExitCoder interface {

aks-node-controller/app_test.go

Lines changed: 104 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,7 @@ func TestApp_Provision(t *testing.T) {
120120
name: "command runner error",
121121
flags: ProvisionFlags{ProvisionConfig: "parser/testdata/test_aksnodeconfig.json"},
122122
setupMocks: func(mc *MockCmdRunner) {
123-
mc.RunFunc = func(cmd *exec.Cmd) error {
124-
return errors.New("command runner error")
125-
}
123+
mc.RunFunc = func(cmd *exec.Cmd) error { return errors.New("command runner error") }
126124
},
127125
wantErr: true,
128126
},
@@ -134,11 +132,7 @@ func TestApp_Provision(t *testing.T) {
134132
if tt.setupMocks != nil {
135133
tt.setupMocks(mc)
136134
}
137-
138-
app := &App{
139-
cmdRunner: mc.Run,
140-
}
141-
135+
app := &App{cmdRunner: mc.Run}
142136
err := app.Provision(context.Background(), tt.flags)
143137
if tt.wantErr {
144138
assert.Error(t, err)
@@ -150,9 +144,7 @@ func TestApp_Provision(t *testing.T) {
150144
}
151145

152146
func TestApp_Provision_DryRun(t *testing.T) {
153-
app := &App{
154-
cmdRunner: cmdRunner,
155-
}
147+
app := &App{cmdRunner: cmdRunner}
156148
result := app.Run(context.Background(), []string{"aks-node-controller", "provision", "--provision-config=parser/testdata/test_aksnodeconfig.json", "--dry-run"})
157149
assert.Equal(t, 0, result)
158150
if reflect.ValueOf(app.cmdRunner).Pointer() != reflect.ValueOf(cmdRunnerDryRun).Pointer() {
@@ -161,31 +153,39 @@ func TestApp_Provision_DryRun(t *testing.T) {
161153
}
162154

163155
func TestApp_ProvisionWait(t *testing.T) {
164-
testData := "hello world"
165-
156+
testData := `{"ExitCode": "0", "Output": "hello world", "Error": ""}`
166157
tests := []struct {
167158
name string
168159
wantsErr bool
169160
errString string
170-
setup func(provisionStatusFiles ProvisionStatusFiles)
161+
setup func(ProvisionStatusFiles)
171162
}{
172163
{
173-
name: "provision already complete",
174-
wantsErr: false,
164+
name: "event path (file created after call)",
175165
setup: func(provisionStatusFiles ProvisionStatusFiles) {
176-
// Run the test in a goroutine to simulate file creation after some delay
166+
// This goroutine simulates an external process writing the files after a short delay.
167+
// It's running asynchronously from the main test flow.
177168
go func() {
178-
time.Sleep(200 * time.Millisecond) // Simulate file creation delay
169+
time.Sleep(150 * time.Millisecond)
179170
_ = os.WriteFile(provisionStatusFiles.ProvisionJSONFile, []byte(testData), 0644)
180171
_, _ = os.Create(provisionStatusFiles.ProvisionCompleteFile)
181172
}()
182173
},
183174
},
184175
{
185-
name: "wait for provision completion",
186-
wantsErr: false,
176+
name: "fast path (file exists before call)",
187177
setup: func(provisionStatusFiles ProvisionStatusFiles) {
188178
_ = os.WriteFile(provisionStatusFiles.ProvisionJSONFile, []byte(testData), 0644)
179+
_, _ = os.Create(provisionStatusFiles.ProvisionCompleteFile) // pre-create to trigger immediate return
180+
},
181+
},
182+
{
183+
name: "provision completion with failure ExitCode",
184+
wantsErr: true,
185+
errString: "provision failed",
186+
setup: func(provisionStatusFiles ProvisionStatusFiles) {
187+
failJSON := `{"ExitCode": "7", "Error": "boom", "Output": "trace"}`
188+
_ = os.WriteFile(provisionStatusFiles.ProvisionJSONFile, []byte(failJSON), 0644)
189189
_, _ = os.Create(provisionStatusFiles.ProvisionCompleteFile)
190190
},
191191
},
@@ -199,25 +199,21 @@ func TestApp_ProvisionWait(t *testing.T) {
199199
for _, tt := range tests {
200200
t.Run(tt.name, func(t *testing.T) {
201201
mc := &MockCmdRunner{}
202-
// Setup a temporary directory
203202
tempDir, err := os.MkdirTemp("", "provisiontest")
204203
assert.NoError(t, err)
205204
tempFile := filepath.Join(tempDir, "testfile.txt")
206205
completeFile := filepath.Join(tempDir, "provision.complete")
207206
defer os.RemoveAll(tempDir)
208207

209-
provisionStatusFiles := ProvisionStatusFiles{ProvisionJSONFile: tempFile, ProvisionCompleteFile: completeFile}
208+
p := ProvisionStatusFiles{ProvisionJSONFile: tempFile, ProvisionCompleteFile: completeFile}
210209
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
211210
defer cancel()
212-
213-
app := &App{
214-
cmdRunner: mc.Run,
215-
}
211+
app := &App{cmdRunner: mc.Run}
216212
if tt.setup != nil {
217-
tt.setup(provisionStatusFiles)
213+
tt.setup(p)
218214
}
219215

220-
data, err := app.ProvisionWait(ctx, ProvisionStatusFiles{ProvisionJSONFile: tempFile, ProvisionCompleteFile: completeFile})
216+
data, err := app.ProvisionWait(ctx, p)
221217
if tt.wantsErr {
222218
assert.Error(t, err)
223219
assert.Contains(t, err.Error(), tt.errString)
@@ -273,3 +269,83 @@ func TestApp_Run_Integration(t *testing.T) {
273269
assert.Equal(t, 1, exitCode)
274270
})
275271
}
272+
273+
func Test_readAndEvaluateProvision(t *testing.T) {
274+
type testCase struct {
275+
name string
276+
fileContent string // raw content to place in file (empty => file absent)
277+
createFile bool // whether to create the file
278+
expectErrSub string // substring that should appear in error; empty means success expected
279+
expectContains string // substring that must appear in successful content
280+
}
281+
282+
tests := []testCase{
283+
{
284+
name: "valid provision file",
285+
createFile: true,
286+
fileContent: `{"ExitCode":"0","Output":"ok","Error":"","ExecDuration":"1"}`,
287+
expectContains: `"ExitCode":"0"`,
288+
},
289+
{
290+
name: "missing provision file",
291+
createFile: false,
292+
expectErrSub: "no such file",
293+
},
294+
{
295+
name: "invalid provision file (bad JSON)",
296+
createFile: true,
297+
fileContent: `not-json`,
298+
expectErrSub: "invalid character",
299+
},
300+
{
301+
name: "non-zero ExitCode returns error",
302+
createFile: true,
303+
fileContent: `{"ExitCode":"7","Output":"boom","Error":"bad"}`,
304+
expectErrSub: "provision failed",
305+
},
306+
{
307+
name: "invalid ExitCode returns error",
308+
createFile: true,
309+
fileContent: `{"ExitCode":"unknown","Output":"boom","Error":"bad"}`,
310+
expectErrSub: "invalid ExitCode",
311+
},
312+
{
313+
name: "missing ExitCode returns error",
314+
createFile: true,
315+
fileContent: `{"Output":"boom","Error":"bad"}`,
316+
expectErrSub: "missing ExitCode",
317+
},
318+
}
319+
320+
writeTemp := func(t *testing.T, content string) string {
321+
t.Helper()
322+
f, err := os.CreateTemp(t.TempDir(), "provision_*.json")
323+
assert.NoError(t, err)
324+
_, errWS := f.WriteString(content)
325+
assert.NoError(t, errWS)
326+
f.Close()
327+
return f.Name()
328+
}
329+
330+
for _, tc := range tests {
331+
tc := tc
332+
t.Run(tc.name, func(t *testing.T) {
333+
p := filepath.Join(t.TempDir(), "does_not_exist.json")
334+
if tc.createFile {
335+
p = writeTemp(t, tc.fileContent)
336+
}
337+
got, err := readAndEvaluateProvision(p)
338+
if tc.expectErrSub != "" { // expected error
339+
assert.Error(t, err, "expected an error")
340+
if err != nil { // avoid panic if err is nil
341+
assert.Contains(t, err.Error(), tc.expectErrSub, "error should contain substring")
342+
}
343+
} else { // success
344+
assert.NoError(t, err, "unexpected error")
345+
if tc.expectContains != "" {
346+
assert.Contains(t, got, tc.expectContains, "content should contain substring")
347+
}
348+
}
349+
})
350+
}
351+
}

0 commit comments

Comments
 (0)