Skip to content

Commit 303429c

Browse files
roumingeriknordmark
authored and committed
pillar/qmp: do several attempts to retrieve valid QEMU status
QEMU status is crucial to EVE, and any error or unexpected status leads to the QEMU process being stopped. There is an issue in the third-party QMP library: digitalocean/go-qemu#210. To be on the safe side and avoid this kind of problem in the future, repeat the status query several times. Signed-off-by: Roman Penyaev <[email protected]>
1 parent 36de260 commit 303429c

File tree

1 file changed

+53
-10
lines changed

1 file changed

+53
-10
lines changed

pkg/pillar/hypervisor/qmp.go

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,20 @@ func QmpExecDeviceAdd(socket, id string, busnum, devnum uint16) error {
8383
return err
8484
}
8585

86+
// joinErrors merges two errors into one.
//
// If either argument is nil the other one is returned unchanged (so two
// nil arguments yield nil). When both are non-nil, a new error is built
// whose message is the two messages separated by "; ".
//
// This is a local stand-in for errors.Join(): the Go toolchain used by
// Yetus is too old and rejects it with "Join not declared by package
// errors".
func joinErrors(err1, err2 error) error {
	switch {
	case err1 == nil:
		// Nothing accumulated yet: the second error (possibly nil) is the result.
		return err2
	case err2 == nil:
		// Nothing new to add: keep the first error as is.
		return err1
	default:
		return fmt.Errorf("%v; %v", err1, err2)
	}
}
99+
86100
func getQemuStatus(socket string) (types.SwState, error) {
87101
// lets parse the status according to
88102
// https://github.com/qemu/qemu/blob/master/qapi/run-state.json#L8
@@ -102,7 +116,21 @@ func getQemuStatus(socket string) (types.SwState, error) {
102116
"preconfig": types.PAUSED,
103117
}
104118

105-
if raw, err := execRawCmd(socket, `{ "execute": "query-status" }`); err == nil {
119+
// We do several retries, because correct QEMU status is very crucial to EVE
120+
// and if for some reason (https://github.com/digitalocean/go-qemu/pull/210)
121+
// the status is unexpected, EVE stops QEMU and game over.
122+
var errs error
123+
state := types.UNKNOWN
124+
for attempt := 1; attempt <= 3; attempt++ {
125+
raw, err := execRawCmd(socket, `{ "execute": "query-status" }`)
126+
if err != nil {
127+
err = fmt.Errorf("[attempt %d] qmp status failed for QMP socket '%s': err: '%v'; (JSON response: '%s')",
128+
attempt, socket, err, raw)
129+
errs = joinErrors(errs, err)
130+
time.Sleep(time.Second)
131+
continue
132+
}
133+
106134
var result struct {
107135
ID string `json:"id"`
108136
Return struct {
@@ -114,18 +142,33 @@ func getQemuStatus(socket string) (types.SwState, error) {
114142
dec := json.NewDecoder(bytes.NewReader(raw))
115143
dec.DisallowUnknownFields()
116144
err = dec.Decode(&result)
117-
var matched bool
118-
var state types.SwState
119145
if err != nil {
120-
err = fmt.Errorf("%v; (JSON received: '%s')", err, raw)
121-
} else if state, matched = qmpStatusMap[result.Return.Status]; !matched {
122-
err = fmt.Errorf("unknown QMP status '%s' for QMP socket '%s'; (JSON response: '%s')",
123-
result.Return.Status, socket, raw)
146+
err = fmt.Errorf("[attempt %d] failed to parse QMP status response for QMP socket '%s': err: '%v'; (JSON response: '%s')",
147+
attempt, socket, err, raw)
148+
errs = joinErrors(errs, err)
149+
time.Sleep(time.Second)
150+
continue
151+
}
152+
var matched bool
153+
if state, matched = qmpStatusMap[result.Return.Status]; !matched {
154+
err = fmt.Errorf("[attempt %d] unknown QMP status '%s' for QMP socket '%s'; (JSON response: '%s')",
155+
attempt, result.Return.Status, socket, raw)
156+
errs = joinErrors(errs, err)
157+
time.Sleep(time.Second)
158+
continue
124159
}
125-
return state, err
126-
} else {
127-
return types.UNKNOWN, err
160+
161+
if errs != nil {
162+
logrus.Errorf("getQemuStatus: %d retrieving status attempts failed '%v', but eventually '%s' status was retrieved, so return SUCCESS and continue",
163+
attempt, errs, result.Return.Status)
164+
errs = nil
165+
}
166+
167+
// Success
168+
break
128169
}
170+
171+
return state, errs
129172
}
130173

131174
func qmpEventHandler(listenerSocket, executorSocket string) {

0 commit comments

Comments
 (0)