Skip to content

Commit 36b57c7

Browse files
committed
Lint fixes part 1
1 parent 2f5cf99 commit 36b57c7

File tree

4 files changed

+40
-54
lines changed

4 files changed

+40
-54
lines changed

internal/checkpointer/checkpointer.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@ const (
3131
checkpointSubdirName = "checkpoint"
3232
)
3333

34-
var errNoCheckpointDir = errors.New("Could not find checkpoint directory environment variable")
34+
var errNoCheckpointDir = errors.New("could not find checkpoint directory environment variable")
3535

36-
type FatalCheckpointErr struct {
36+
type FatalCheckpointError struct {
3737
err error
3838
}
3939

40-
func (e *FatalCheckpointErr) Error() string {
41-
return e.Error()
40+
func (e *FatalCheckpointError) Error() string {
41+
return e.err.Error()
4242
}
4343

4444
type Checkpointer interface {
@@ -113,7 +113,7 @@ func (c *checkpointer) Checkpoint(ctx context.Context, cogletCmd *exec.Cmd) erro
113113
return errNoCheckpointDir
114114
}
115115

116-
err := os.MkdirAll(filepath.Join(c.checkpointDir, checkpointSubdirName), 0o666)
116+
err := os.MkdirAll(filepath.Join(c.checkpointDir, checkpointSubdirName), 0o666) //nolint:gosec // coglet needs to write here
117117
if err != nil {
118118
return err
119119
}
@@ -137,7 +137,7 @@ func (c *checkpointer) Checkpoint(ctx context.Context, cogletCmd *exec.Cmd) erro
137137
cudaCmd := strings.TrimSpace(string(data))
138138

139139
// Write said command to a file for later
140-
err = os.WriteFile(filepath.Join(c.checkpointDir, cudaCmdFileName), []byte(cudaCmd), 0o666)
140+
err = os.WriteFile(filepath.Join(c.checkpointDir, cudaCmdFileName), []byte(cudaCmd), 0o644) //nolint:gosec
141141
if err != nil {
142142
return err
143143
}
@@ -157,7 +157,7 @@ func (c *checkpointer) Checkpoint(ctx context.Context, cogletCmd *exec.Cmd) erro
157157
cmd = exec.CommandContext(ctx, cudaCheckpointPath, "--toggle", "--pid", string(cudaPID))
158158
if cudaErr := cmd.Run(); cudaErr != nil {
159159
// Return a fatal error so upstream knows we cannot continue in the current state
160-
return &FatalCheckpointErr{
160+
return &FatalCheckpointError{
161161
err: cudaErr,
162162
}
163163
}
@@ -171,7 +171,7 @@ func (c *checkpointer) Checkpoint(ctx context.Context, cogletCmd *exec.Cmd) erro
171171
cmd = exec.CommandContext(ctx, cudaCheckpointPath, "--toggle", "--pid", string(cudaPID))
172172
if err := cmd.Run(); err != nil {
173173
// Return a fatal error so upstream knows we cannot continue in the current state
174-
return &FatalCheckpointErr{
174+
return &FatalCheckpointError{
175175
err: err,
176176
}
177177
}
@@ -200,7 +200,7 @@ func (c *checkpointer) Restore(ctx context.Context) (*exec.Cmd, func(context.Con
200200
if err != nil {
201201
// If this command failed, we want to best effort try to kill the started process,
202202
// since we'll start a new one
203-
restoreCmd.Process.Kill()
203+
restoreCmd.Process.Kill() //nolint:errcheck
204204

205205
return err
206206
}
@@ -210,7 +210,7 @@ func (c *checkpointer) Restore(ctx context.Context) (*exec.Cmd, func(context.Con
210210
if err := cmd.Run(); err != nil {
211211
// If this command failed, we want to best effort try to kill the started process,
212212
// since we'll start a new one
213-
restoreCmd.Process.Kill()
213+
restoreCmd.Process.Kill() //nolint:errcheck
214214

215215
return err
216216
}
@@ -219,7 +219,7 @@ func (c *checkpointer) Restore(ctx context.Context) (*exec.Cmd, func(context.Con
219219
if err != nil {
220220
// If this command failed, we want to best effort try to kill the started process,
221221
// since we'll start a new one
222-
restoreCmd.Process.Kill()
222+
restoreCmd.Process.Kill() //nolint:errcheck
223223

224224
return err
225225
}
@@ -246,6 +246,5 @@ func downloadCUDACheckpointBinaries(ctx context.Context) error {
246246
if err != nil {
247247
return fmt.Errorf("failed to download and untar CRIU: %w", err)
248248
}
249-
updateEnvVar("LD_LIBRARY_PATH", filepath.Join(dir, "criu-lib"))
250-
return nil
249+
return updateEnvVar("LD_LIBRARY_PATH", filepath.Join(dir, "criu-lib"))
251250
}

internal/checkpointer/utils.go

Lines changed: 16 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,26 @@ import (
1212
"time"
1313
)
1414

15-
var errTimedOutPolling = errors.New("Timed out while polling for file")
15+
var errTimedOutPolling = errors.New("timed out while polling for file")
1616

1717
// updateEnvVar updates an environment variable in-place, adding an item to it
1818
// if it exists or creating it if it doesn't exist
19-
func updateEnvVar(envVarName, newItem string) {
19+
func updateEnvVar(envVarName, newItem string) error {
2020
old := os.Getenv(envVarName)
2121
if old == "" {
22-
os.Setenv(envVarName, newItem)
23-
return
22+
return os.Setenv(envVarName, newItem)
2423
}
2524
path := newItem + string(os.PathListSeparator) + os.Getenv(envVarName)
26-
os.Setenv(envVarName, path)
25+
return os.Setenv(envVarName, path)
2726
}
2827

2928
// downloadFile downloads a file from the URL provided to the path provided
3029
func downloadFile(url, path string) error {
3130
filename := filepath.Base(path)
32-
os.MkdirAll(filepath.Dir(path), 0o755)
31+
err := os.MkdirAll(filepath.Dir(path), 0o600)
32+
if err != nil {
33+
return err
34+
}
3335

3436
resp, err := http.Get(url)
3537
if err != nil {
@@ -41,13 +43,13 @@ func downloadFile(url, path string) error {
4143
return fmt.Errorf("failed to download %s: %w", filename, err)
4244
}
4345

44-
binary, err := os.Create(path)
46+
file, err := os.Create(path)
4547
if err != nil {
4648
return fmt.Errorf("failed to touch file: %w", err)
4749
}
48-
defer binary.Close()
50+
defer file.Close() //nolint: errcheck
4951

50-
_, err = io.Copy(binary, resp.Body)
52+
_, err = io.Copy(file, resp.Body)
5153
if err != nil {
5254
return fmt.Errorf("failed to save %s: %w", filename, err)
5355
}
@@ -63,7 +65,7 @@ func downloadAndChmod(url, path string) error {
6365
return err
6466
}
6567

66-
if err := os.Chmod(path, 0o755); err != nil {
68+
if err := os.Chmod(path, 0o600); err != nil {
6769
return fmt.Errorf("failed to chmod file: %w", err)
6870
}
6971
return nil
@@ -116,37 +118,16 @@ func pollForFileDeletion(target string, timeout, pollInterval time.Duration) err
116118
}
117119
}
118120

119-
// pollForFileExistance waits for a file to exist, up until a timeout. It returns an error if the
120-
// timeout is hit
121-
func pollForFileExistance(target string, timeout, pollInterval time.Duration) error {
122-
deadline := time.After(timeout)
123-
124-
for {
125-
// Check if the file exists, if it doesn't keep looping
126-
if _, err := os.Stat(target); err == nil {
127-
return nil
128-
}
129-
130-
// Check for timeout before sleeping for the polling interval
131-
select {
132-
case <-deadline:
133-
return errTimedOutPolling
134-
default:
135-
time.Sleep(pollInterval)
136-
}
137-
}
138-
}
139-
140121
// https://stackoverflow.com/a/30708914/30548878
141122
func isDirEmpty(name string) (bool, error) {
142123
f, err := os.Open(name)
143124
if err != nil {
144125
return false, err
145126
}
146-
defer f.Close()
127+
defer f.Close() //nolint:errcheck
147128

148129
_, err = f.Readdirnames(1)
149-
if err == io.EOF {
130+
if errors.Is(err, io.EOF) {
150131
return true, nil
151132
}
152133
return false, err
@@ -155,7 +136,7 @@ func isDirEmpty(name string) (bool, error) {
155136
// Touch a file if it doesn't exist, otherwise wipes the contents of the file
156137
func touchFile(name string) error {
157138
// Ensure upstream directory exists for file
158-
err := os.MkdirAll(filepath.Dir(name), 0o755)
139+
err := os.MkdirAll(filepath.Dir(name), 0o644)
159140
if err != nil {
160141
return err
161142
}
@@ -164,8 +145,7 @@ func touchFile(name string) error {
164145
if err != nil {
165146
return err
166147
}
167-
f.Close()
168-
return nil
148+
return f.Close()
169149
}
170150

171151
// setStatusReady ensures the ready files exist

internal/runner/manager.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ func (m *Manager) createDefaultRunner(ctx context.Context) (*Runner, error) {
356356
// Derive the runtime context from the manager's context
357357
runtimeContext, runtimeCancel := context.WithCancel(ctx)
358358

359+
commandSetup:
359360
cmd := exec.CommandContext(runtimeContext, pythonPath, args...) //nolint:gosec // expected subprocess launched with variable
360361
runner, err := m.setupRunner(runtimeContext, runtimeCancel, cmd, env, runnerCtx, cogYaml.Concurrency.Max)
361362

@@ -374,12 +375,18 @@ func (m *Manager) createDefaultRunner(ctx context.Context) (*Runner, error) {
374375

375376
if !cp.HasCheckpoint() {
376377
err := cp.Checkpoint(ctx, cmd)
377-
var fatalCheckpointErr *checkpointer.FatalCheckpointErr
378-
if errors.As(err, &fatalCheckpointErr) {
379-
return nil, fmt.Errorf("fatal error while trying to checkpoint: %w", err)
378+
var FatalCheckpointError *checkpointer.FatalCheckpointError
379+
// If we saw an error that would leave the runner unusable, turn off the
380+
// checkpointer and recreate the command and runner
381+
if errors.As(err, &FatalCheckpointError) {
382+
// TODO: Is this bad? Should we just return the error back up?
383+
// The main concern is what `runner.Config` does leaving artifacts
384+
// between runs, although I think that should be fine?
385+
cp.Disable()
386+
goto commandSetup
380387
}
381388
// If the error is not fatal, we failed to create a checkpoint but are still
382-
// running the original cog process, so we can just continue as if we did
389+
// running the cog process successfully, so we can just continue as if we did
383390
// nothing
384391
}
385392

python/coglet/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def main() -> int:
5454
parser = argparse.ArgumentParser()
5555
parser.add_argument('--name', metavar='NAME', required=True, help='name')
5656
group = parser.add_mutually_exclusive_group()
57-
group.add_argument('--ipc-url', metavar='URL', required=True, help='IPC URL')
57+
group.add_argument('--ipc-url', metavar='URL', help='IPC URL')
5858
group.add_argument('--signal_mode', action='store_true')
5959
parser.add_argument(
6060
'--working-dir', metavar='DIR', required=True, help='working directory'

0 commit comments

Comments
 (0)