Skip to content

Commit 0d88db7

Browse files
authored
Add file locking for List workloads (#1420)
This makes the behaviour consistent with the Get workloads method
1 parent d1d2a99 commit 0d88db7

File tree

2 files changed

+258
-13
lines changed

2 files changed

+258
-13
lines changed

pkg/workloads/statuses/file_status.go

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -347,11 +347,31 @@ func (*fileStatusManager) readStatusFile(statusFilePath string) (*workloadStatus
347347
return nil, fmt.Errorf("failed to read status file: %w", err)
348348
}
349349

350+
// Validate file content before parsing
351+
if len(data) == 0 {
352+
return nil, fmt.Errorf("status file is empty")
353+
}
354+
355+
// Basic JSON structure validation
356+
if !json.Valid(data) {
357+
return nil, fmt.Errorf("status file contains invalid JSON")
358+
}
359+
350360
var statusFile workloadStatusFile
351361
if err := json.Unmarshal(data, &statusFile); err != nil {
352362
return nil, fmt.Errorf("failed to unmarshal status file: %w", err)
353363
}
354364

365+
// Validate essential fields
366+
if statusFile.Status == "" {
367+
return nil, fmt.Errorf("status file missing required 'status' field")
368+
}
369+
370+
// Validate timestamps
371+
if statusFile.CreatedAt.IsZero() {
372+
return nil, fmt.Errorf("status file missing or invalid 'created_at' field")
373+
}
374+
355375
return &statusFile, nil
356376
}
357377

@@ -393,26 +413,52 @@ func (f *fileStatusManager) getWorkloadsFromFiles() (map[string]core.Workload, e
393413
}
394414

395415
workloads := make(map[string]core.Workload)
416+
ctx := context.Background() // Create context for file locking
417+
396418
for _, file := range files {
397419
// Extract workload name from filename (remove .json extension)
398420
workloadName := strings.TrimSuffix(filepath.Base(file), ".json")
399421

400-
// Read the status file
401-
statusFile, err := f.readStatusFile(file)
422+
// Use proper file locking like GetWorkload does
423+
err := f.withFileReadLock(ctx, workloadName, func(statusFilePath string) error {
424+
// Check if file exists first
425+
if _, err := os.Stat(statusFilePath); os.IsNotExist(err) {
426+
logger.Debugf("status file for workload %s no longer exists, skipping", workloadName)
427+
return nil // Not an error, file was removed
428+
} else if err != nil {
429+
return fmt.Errorf("failed to check status file: %w", err)
430+
}
431+
432+
// Read the status file with proper error handling
433+
statusFile, err := f.readStatusFile(statusFilePath)
434+
if err != nil {
435+
// Distinguish between different types of errors
436+
if os.IsPermission(err) {
437+
return fmt.Errorf("permission denied reading status file: %w", err)
438+
}
439+
// For JSON parsing errors or corrupted files, log details
440+
logger.Errorf("failed to read or parse status file %s for workload %s: %v", statusFilePath, workloadName, err)
441+
return fmt.Errorf("corrupted or invalid status file: %w", err)
442+
}
443+
444+
// Create workload from file data
445+
workload := core.Workload{
446+
Name: workloadName,
447+
Status: statusFile.Status,
448+
StatusContext: statusFile.StatusContext,
449+
CreatedAt: statusFile.CreatedAt,
450+
}
451+
452+
workloads[workloadName] = workload
453+
return nil
454+
})
455+
402456
if err != nil {
403-
logger.Warnf("failed to read status file %s: %v", file, err)
457+
// Log the specific error but continue processing other workloads
458+
// This maintains the existing behavior but with better diagnostics
459+
logger.Warnf("failed to process status file for workload %s: %v", workloadName, err)
404460
continue
405461
}
406-
407-
// Create workload from file data
408-
workload := core.Workload{
409-
Name: workloadName,
410-
Status: statusFile.Status,
411-
StatusContext: statusFile.StatusContext,
412-
CreatedAt: statusFile.CreatedAt,
413-
}
414-
415-
workloads[workloadName] = workload
416462
}
417463

418464
return workloads, nil

pkg/workloads/statuses/file_status_test.go

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,3 +958,202 @@ func TestFileStatusManager_ListWorkloads_WithValidation(t *testing.T) {
958958
strings.Contains(proxyDown.StatusContext, "proxy")
959959
assert.True(t, isProxyValidated, "Proxy down workload should be detected as unhealthy")
960960
}
961+
962+
func TestFileStatusManager_GetWorkload_vs_ListWorkloads_Consistency(t *testing.T) {
963+
t.Parallel()
964+
965+
ctrl := gomock.NewController(t)
966+
defer ctrl.Finish()
967+
968+
tempDir := t.TempDir()
969+
mockRuntime := mocks.NewMockRuntime(ctrl)
970+
manager := &fileStatusManager{
971+
baseDir: tempDir,
972+
runtime: mockRuntime,
973+
}
974+
ctx := context.Background()
975+
976+
// Create a workload status file
977+
err := manager.SetWorkloadStatus(ctx, "test-workload", rt.WorkloadStatusStarting, "")
978+
require.NoError(t, err)
979+
980+
// Mock runtime to return empty (workload exists only in file)
981+
mockRuntime.EXPECT().ListWorkloads(gomock.Any()).Return([]rt.ContainerInfo{}, nil)
982+
983+
// GetWorkload for a starting workload doesn't call runtime (only running workloads are validated)
984+
workload, err := manager.GetWorkload(ctx, "test-workload")
985+
require.NoError(t, err)
986+
assert.Equal(t, "test-workload", workload.Name)
987+
assert.Equal(t, rt.WorkloadStatusStarting, workload.Status)
988+
989+
// ListWorkloads should include the same file-based workload
990+
workloads, err := manager.ListWorkloads(ctx, true, nil)
991+
require.NoError(t, err)
992+
993+
// Should find the file-based workload in the list
994+
require.Len(t, workloads, 1)
995+
assert.Equal(t, "test-workload", workloads[0].Name)
996+
assert.Equal(t, rt.WorkloadStatusStarting, workloads[0].Status)
997+
998+
// Both operations should return the same workload data for consistency
999+
assert.Equal(t, workload.Name, workloads[0].Name)
1000+
assert.Equal(t, workload.Status, workloads[0].Status)
1001+
assert.Equal(t, workload.StatusContext, workloads[0].StatusContext)
1002+
}
1003+
1004+
func TestFileStatusManager_ListWorkloads_CorruptedFile(t *testing.T) {
1005+
t.Parallel()
1006+
1007+
ctrl := gomock.NewController(t)
1008+
defer ctrl.Finish()
1009+
1010+
tempDir := t.TempDir()
1011+
mockRuntime := mocks.NewMockRuntime(ctrl)
1012+
manager := &fileStatusManager{
1013+
baseDir: tempDir,
1014+
runtime: mockRuntime,
1015+
}
1016+
ctx := context.Background()
1017+
1018+
// Create a valid workload first
1019+
err := manager.SetWorkloadStatus(ctx, "good-workload", rt.WorkloadStatusStarting, "")
1020+
require.NoError(t, err)
1021+
1022+
// Create a corrupted status file manually
1023+
corruptedFile := filepath.Join(tempDir, "corrupted-workload.json")
1024+
err = os.WriteFile(corruptedFile, []byte(`{"invalid": json content`), 0644)
1025+
require.NoError(t, err)
1026+
1027+
// Create an empty status file
1028+
emptyFile := filepath.Join(tempDir, "empty-workload.json")
1029+
err = os.WriteFile(emptyFile, []byte(``), 0644)
1030+
require.NoError(t, err)
1031+
1032+
// Mock runtime to return empty
1033+
mockRuntime.EXPECT().ListWorkloads(gomock.Any()).Return([]rt.ContainerInfo{}, nil)
1034+
1035+
// ListWorkloads should handle corrupted files gracefully
1036+
workloads, err := manager.ListWorkloads(ctx, true, nil)
1037+
require.NoError(t, err)
1038+
1039+
// Should only return the good workload, corrupted ones should be skipped with warnings
1040+
require.Len(t, workloads, 1)
1041+
assert.Equal(t, "good-workload", workloads[0].Name)
1042+
}
1043+
1044+
func TestFileStatusManager_ListWorkloads_MissingRequiredFields(t *testing.T) {
1045+
t.Parallel()
1046+
1047+
ctrl := gomock.NewController(t)
1048+
defer ctrl.Finish()
1049+
1050+
tempDir := t.TempDir()
1051+
mockRuntime := mocks.NewMockRuntime(ctrl)
1052+
manager := &fileStatusManager{
1053+
baseDir: tempDir,
1054+
runtime: mockRuntime,
1055+
}
1056+
ctx := context.Background()
1057+
1058+
// Create a status file missing required fields
1059+
invalidStatusFile := workloadStatusFile{
1060+
// Missing Status field
1061+
StatusContext: "some context",
1062+
CreatedAt: time.Now(),
1063+
UpdatedAt: time.Now(),
1064+
}
1065+
statusFilePath := filepath.Join(tempDir, "invalid-fields.json")
1066+
data, err := json.MarshalIndent(invalidStatusFile, "", " ")
1067+
require.NoError(t, err)
1068+
err = os.WriteFile(statusFilePath, data, 0644)
1069+
require.NoError(t, err)
1070+
1071+
// Create a status file missing created_at
1072+
invalidStatusFile2 := workloadStatusFile{
1073+
Status: rt.WorkloadStatusRunning,
1074+
StatusContext: "some context",
1075+
// Missing CreatedAt field (will be zero value)
1076+
UpdatedAt: time.Now(),
1077+
}
1078+
statusFilePath2 := filepath.Join(tempDir, "missing-created.json")
1079+
data2, err := json.MarshalIndent(invalidStatusFile2, "", " ")
1080+
require.NoError(t, err)
1081+
err = os.WriteFile(statusFilePath2, data2, 0644)
1082+
require.NoError(t, err)
1083+
1084+
// Mock runtime to return empty
1085+
mockRuntime.EXPECT().ListWorkloads(gomock.Any()).Return([]rt.ContainerInfo{}, nil)
1086+
1087+
// ListWorkloads should handle files with missing required fields gracefully
1088+
workloads, err := manager.ListWorkloads(ctx, true, nil)
1089+
require.NoError(t, err)
1090+
1091+
// Should return empty since both files are invalid
1092+
assert.Len(t, workloads, 0)
1093+
}
1094+
1095+
func TestFileStatusManager_ReadStatusFile_Validation(t *testing.T) {
1096+
t.Parallel()
1097+
1098+
tempDir := t.TempDir()
1099+
manager := &fileStatusManager{baseDir: tempDir}
1100+
1101+
tests := []struct {
1102+
name string
1103+
fileContent string
1104+
expectError string
1105+
}{
1106+
{
1107+
name: "empty file",
1108+
fileContent: "",
1109+
expectError: "status file is empty",
1110+
},
1111+
{
1112+
name: "invalid json",
1113+
fileContent: `{"invalid": json}`,
1114+
expectError: "status file contains invalid JSON",
1115+
},
1116+
{
1117+
name: "missing status field",
1118+
fileContent: `{"status_context": "test", "created_at": "2023-01-01T00:00:00Z", "updated_at": "2023-01-01T00:00:00Z"}`,
1119+
expectError: "status file missing required 'status' field",
1120+
},
1121+
{
1122+
name: "missing created_at field",
1123+
fileContent: `{"status": "running", "status_context": "test", "updated_at": "2023-01-01T00:00:00Z"}`,
1124+
expectError: "status file missing or invalid 'created_at' field",
1125+
},
1126+
{
1127+
name: "valid file",
1128+
fileContent: `{"status": "running", "status_context": "test", "created_at": "2023-01-01T00:00:00Z", "updated_at": "2023-01-01T00:00:00Z"}`,
1129+
expectError: "",
1130+
},
1131+
}
1132+
1133+
for _, tt := range tests {
1134+
t.Run(tt.name, func(t *testing.T) {
1135+
t.Parallel()
1136+
1137+
// Create test file
1138+
testFile := filepath.Join(tempDir, tt.name+".json")
1139+
err := os.WriteFile(testFile, []byte(tt.fileContent), 0644)
1140+
require.NoError(t, err)
1141+
1142+
// Test readStatusFile
1143+
statusFile, err := manager.readStatusFile(testFile)
1144+
1145+
if tt.expectError != "" {
1146+
assert.Error(t, err)
1147+
assert.Contains(t, err.Error(), tt.expectError)
1148+
assert.Nil(t, statusFile)
1149+
} else {
1150+
assert.NoError(t, err)
1151+
assert.NotNil(t, statusFile)
1152+
assert.Equal(t, rt.WorkloadStatusRunning, statusFile.Status)
1153+
}
1154+
1155+
// Clean up
1156+
os.Remove(testFile)
1157+
})
1158+
}
1159+
}

0 commit comments

Comments
 (0)