@@ -49,6 +49,12 @@ const (
49
49
50
50
// Healthcheck message type from conmon (using negative to avoid PID conflicts)
51
51
HealthCheckMsgStatusUpdate = - 100
52
+
53
+ // Healthcheck status values sent by conmon; each is added to HealthCheckMsgStatusUpdate (-100) to form the message type
54
+ HealthCheckStatusNone = 0
55
+ HealthCheckStatusStarting = 1
56
+ HealthCheckStatusHealthy = 2
57
+ HealthCheckStatusUnhealthy = 3
52
58
)
53
59
54
60
// ConmonOCIRuntime is an OCI runtime managed by Conmon.
@@ -1508,75 +1514,61 @@ func readConmonHealthCheckPipeData(ctr *Container, pipe *os.File) {
1508
1514
return
1509
1515
}
1510
1516
1511
- // Log the raw JSON string received from conmon
1512
- logrus .Debugf ("HEALTHCHECK: Raw JSON received from conmon for container %s: %q" , ctr .ID (), string (b ))
1513
- logrus .Debugf ("HEALTHCHECK: JSON length: %d bytes" , len (b ))
1517
+ // Log the raw message received from conmon
1518
+ logrus .Debugf ("HEALTHCHECK: Raw message received from conmon for container %s: %q" , ctr .ID (), string (b ))
1519
+ logrus .Debugf ("HEALTHCHECK: Message length: %d bytes" , len (b ))
1514
1520
1515
- // Parse the JSON
1516
- var si syncInfo
1517
- if err := json .Unmarshal (b , & si ); err != nil {
1518
- logrus .Errorf ("HEALTHCHECK: Failed to parse JSON from conmon for container %s: %v" , ctr .ID (), err )
1521
+ // Parse the message as a simple integer (no JSON)
1522
+ messageStr := strings .TrimSpace (string (b ))
1523
+ messageType , err := strconv .Atoi (messageStr )
1524
+ if err != nil {
1525
+ logrus .Errorf ("HEALTHCHECK: Failed to parse message as integer for container %s: %v" , ctr .ID (), err )
1519
1526
continue
1520
1527
}
1521
1528
1522
- logrus .Debugf ("HEALTHCHECK: Parsed sync info for container %s: Data=%d, Message=%q" , ctr .ID (), si .Data , si .Message )
1523
-
1524
- // Handle healthcheck status updates (negative message types)
1525
- if si .Data == HealthCheckMsgStatusUpdate && si .Message != "" {
1526
- logrus .Infof ("HEALTHCHECK: Received healthcheck status update for container %s: %s" , ctr .ID (), si .Message )
1527
- // Process the healthcheck status update
1528
- if err := handleHealthCheckStatusUpdate (ctr , si .Message ); err != nil {
1529
- logrus .Errorf ("HEALTHCHECK: Failed to process healthcheck status update for container %s: %v" , ctr .ID (), err )
1529
+ logrus .Debugf ("HEALTHCHECK: Parsed message type for container %s: %d" , ctr .ID (), messageType )
1530
+
1531
+ // Handle healthcheck status updates using the integer encoding scheme.
1532
+ // Base message type is -100, status values are added to it:
1533
+ // -100 + 0 (none) = -100
1534
+ // -100 + 1 (starting) = -99
1535
+ // -100 + 2 (healthy) = -98
1536
+ // -100 + 3 (unhealthy) = -97
1537
+ if messageType >= HealthCheckMsgStatusUpdate && messageType <= HealthCheckMsgStatusUpdate + HealthCheckStatusUnhealthy {
1538
+ statusValue := messageType - HealthCheckMsgStatusUpdate // Convert back to status value
1539
+ var status string
1540
+
1541
+ switch statusValue {
1542
+ case HealthCheckStatusNone :
1543
+ status = define .HealthCheckReset // "reset" or "none"
1544
+ case HealthCheckStatusStarting :
1545
+ status = define .HealthCheckStarting // "starting"
1546
+ case HealthCheckStatusHealthy :
1547
+ status = define .HealthCheckHealthy // "healthy"
1548
+ case HealthCheckStatusUnhealthy :
1549
+ status = define .HealthCheckUnhealthy // "unhealthy"
1550
+ default :
1551
+ logrus .Errorf ("HEALTHCHECK: Unknown status value %d for container %s" , statusValue , ctr .ID ())
1552
+ continue
1530
1553
}
1531
- } else if si .Data == HealthCheckMsgStatusUpdate {
1532
- logrus .Debugf ("HEALTHCHECK: Received healthcheck message type %d for container %s but no message content" , si .Data , ctr .ID ())
1533
- } else if si .Data < 0 {
1534
- // This might be a healthcheck message with a different negative type number
1535
- logrus .Debugf ("HEALTHCHECK: Received negative message type %d for container %s - might be healthcheck related" , si .Data , ctr .ID ())
1536
- } else if si .Data > 0 {
1537
- // This might be a PID or other positive message - log but don't process as healthcheck
1538
- logrus .Debugf ("HEALTHCHECK: Received positive message type %d for container %s - not healthcheck related" , si .Data , ctr .ID ())
1539
- }
1540
- }
1541
- }
1542
1554
1543
- // handleHealthCheckStatusUpdate processes healthcheck status updates from conmon
1544
- func handleHealthCheckStatusUpdate (ctr * Container , message string ) error {
1545
- logrus .Debugf ("HEALTHCHECK: Processing healthcheck status update for container %s: %s" , ctr .ID (), message )
1555
+ logrus .Infof ("HEALTHCHECK: Received healthcheck status update for container %s: %s (message type: %d, status value: %d)" ,
1556
+ ctr .ID (), status , messageType , statusValue )
1546
1557
1547
- // Parse the healthcheck status JSON from conmon
1548
- type healthCheckStatus struct {
1549
- Type string `json:"type"`
1550
- ContainerID string `json:"container_id"`
1551
- Status string `json:"status"`
1552
- ExitCode int `json:"exit_code"`
1553
- Timestamp int64 `json:"timestamp"`
1554
- }
1555
-
1556
- var hcStatus healthCheckStatus
1557
- if err := json .Unmarshal ([]byte (message ), & hcStatus ); err != nil {
1558
- logrus .Errorf ("HEALTHCHECK: Failed to parse healthcheck status JSON for container %s: %v" , ctr .ID (), err )
1559
- return fmt .Errorf ("failed to parse healthcheck status from conmon: %w" , err )
1560
- }
1561
-
1562
- logrus .Debugf ("HEALTHCHECK: Parsed healthcheck status for container %s: Type=%s, ContainerID=%s, Status=%s, ExitCode=%d, Timestamp=%d" ,
1563
- ctr .ID (), hcStatus .Type , hcStatus .ContainerID , hcStatus .Status , hcStatus .ExitCode , hcStatus .Timestamp )
1564
-
1565
- // Verify this is for the correct container
1566
- if hcStatus .ContainerID != ctr .ID () {
1567
- logrus .Errorf ("HEALTHCHECK: Healthcheck status for wrong container: expected %s, got %s" , ctr .ID (), hcStatus .ContainerID )
1568
- return fmt .Errorf ("healthcheck status for wrong container: expected %s, got %s" , ctr .ID (), hcStatus .ContainerID )
1569
- }
1570
-
1571
- // Update the container's healthcheck status
1572
- logrus .Debugf ("HEALTHCHECK: Updating healthcheck status for container %s to %s" , ctr .ID (), hcStatus .Status )
1573
- if err := ctr .updateHealthStatus (hcStatus .Status ); err != nil {
1574
- logrus .Errorf ("HEALTHCHECK: Failed to update healthcheck status for container %s: %v" , ctr .ID (), err )
1575
- return fmt .Errorf ("failed to update healthcheck status for container %s: %w" , ctr .ID (), err )
1558
+ // Update the container's healthcheck status
1559
+ if err := ctr .updateHealthStatus (status ); err != nil {
1560
+ logrus .Errorf ("HEALTHCHECK: Failed to update healthcheck status for container %s: %v" , ctr .ID (), err )
1561
+ } else {
1562
+ logrus .Infof ("HEALTHCHECK: Successfully updated healthcheck status for container %s to %s" , ctr .ID (), status )
1563
+ }
1564
+ } else if messageType < 0 {
1565
+ // Other negative message types - might be healthcheck related but not recognized
1566
+ logrus .Debugf ("HEALTHCHECK: Received unrecognized negative message type %d for container %s - might be healthcheck related" , messageType , ctr .ID ())
1567
+ } else if messageType > 0 {
1568
+ // Positive message types - not healthcheck related
1569
+ logrus .Debugf ("HEALTHCHECK: Received positive message type %d for container %s - not healthcheck related" , messageType , ctr .ID ())
1570
+ }
1576
1571
}
1577
-
1578
- logrus .Infof ("HEALTHCHECK: Successfully updated healthcheck status for container %s to %s (exit code: %d)" , ctr .ID (), hcStatus .Status , hcStatus .ExitCode )
1579
- return nil
1580
1572
}
1581
1573
1582
1574
// writeConmonPipeData writes nonce data to a pipe
0 commit comments