From 1de5336b5c64449c1bc6fb305e98277f39ea337c Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 30 Jun 2025 15:22:20 +0200 Subject: [PATCH 01/38] Add rollback field to UpgradeRequest --- control_v2.proto | 3 + pkg/control/v2/cproto/control_v2.pb.go | 591 +++++++++++++------------ 2 files changed, 304 insertions(+), 290 deletions(-) diff --git a/control_v2.proto b/control_v2.proto index 55a988a37ff..c5ec6ecb037 100644 --- a/control_v2.proto +++ b/control_v2.proto @@ -116,6 +116,9 @@ message UpgradeRequest { // // If provided Elastic Agent package embedded PGP key is not checked for signature during upgrade. bool skipDefaultPgp = 5; + + // If true it indicates that we wish to rollback the current/last upgrade + bool rollback = 6; } // A upgrade response message. diff --git a/pkg/control/v2/cproto/control_v2.pb.go b/pkg/control/v2/cproto/control_v2.pb.go index 674529fdbad..57c0407751a 100644 --- a/pkg/control/v2/cproto/control_v2.pb.go +++ b/pkg/control/v2/cproto/control_v2.pb.go @@ -580,6 +580,8 @@ type UpgradeRequest struct { // // If provided Elastic Agent package embedded PGP key is not checked for signature during upgrade. SkipDefaultPgp bool `protobuf:"varint,5,opt,name=skipDefaultPgp,proto3" json:"skipDefaultPgp,omitempty"` + // If true it indicates that we wish to rollback the current/last upgrade + Rollback bool `protobuf:"varint,6,opt,name=rollback,proto3" json:"rollback,omitempty"` } func (x *UpgradeRequest) Reset() { @@ -649,6 +651,13 @@ func (x *UpgradeRequest) GetSkipDefaultPgp() bool { return false } +func (x *UpgradeRequest) GetRollback() bool { + if x != nil { + return x.Rollback + } + return false +} + // A upgrade response message. 
type UpgradeResponse struct { state protoimpl.MessageState @@ -2123,7 +2132,7 @@ var file_control_v2_proto_rawDesc = []byte{ 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x22, - 0xac, 0x01, 0x0a, 0x0e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0xc8, 0x01, 0x0a, 0x0e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x55, 0x52, 0x49, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, @@ -2133,300 +2142,302 @@ var file_control_v2_proto_rawDesc = []byte{ 0x70, 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x70, 0x67, 0x70, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0e, 0x73, 0x6b, 0x69, 0x70, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x67, 0x70, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, - 0x73, 0x6b, 0x69, 0x70, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x67, 0x70, 0x22, 0x6f, - 0x0a, 0x0f, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x2c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x14, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, - 0x6f, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 
0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x22, - 0xb5, 0x01, 0x0a, 0x12, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x55, 0x6e, 0x69, - 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x55, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x75, 0x6e, 0x69, 0x74, 0x49, 0x64, 0x12, 0x23, - 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, - 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, - 0x07, 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x70, 0x61, 0x79, 0x6c, 0x6f, 0x61, 0x64, 0x22, 0x9f, 0x01, 0x0a, 0x14, 0x43, 0x6f, 0x6d, 0x70, - 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, - 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3a, 0x0a, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, - 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, - 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x04, 0x6d, 0x65, 0x74, 0x61, - 0x1a, 0x37, 0x0a, 0x09, 0x4d, 0x65, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 
0x12, - 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xe6, 0x01, 0x0a, 0x0e, 0x43, 0x6f, - 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x12, 0x23, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, - 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, - 0x30, 0x0a, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, + 0x73, 0x6b, 0x69, 0x70, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x67, 0x70, 0x12, 0x1a, + 0x0a, 0x08, 0x72, 0x6f, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x72, 0x6f, 0x6c, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x22, 0x6f, 0x0a, 0x0f, 0x55, 0x70, + 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2c, 0x0a, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x14, 0x2e, + 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x22, 0xb5, 0x01, 0x0a, 0x12, + 0x43, 0x6f, 0x6d, 0x70, 0x6f, 
0x6e, 0x65, 0x6e, 0x74, 0x55, 0x6e, 0x69, 0x74, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x2d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, + 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x12, 0x17, 0x0a, 0x07, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x75, 0x6e, 0x69, 0x74, 0x49, 0x64, 0x12, 0x23, 0x0a, 0x05, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, + 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x79, + 0x6c, 0x6f, 0x61, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x61, 0x79, 0x6c, + 0x6f, 0x61, 0x64, 0x22, 0x9f, 0x01, 0x0a, 0x14, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, + 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x12, 0x3a, 0x0a, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, - 0x74, 0x55, 0x6e, 0x69, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x75, 0x6e, 0x69, 0x74, - 0x73, 0x12, 0x3f, 0x0a, 0x0c, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x66, - 0x6f, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x0b, 0x76, 0x65, 0x72, 0x73, 0x69, 
0x6f, 0x6e, 0x49, 0x6e, - 0x66, 0x6f, 0x22, 0xe0, 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, - 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x16, 0x0a, 0x06, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x06, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x62, 0x75, 0x69, 0x6c, 0x64, - 0x54, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x62, 0x75, 0x69, 0x6c, - 0x64, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, - 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, - 0x74, 0x12, 0x10, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, - 0x70, 0x69, 0x64, 0x12, 0x22, 0x0a, 0x0c, 0x75, 0x6e, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, - 0x67, 0x65, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x75, 0x6e, 0x70, 0x72, 0x69, - 0x76, 0x69, 0x6c, 0x65, 0x67, 0x65, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x73, 0x4d, 0x61, 0x6e, - 0x61, 0x67, 0x65, 0x64, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x69, 0x73, 0x4d, 0x61, - 0x6e, 0x61, 0x67, 0x65, 0x64, 0x22, 0xc9, 0x02, 0x0a, 0x12, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, - 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x12, 0x38, 0x0a, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x20, 0x2e, 0x63, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, - 0x02, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x1c, 0x0a, 0x09, - 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x62, 0x0a, 0x12, 0x43, 0x6f, + 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x4d, 0x65, 0x74, + 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x04, 0x6d, 0x65, 0x74, 0x61, 0x1a, 0x37, 0x0a, 0x09, + 0x4d, 0x65, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xe6, 0x01, 0x0a, 0x0e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, + 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x23, 0x0a, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x63, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x30, 0x0a, 0x05, 0x75, + 0x6e, 0x69, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x63, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x55, 0x6e, 0x69, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x12, 0x3f, 0x0a, + 0x0c, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 
0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, + 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, + 0x6f, 0x52, 0x0b, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0xe0, + 0x01, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x49, 0x6e, 0x66, + 0x6f, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, + 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x63, + 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x63, 0x6f, 0x6d, + 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x54, 0x69, 0x6d, 0x65, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x54, 0x69, 0x6d, + 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x08, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x12, 0x10, 0x0a, + 0x03, 0x70, 0x69, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x70, 0x69, 0x64, 0x12, + 0x22, 0x0a, 0x0c, 0x75, 0x6e, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, 0x67, 0x65, 0x64, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x75, 0x6e, 0x70, 0x72, 0x69, 0x76, 0x69, 0x6c, 0x65, + 0x67, 0x65, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x69, 0x73, 0x4d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x64, + 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x69, 0x73, 0x4d, 0x61, 0x6e, 0x61, 0x67, 0x65, + 0x64, 0x22, 0xc9, 0x02, 0x0a, 0x12, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, + 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x12, 0x38, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x20, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 
0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, + 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x74, 0x69, 0x6d, + 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x62, 0x0a, 0x12, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, + 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4d, 0x61, 0x70, 0x18, 0x04, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, + 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x2e, 0x43, + 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x12, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, + 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4d, 0x61, 0x70, 0x1a, 0x61, 0x0a, 0x17, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4d, 0x61, 0x70, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x30, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, - 0x6e, 0x74, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x12, 0x43, 0x6f, 0x6d, 0x70, - 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4d, 0x61, 0x70, 0x1a, 0x61, - 0x0a, 0x17, 0x43, 0x6f, 0x6d, 
0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x4d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x30, 0x0a, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x63, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, - 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x22, 0x80, 0x03, 0x0a, 0x0d, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x2a, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x16, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x41, 0x67, 0x65, 0x6e, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x12, - 0x23, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, - 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x2d, - 0x0a, 0x0a, 0x66, 0x6c, 0x65, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x52, 0x0a, 0x66, 0x6c, 0x65, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x22, 0x0a, - 0x0c, 0x66, 0x6c, 0x65, 0x65, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0c, 0x66, 0x6c, 0x65, 0x65, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x36, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x63, 0x70, 0x72, 
0x6f, 0x74, 0x6f, 0x2e, 0x43, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x0a, 0x63, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3f, 0x0a, 0x0f, 0x75, 0x70, 0x67, - 0x72, 0x61, 0x64, 0x65, 0x5f, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, - 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x52, 0x0e, 0x75, 0x70, 0x67, 0x72, - 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x12, 0x38, 0x0a, 0x09, 0x63, 0x6f, - 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, - 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, - 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x09, 0x63, 0x6f, 0x6c, 0x6c, 0x65, - 0x63, 0x74, 0x6f, 0x72, 0x22, 0xa6, 0x01, 0x0a, 0x0e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, - 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x14, - 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, - 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x49, - 0x64, 0x12, 0x3a, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, - 0x72, 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x87, 0x02, - 0x0a, 
0x16, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, - 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x63, 0x68, 0x65, - 0x64, 0x75, 0x6c, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, - 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x64, 0x41, 0x74, 0x12, 0x29, 0x0a, 0x10, 0x64, - 0x6f, 0x77, 0x6e, 0x6c, 0x6f, 0x61, 0x64, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0f, 0x64, 0x6f, 0x77, 0x6e, 0x6c, 0x6f, 0x61, 0x64, 0x50, - 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, - 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, - 0x69, 0x6c, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x72, 0x72, - 0x6f, 0x72, 0x5f, 0x6d, 0x73, 0x67, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x72, - 0x72, 0x6f, 0x72, 0x4d, 0x73, 0x67, 0x12, 0x26, 0x0a, 0x0f, 0x72, 0x65, 0x74, 0x72, 0x79, 0x5f, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x6d, 0x73, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4d, 0x73, 0x67, 0x12, 0x1f, - 0x0a, 0x0b, 0x72, 0x65, 0x74, 0x72, 0x79, 0x5f, 0x75, 0x6e, 0x74, 0x69, 0x6c, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0a, 0x72, 0x65, 0x74, 0x72, 0x79, 0x55, 0x6e, 0x74, 0x69, 0x6c, 0x12, - 0x16, 0x0a, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0xdf, 0x01, 0x0a, 0x14, 0x44, 0x69, 0x61, 0x67, - 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, - 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 
0x08, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, - 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, - 0x38, 0x0a, 0x09, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, - 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x22, 0x6c, 0x0a, 0x16, 0x44, 0x69, 0x61, - 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x52, 0x0a, 0x12, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, 0x32, - 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, - 0x6e, 0x61, 0x6c, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x52, 0x11, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, - 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x22, 0xb5, 0x01, 0x0a, 0x1b, 0x44, 0x69, 0x61, 0x67, - 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x42, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, - 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 
0x2e, 0x63, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, - 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x52, 0x0a, 0x12, 0x61, - 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x44, 0x69, 0x61, 0x67, 0x6e, - 0x6f, 0x73, 0x74, 0x69, 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x11, 0x61, 0x64, - 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x22, - 0x3f, 0x0a, 0x1a, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, - 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, - 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, - 0x22, 0x51, 0x0a, 0x17, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, - 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x07, 0x72, - 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, - 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x73, 0x22, 0x82, 0x01, 0x0a, 0x15, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, - 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, - 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 
0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, - 0x12, 0x2d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x6e, 0x69, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x17, 0x0a, 0x07, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x06, 0x75, 0x6e, 0x69, 0x74, 0x49, 0x64, 0x22, 0x4d, 0x0a, 0x16, 0x44, 0x69, 0x61, 0x67, - 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x12, 0x33, 0x0a, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x1d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, - 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x52, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x22, 0xd1, 0x01, 0x0a, 0x16, 0x44, 0x69, 0x61, 0x67, - 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, - 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, - 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x2d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x55, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x75, 0x6e, 0x69, 0x74, 0x49, 0x64, 0x12, 0x14, 0x0a, - 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, - 0x72, 0x6f, 0x72, 0x12, 0x36, 0x0a, 0x07, 0x72, 0x65, 
0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x05, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, - 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x8e, 0x01, 0x0a, 0x1b, - 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, - 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x63, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x14, - 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x12, 0x36, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, - 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x4f, 0x0a, 0x17, - 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, - 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x52, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x22, 0x2a, 0x0a, - 0x10, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2a, 0x85, 0x01, 0x0a, 0x05, 0x53, 
0x74, - 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, - 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x4f, 0x4e, 0x46, 0x49, 0x47, 0x55, 0x52, 0x49, 0x4e, 0x47, - 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x48, 0x45, 0x41, 0x4c, 0x54, 0x48, 0x59, 0x10, 0x02, 0x12, - 0x0c, 0x0a, 0x08, 0x44, 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0a, 0x0a, - 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x4f, - 0x50, 0x50, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x54, 0x4f, 0x50, 0x50, - 0x45, 0x44, 0x10, 0x06, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x49, 0x4e, - 0x47, 0x10, 0x07, 0x12, 0x0c, 0x0a, 0x08, 0x52, 0x4f, 0x4c, 0x4c, 0x42, 0x41, 0x43, 0x4b, 0x10, - 0x08, 0x2a, 0xbf, 0x01, 0x0a, 0x18, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, - 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0e, - 0x0a, 0x0a, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4e, 0x6f, 0x6e, 0x65, 0x10, 0x00, 0x12, 0x12, - 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, - 0x10, 0x01, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4f, 0x4b, 0x10, 0x02, - 0x12, 0x1a, 0x0a, 0x16, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x63, 0x6f, 0x76, 0x65, - 0x72, 0x61, 0x62, 0x6c, 0x65, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x10, 0x03, 0x12, 0x18, 0x0a, 0x14, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x50, 0x65, 0x72, 0x6d, 0x61, 0x6e, 0x65, 0x6e, 0x74, 0x45, - 0x72, 0x72, 0x6f, 0x72, 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x46, 0x61, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x53, 0x74, 0x6f, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x10, 0x06, - 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x53, 0x74, 0x6f, 0x70, 0x70, 0x65, - 0x64, 0x10, 0x07, 0x2a, 0x21, 
0x0a, 0x08, 0x55, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x09, 0x0a, 0x05, 0x49, 0x4e, 0x50, 0x55, 0x54, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x55, - 0x54, 0x50, 0x55, 0x54, 0x10, 0x01, 0x2a, 0x28, 0x0a, 0x0c, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, - 0x53, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x55, 0x52, 0x45, 0x10, 0x01, - 0x2a, 0x7f, 0x0a, 0x0b, 0x50, 0x70, 0x72, 0x6f, 0x66, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, - 0x0a, 0x0a, 0x06, 0x41, 0x4c, 0x4c, 0x4f, 0x43, 0x53, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x42, - 0x4c, 0x4f, 0x43, 0x4b, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4d, 0x44, 0x4c, 0x49, 0x4e, - 0x45, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x47, 0x4f, 0x52, 0x4f, 0x55, 0x54, 0x49, 0x4e, 0x45, - 0x10, 0x03, 0x12, 0x08, 0x0a, 0x04, 0x48, 0x45, 0x41, 0x50, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, - 0x4d, 0x55, 0x54, 0x45, 0x58, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x50, 0x52, 0x4f, 0x46, 0x49, - 0x4c, 0x45, 0x10, 0x06, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x48, 0x52, 0x45, 0x41, 0x44, 0x43, 0x52, - 0x45, 0x41, 0x54, 0x45, 0x10, 0x07, 0x12, 0x09, 0x0a, 0x05, 0x54, 0x52, 0x41, 0x43, 0x45, 0x10, - 0x08, 0x2a, 0x30, 0x0a, 0x1b, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x44, + 0x6e, 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x80, 0x03, + 0x0a, 0x0d, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x2a, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, + 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, + 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x12, 0x23, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, 0x2e, 0x63, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 
0x73, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x2d, 0x0a, 0x0a, 0x66, 0x6c, + 0x65, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0d, + 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x0a, 0x66, + 0x6c, 0x65, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x66, 0x6c, 0x65, + 0x65, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0c, 0x66, 0x6c, 0x65, 0x65, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x36, 0x0a, + 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x16, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3f, 0x0a, 0x0f, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, + 0x5f, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, + 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x44, + 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x52, 0x0e, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x44, + 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x12, 0x38, 0x0a, 0x09, 0x63, 0x6f, 0x6c, 0x6c, 0x65, 0x63, + 0x74, 0x6f, 0x72, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x63, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, + 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x09, 0x63, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, + 0x22, 0xa6, 0x01, 0x0a, 0x0e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, + 0x69, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x76, 0x65, + 0x72, 
0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x74, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x1b, 0x0a, 0x09, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, 0x3a, 0x0a, + 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, + 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, + 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x87, 0x02, 0x0a, 0x16, 0x55, 0x70, + 0x67, 0x72, 0x61, 0x64, 0x65, 0x44, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x73, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, + 0x64, 0x5f, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x73, 0x63, 0x68, 0x65, + 0x64, 0x75, 0x6c, 0x65, 0x64, 0x41, 0x74, 0x12, 0x29, 0x0a, 0x10, 0x64, 0x6f, 0x77, 0x6e, 0x6c, + 0x6f, 0x61, 0x64, 0x5f, 0x70, 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x02, 0x52, 0x0f, 0x64, 0x6f, 0x77, 0x6e, 0x6c, 0x6f, 0x61, 0x64, 0x50, 0x65, 0x72, 0x63, 0x65, + 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x61, + 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x61, 0x69, 0x6c, 0x65, 0x64, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x5f, 0x6d, + 0x73, 0x67, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x4d, + 0x73, 0x67, 0x12, 0x26, 0x0a, 0x0f, 0x72, 0x65, 0x74, 0x72, 0x79, 0x5f, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x5f, 0x6d, 0x73, 0x67, 0x18, 0x05, 
0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x72, 0x65, 0x74, + 0x72, 0x79, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x4d, 0x73, 0x67, 0x12, 0x1f, 0x0a, 0x0b, 0x72, 0x65, + 0x74, 0x72, 0x79, 0x5f, 0x75, 0x6e, 0x74, 0x69, 0x6c, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0a, 0x72, 0x65, 0x74, 0x72, 0x79, 0x55, 0x6e, 0x74, 0x69, 0x6c, 0x12, 0x16, 0x0a, 0x06, 0x72, + 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x72, 0x65, 0x61, + 0x73, 0x6f, 0x6e, 0x22, 0xdf, 0x01, 0x0a, 0x14, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, + 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x12, 0x1a, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x20, 0x0a, 0x0b, + 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, + 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x38, 0x0a, 0x09, 0x67, + 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x67, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x74, 0x65, 0x64, 0x22, 0x6c, 0x0a, 0x16, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, + 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 
0x73, 0x74, 0x12, + 0x52, 0x0a, 0x12, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x63, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x07, 0x0a, 0x03, 0x43, 0x50, 0x55, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x43, 0x4f, 0x4e, - 0x4e, 0x10, 0x01, 0x32, 0xdf, 0x04, 0x0a, 0x13, 0x45, 0x6c, 0x61, 0x73, 0x74, 0x69, 0x63, 0x41, - 0x67, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x12, 0x31, 0x0a, 0x07, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x17, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2d, - 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x15, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, - 0x0a, 0x53, 0x74, 0x61, 0x74, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x12, 0x0d, 0x2e, 0x63, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x15, 0x2e, 0x63, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x30, 0x01, 0x12, 0x31, 0x0a, 0x07, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x0d, - 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x17, 0x2e, - 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3a, 0x0a, 0x07, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, - 0x65, 0x12, 0x16, 
0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x63, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x52, 0x0a, 0x0f, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, - 0x41, 0x67, 0x65, 0x6e, 0x74, 0x12, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, - 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1f, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, - 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x53, 0x0a, 0x0f, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, - 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x12, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, - 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, - 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x62, 0x0a, 0x14, 0x44, + 0x52, 0x11, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x4d, 0x65, 0x74, 0x72, + 0x69, 0x63, 0x73, 0x22, 0xb5, 0x01, 0x0a, 0x1b, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, + 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x42, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 
0x73, 0x74, 0x52, 0x0a, 0x63, 0x6f, 0x6d, + 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x52, 0x0a, 0x12, 0x61, 0x64, 0x64, 0x69, 0x74, + 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x64, 0x64, + 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, + 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x11, 0x61, 0x64, 0x64, 0x69, 0x74, 0x69, + 0x6f, 0x6e, 0x61, 0x6c, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x22, 0x3f, 0x0a, 0x1a, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, - 0x6e, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, - 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, - 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, - 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, - 0x34, 0x0a, 0x09, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x12, 0x18, 0x2e, 0x63, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, - 0x45, 0x6d, 0x70, 0x74, 0x79, 0x42, 0x29, 0x5a, 0x24, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, - 0x6c, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x74, - 0x72, 0x6f, 0x6c, 0x2f, 0x76, 0x32, 0x2f, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0xf8, 0x01, 0x01, - 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, + 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x51, 0x0a, 0x17, + 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, + 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, + 0x82, 0x01, 0x0a, 0x15, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, + 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, + 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x2d, 0x0a, 0x09, + 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x75, + 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x75, 0x6e, + 0x69, 0x74, 0x49, 0x64, 0x22, 0x4d, 0x0a, 0x16, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, + 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x33, + 0x0a, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, + 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, + 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x05, 0x75, 0x6e, + 0x69, 0x74, 0x73, 0x22, 0xd1, 0x01, 0x0a, 0x16, 0x44, 0x69, 0x61, 0x67, 0x6e, 
0x6f, 0x73, 0x74, + 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, + 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, + 0x64, 0x12, 0x2d, 0x0a, 0x09, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x10, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, 0x6e, + 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x75, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x17, 0x0a, 0x07, 0x75, 0x6e, 0x69, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x06, 0x75, 0x6e, 0x69, 0x74, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, + 0x36, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, + 0x73, 0x74, 0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, 0x07, + 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x8e, 0x01, 0x0a, 0x1b, 0x44, 0x69, 0x61, 0x67, + 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, + 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, + 0x72, 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, + 0x12, 0x36, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x1c, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, + 0x6f, 0x73, 0x74, 
0x69, 0x63, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x52, + 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x22, 0x4f, 0x0a, 0x17, 0x44, 0x69, 0x61, 0x67, + 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, + 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x52, 0x05, 0x75, 0x6e, 0x69, 0x74, 0x73, 0x22, 0x2a, 0x0a, 0x10, 0x43, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, + 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x63, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2a, 0x85, 0x01, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, + 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x41, 0x52, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, 0x0f, 0x0a, + 0x0b, 0x43, 0x4f, 0x4e, 0x46, 0x49, 0x47, 0x55, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0b, + 0x0a, 0x07, 0x48, 0x45, 0x41, 0x4c, 0x54, 0x48, 0x59, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x44, + 0x45, 0x47, 0x52, 0x41, 0x44, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, + 0x47, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x06, + 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x07, 0x12, + 0x0c, 0x0a, 0x08, 0x52, 0x4f, 0x4c, 0x4c, 0x42, 0x41, 0x43, 0x4b, 0x10, 0x08, 0x2a, 0xbf, 0x01, + 0x0a, 0x18, 0x43, 0x6f, 0x6c, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6d, 0x70, 0x6f, + 0x6e, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x4e, 0x6f, 0x6e, 0x65, 0x10, 
0x00, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x53, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x01, 0x12, 0x0c, + 0x0a, 0x08, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x4f, 0x4b, 0x10, 0x02, 0x12, 0x1a, 0x0a, 0x16, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x61, 0x62, 0x6c, + 0x65, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x10, 0x03, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x50, 0x65, 0x72, 0x6d, 0x61, 0x6e, 0x65, 0x6e, 0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, + 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x46, 0x61, 0x74, 0x61, + 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x53, 0x74, 0x6f, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x10, 0x06, 0x12, 0x11, 0x0a, 0x0d, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x53, 0x74, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x10, 0x07, 0x2a, + 0x21, 0x0a, 0x08, 0x55, 0x6e, 0x69, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x09, 0x0a, 0x05, 0x49, + 0x4e, 0x50, 0x55, 0x54, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, + 0x10, 0x01, 0x2a, 0x28, 0x0a, 0x0c, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, 0x00, 0x12, + 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x55, 0x52, 0x45, 0x10, 0x01, 0x2a, 0x7f, 0x0a, 0x0b, + 0x50, 0x70, 0x72, 0x6f, 0x66, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0a, 0x0a, 0x06, 0x41, + 0x4c, 0x4c, 0x4f, 0x43, 0x53, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x42, 0x4c, 0x4f, 0x43, 0x4b, + 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x4d, 0x44, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x02, 0x12, + 0x0d, 0x0a, 0x09, 0x47, 0x4f, 0x52, 0x4f, 0x55, 0x54, 0x49, 0x4e, 0x45, 0x10, 0x03, 0x12, 0x08, + 0x0a, 0x04, 0x48, 0x45, 0x41, 0x50, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x4d, 0x55, 0x54, 0x45, + 0x58, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x50, 0x52, 0x4f, 0x46, 0x49, 0x4c, 0x45, 0x10, 
0x06, + 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x48, 0x52, 0x45, 0x41, 0x44, 0x43, 0x52, 0x45, 0x41, 0x54, 0x45, + 0x10, 0x07, 0x12, 0x09, 0x0a, 0x05, 0x54, 0x52, 0x41, 0x43, 0x45, 0x10, 0x08, 0x2a, 0x30, 0x0a, + 0x1b, 0x41, 0x64, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x44, 0x69, 0x61, 0x67, 0x6e, + 0x6f, 0x73, 0x74, 0x69, 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x07, 0x0a, 0x03, + 0x43, 0x50, 0x55, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x43, 0x4f, 0x4e, 0x4e, 0x10, 0x01, 0x32, + 0xdf, 0x04, 0x0a, 0x13, 0x45, 0x6c, 0x61, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x12, 0x31, 0x0a, 0x07, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, 0x74, + 0x79, 0x1a, 0x17, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2d, 0x0a, 0x05, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, + 0x74, 0x79, 0x1a, 0x15, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, 0x0a, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x57, 0x61, 0x74, 0x63, 0x68, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x15, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, + 0x31, 0x0a, 0x07, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x0d, 0x2e, 0x63, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x17, 0x2e, 0x63, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x52, 0x65, 0x73, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x3a, 0x0a, 0x07, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x12, 0x16, 0x2e, + 0x63, 0x70, 0x72, 0x6f, 0x74, 
0x6f, 0x2e, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x55, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x52, + 0x0a, 0x0f, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, + 0x74, 0x12, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, + 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x1f, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, + 0x6f, 0x73, 0x74, 0x69, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x53, 0x0a, 0x0f, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, + 0x55, 0x6e, 0x69, 0x74, 0x73, 0x12, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, + 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, + 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x55, 0x6e, 0x69, 0x74, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x62, 0x0a, 0x14, 0x44, 0x69, 0x61, 0x67, 0x6e, + 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x12, + 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, 0x61, 0x67, 0x6e, 0x6f, 0x73, + 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x69, + 0x61, 0x67, 0x6e, 0x6f, 0x73, 0x74, 0x69, 0x63, 0x43, 0x6f, 0x6d, 0x70, 0x6f, 0x6e, 0x65, 0x6e, + 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x34, 0x0a, 0x09, 0x43, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x12, 0x18, 0x2e, 
0x63, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x0d, 0x2e, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6d, 0x70, 0x74, + 0x79, 0x42, 0x29, 0x5a, 0x24, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x70, 0x6b, + 0x67, 0x2f, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, + 0x76, 0x32, 0x2f, 0x63, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0xf8, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( From 191a9a5fa9ef88deae675ce9b0987a3bd73ad106 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 30 Jun 2025 15:34:24 +0200 Subject: [PATCH 02/38] introduce rollback parameter to upgrade --- .../handlers/handler_action_upgrade_test.go | 10 +----- .../application/coordinator/coordinator.go | 6 ++-- .../coordinator/coordinator_test.go | 8 ++--- .../coordinator/coordinator_unit_test.go | 2 +- .../pkg/agent/application/upgrade/upgrade.go | 2 +- internal/pkg/agent/cmd/upgrade.go | 9 ++++- pkg/control/v2/client/client.go | 5 +-- pkg/control/v2/server/server.go | 2 +- .../pkg/control/v2/client/client_mock.go | 33 ++++++++++--------- 9 files changed, 39 insertions(+), 38 deletions(-) diff --git a/internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go b/internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go index 10ac83f536e..ae18af1a576 100644 --- a/internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go +++ b/internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go @@ -48,15 +48,7 @@ func (u *mockUpgradeManager) Reload(rawConfig *config.Config) error { return nil } -func (u *mockUpgradeManager) Upgrade( - ctx context.Context, - version string, - sourceURI string, - action *fleetapi.ActionUpgrade, - details *details.Details, - skipVerifyOverride bool, - skipDefaultPgp bool, - pgpBytes ...string) (reexec.ShutdownCallbackFn, error) { 
+func (u *mockUpgradeManager) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (reexec.ShutdownCallbackFn, error) { return u.UpgradeFn( ctx, diff --git a/internal/pkg/agent/application/coordinator/coordinator.go b/internal/pkg/agent/application/coordinator/coordinator.go index a21d8e7cb9f..3340435958c 100644 --- a/internal/pkg/agent/application/coordinator/coordinator.go +++ b/internal/pkg/agent/application/coordinator/coordinator.go @@ -85,7 +85,7 @@ type UpgradeManager interface { Reload(rawConfig *config.Config) error // Upgrade upgrades running agent. - Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) + Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) // Ack is used on startup to check if the agent has upgraded and needs to send an ack for the action Ack(ctx context.Context, acker acker.Acker) error @@ -695,7 +695,7 @@ func (c *Coordinator) Migrate(ctx context.Context, action *fleetapi.ActionMigrat // Upgrade runs the upgrade process. // Called from external goroutines. 
-func (c *Coordinator) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) error { +func (c *Coordinator) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) error { // early check outside of upgrader before overriding the state if !c.upgradeMgr.Upgradeable() { return ErrNotUpgradable @@ -735,7 +735,7 @@ func (c *Coordinator) Upgrade(ctx context.Context, version string, sourceURI str det := details.NewDetails(version, details.StateRequested, actionID) det.RegisterObserver(c.SetUpgradeDetails) - cb, err := c.upgradeMgr.Upgrade(ctx, version, sourceURI, action, det, skipVerifyOverride, skipDefaultPgp, pgpBytes...) + cb, err := c.upgradeMgr.Upgrade(ctx, version, rollback, sourceURI, action, det, skipVerifyOverride, skipDefaultPgp, pgpBytes...) if err != nil { c.ClearOverrideState() if errors.Is(err, upgrade.ErrUpgradeSameVersion) { diff --git a/internal/pkg/agent/application/coordinator/coordinator_test.go b/internal/pkg/agent/application/coordinator/coordinator_test.go index b2ba3bcb53c..34f810f42e0 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_test.go @@ -535,7 +535,7 @@ func TestUpgradeSameErrorAcked(t *testing.T) { acker.On("Ack", mock.Anything, actionUpgrade).Return(nil) - require.NoError(t, coord.Upgrade(t.Context(), "9.0", "http://localhost", actionUpgrade, true, true)) + require.NoError(t, coord.Upgrade(t.Context(), "9.0", false, "http://localhost", actionUpgrade, true, true)) acker.AssertCalled(t, "Ack", mock.Anything, actionUpgrade) } @@ -917,7 +917,7 @@ func TestCoordinator_Upgrade(t *testing.T) { require.NoError(t, err) cfgMgr.Config(ctx, cfg) - err = coord.Upgrade(ctx, "9.0.0", "", nil, true, false) + err = coord.Upgrade(ctx, "9.0.0", 
false, "", nil, true, false) require.ErrorIs(t, err, ErrNotUpgradable) cancel() @@ -954,7 +954,7 @@ func TestCoordinator_UpgradeDetails(t *testing.T) { require.NoError(t, err) cfgMgr.Config(ctx, cfg) - err = coord.Upgrade(ctx, "9.0.0", "", nil, true, false) + err = coord.Upgrade(ctx, "9.0.0", false, "", nil, true, false) require.ErrorIs(t, expectedErr, err) cancel() @@ -1159,7 +1159,7 @@ func (f *fakeUpgradeManager) Reload(cfg *config.Config) error { return nil } -func (f *fakeUpgradeManager) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { +func (f *fakeUpgradeManager) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { f.upgradeCalled = true if f.upgradeErr != nil { return nil, f.upgradeErr diff --git a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go index 59766a3226d..17e1da95b54 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go @@ -1526,7 +1526,7 @@ func TestCoordinatorInitiatesUpgrade(t *testing.T) { } // Call upgrade and make sure the upgrade manager receives an Upgrade call - err := coord.Upgrade(ctx, "1.2.3", "", nil, false, false) + err := coord.Upgrade(ctx, "1.2.3", false, "", nil, false, false) assert.True(t, upgradeMgr.upgradeCalled, "Coordinator Upgrade should call upgrade manager Upgrade") assert.Equal(t, upgradeMgr.upgradeErr, err, "Upgrade should report upgrade manager error") diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go 
index 19d3b67cb2b..5381f64e704 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -194,7 +194,7 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m } // Upgrade upgrades running agent, function returns shutdown callback that must be called by reexec. -func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { +func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) currentVersion := agentVersion{ diff --git a/internal/pkg/agent/cmd/upgrade.go b/internal/pkg/agent/cmd/upgrade.go index c944133e288..62e90e57fff 100644 --- a/internal/pkg/agent/cmd/upgrade.go +++ b/internal/pkg/agent/cmd/upgrade.go @@ -33,6 +33,7 @@ const ( flagPGPBytesPath = "pgp-path" flagPGPBytesURI = "pgp-uri" flagForce = "force" + flagRollback = "rollback" ) var ( @@ -64,6 +65,7 @@ func newUpgradeCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Comman cmd.Flags().String(flagPGPBytesURI, "", "Path to a web location containing PGP to use for package verification") cmd.Flags().String(flagPGPBytesPath, "", "Path to a file containing PGP to use for package verification") cmd.Flags().BoolP(flagForce, "", false, "Advanced option to force an upgrade on a fleet managed agent") + cmd.Flags().BoolP(flagRollback, "", false, "Roll back an upgrade") err := cmd.Flags().MarkHidden(flagForce) if err != nil { fmt.Fprintf(streams.Err, "error while setting upgrade force flag attributes: %s", err.Error()) @@ -162,6 +164,11 @@ func 
upgradeCmdWithClient(input *upgradeInput) error { return fmt.Errorf("failed to retrieve command flag information while trying to upgrade the agent: %w", err) } + rollback, err := cmd.Flags().GetBool(flagRollback) + if err != nil { + return fmt.Errorf("failed to retrieve command flag information %q while trying to upgrade the agent: %w", flagRollback, err) + } + skipVerification, err := cmd.Flags().GetBool(flagSkipVerify) if err != nil { return fmt.Errorf("failed to retrieve %s flag information while upgrading the agent: %w", flagSkipVerify, err) @@ -215,7 +222,7 @@ func upgradeCmdWithClient(input *upgradeInput) error { } } skipDefaultPgp, _ := cmd.Flags().GetBool(flagSkipDefaultPgp) - version, err = c.Upgrade(context.Background(), version, sourceURI, skipVerification, skipDefaultPgp, pgpChecks...) + version, err = c.Upgrade(context.Background(), version, rollback, sourceURI, skipVerification, skipDefaultPgp, pgpChecks...) if err != nil { s, ok := status.FromError(err) // Sometimes the gRPC server shuts down before replying to the command which is expected diff --git a/pkg/control/v2/client/client.go b/pkg/control/v2/client/client.go index 0846266293f..1308eb020eb 100644 --- a/pkg/control/v2/client/client.go +++ b/pkg/control/v2/client/client.go @@ -205,7 +205,7 @@ type Client interface { // Restart triggers restarting the current running daemon. Restart(ctx context.Context) error // Upgrade triggers upgrade of the current running daemon. - Upgrade(ctx context.Context, version string, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) + Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) // DiagnosticAgent gathers diagnostics information for the running Elastic Agent. 
DiagnosticAgent(ctx context.Context, additionalDiags []AdditionalMetrics) ([]DiagnosticFileResult, error) // DiagnosticUnits gathers diagnostics information from specific units (or all if non are provided). @@ -328,13 +328,14 @@ func (c *client) Restart(ctx context.Context) error { } // Upgrade triggers upgrade of the current running daemon. -func (c *client) Upgrade(ctx context.Context, version string, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) { +func (c *client) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) { res, err := c.client.Upgrade(ctx, &cproto.UpgradeRequest{ Version: version, SourceURI: sourceURI, SkipVerify: skipVerify, PgpBytes: pgpBytes, SkipDefaultPgp: skipDefaultPgp, + Rollback: rollback, }) if err != nil { return "", err diff --git a/pkg/control/v2/server/server.go b/pkg/control/v2/server/server.go index 5ef757b064e..c81d7985c52 100644 --- a/pkg/control/v2/server/server.go +++ b/pkg/control/v2/server/server.go @@ -174,7 +174,7 @@ func (s *Server) Restart(_ context.Context, _ *cproto.Empty) (*cproto.RestartRes // Upgrade performs the upgrade operation. func (s *Server) Upgrade(ctx context.Context, request *cproto.UpgradeRequest) (*cproto.UpgradeResponse, error) { - err := s.coord.Upgrade(ctx, request.Version, request.SourceURI, nil, request.SkipVerify, request.SkipDefaultPgp, request.PgpBytes...) + err := s.coord.Upgrade(ctx, request.Version, request.Rollback, request.SourceURI, nil, request.SkipVerify, request.SkipDefaultPgp, request.PgpBytes...) 
if err != nil { //nolint:nilerr // ignore the error, return a failure upgrade response return &cproto.UpgradeResponse{ diff --git a/testing/mocks/pkg/control/v2/client/client_mock.go b/testing/mocks/pkg/control/v2/client/client_mock.go index c408984634b..967306af978 100644 --- a/testing/mocks/pkg/control/v2/client/client_mock.go +++ b/testing/mocks/pkg/control/v2/client/client_mock.go @@ -539,14 +539,14 @@ func (_c *Client_StateWatch_Call) RunAndReturn(run func(context.Context) (client return _c } -// Upgrade provides a mock function with given fields: ctx, version, sourceURI, skipVerify, skipDefaultPgp, pgpBytes -func (_m *Client) Upgrade(ctx context.Context, version string, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) { +// Upgrade provides a mock function with given fields: ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp, pgpBytes +func (_m *Client) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string) (string, error) { _va := make([]interface{}, len(pgpBytes)) for _i := range pgpBytes { _va[_i] = pgpBytes[_i] } var _ca []interface{} - _ca = append(_ca, ctx, version, sourceURI, skipVerify, skipDefaultPgp) + _ca = append(_ca, ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp) _ca = append(_ca, _va...) ret := _m.Called(_ca...) @@ -556,17 +556,17 @@ func (_m *Client) Upgrade(ctx context.Context, version string, sourceURI string, var r0 string var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, bool, bool, ...string) (string, error)); ok { - return rf(ctx, version, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) + if rf, ok := ret.Get(0).(func(context.Context, string, bool, string, bool, bool, ...string) (string, error)); ok { + return rf(ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) 
} - if rf, ok := ret.Get(0).(func(context.Context, string, string, bool, bool, ...string) string); ok { - r0 = rf(ctx, version, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) + if rf, ok := ret.Get(0).(func(context.Context, string, bool, string, bool, bool, ...string) string); ok { + r0 = rf(ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) } else { r0 = ret.Get(0).(string) } - if rf, ok := ret.Get(1).(func(context.Context, string, string, bool, bool, ...string) error); ok { - r1 = rf(ctx, version, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) + if rf, ok := ret.Get(1).(func(context.Context, string, bool, string, bool, bool, ...string) error); ok { + r1 = rf(ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp, pgpBytes...) } else { r1 = ret.Error(1) } @@ -582,24 +582,25 @@ type Client_Upgrade_Call struct { // Upgrade is a helper method to define mock.On call // - ctx context.Context // - version string +// - rollback bool // - sourceURI string // - skipVerify bool // - skipDefaultPgp bool // - pgpBytes ...string -func (_e *Client_Expecter) Upgrade(ctx interface{}, version interface{}, sourceURI interface{}, skipVerify interface{}, skipDefaultPgp interface{}, pgpBytes ...interface{}) *Client_Upgrade_Call { +func (_e *Client_Expecter) Upgrade(ctx interface{}, version interface{}, rollback interface{}, sourceURI interface{}, skipVerify interface{}, skipDefaultPgp interface{}, pgpBytes ...interface{}) *Client_Upgrade_Call { return &Client_Upgrade_Call{Call: _e.mock.On("Upgrade", - append([]interface{}{ctx, version, sourceURI, skipVerify, skipDefaultPgp}, pgpBytes...)...)} + append([]interface{}{ctx, version, rollback, sourceURI, skipVerify, skipDefaultPgp}, pgpBytes...)...)} } -func (_c *Client_Upgrade_Call) Run(run func(ctx context.Context, version string, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string)) *Client_Upgrade_Call { +func (_c *Client_Upgrade_Call) Run(run func(ctx context.Context, 
version string, rollback bool, sourceURI string, skipVerify bool, skipDefaultPgp bool, pgpBytes ...string)) *Client_Upgrade_Call { _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-5) - for i, a := range args[5:] { + variadicArgs := make([]string, len(args)-6) + for i, a := range args[6:] { if a != nil { variadicArgs[i] = a.(string) } } - run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(bool), args[4].(bool), variadicArgs...) + run(args[0].(context.Context), args[1].(string), args[2].(bool), args[3].(string), args[4].(bool), args[5].(bool), variadicArgs...) }) return _c } @@ -609,7 +610,7 @@ func (_c *Client_Upgrade_Call) Return(_a0 string, _a1 error) *Client_Upgrade_Cal return _c } -func (_c *Client_Upgrade_Call) RunAndReturn(run func(context.Context, string, string, bool, bool, ...string) (string, error)) *Client_Upgrade_Call { +func (_c *Client_Upgrade_Call) RunAndReturn(run func(context.Context, string, bool, string, bool, bool, ...string) (string, error)) *Client_Upgrade_Call { _c.Call.Return(run) return _c } From d427690d8d696d6e648ea46714d43f13d52d90ab Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 1 Jul 2025 11:38:05 +0200 Subject: [PATCH 03/38] manual rollback from CLI PoC --- .../handlers/handler_action_upgrade.go | 2 +- .../actions/handlers/handler_helpers.go | 2 +- .../pkg/agent/application/filelock/locker.go | 1 + .../pkg/agent/application/upgrade/upgrade.go | 86 ++++++++++++++++++- internal/pkg/agent/cmd/upgrade_test.go | 4 +- internal/pkg/agent/cmd/watch.go | 11 +++ internal/pkg/agent/cmd/watch_test.go | 86 ++++++++++++++++++- 7 files changed, 186 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/application/actions/handlers/handler_action_upgrade.go b/internal/pkg/agent/application/actions/handlers/handler_action_upgrade.go index 1520677f06b..849957c0f6d 100644 --- a/internal/pkg/agent/application/actions/handlers/handler_action_upgrade.go +++ 
b/internal/pkg/agent/application/actions/handlers/handler_action_upgrade.go @@ -75,7 +75,7 @@ func (h *Upgrade) Handle(ctx context.Context, a fleetapi.Action, ack acker.Acker go func() { h.log.Infof("starting upgrade to version %s in background", action.Data.Version) - if err := h.coord.Upgrade(asyncCtx, action.Data.Version, action.Data.SourceURI, action, false, false); err != nil { + if err := h.coord.Upgrade(asyncCtx, action.Data.Version, false, action.Data.SourceURI, action, false, false); err != nil { h.log.Errorf("upgrade to version %s failed: %v", action.Data.Version, err) // If context is cancelled in getAsyncContext, the actions are acked there if !errors.Is(asyncCtx.Err(), context.Canceled) { diff --git a/internal/pkg/agent/application/actions/handlers/handler_helpers.go b/internal/pkg/agent/application/actions/handlers/handler_helpers.go index 39d5e48d7af..c83834bb584 100644 --- a/internal/pkg/agent/application/actions/handlers/handler_helpers.go +++ b/internal/pkg/agent/application/actions/handlers/handler_helpers.go @@ -28,7 +28,7 @@ type actionCoordinator interface { type upgradeCoordinator interface { actionCoordinator - Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) error + Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) error } type performActionFunc func(context.Context, component.Component, component.Unit, string, map[string]interface{}) (map[string]interface{}, error) diff --git a/internal/pkg/agent/application/filelock/locker.go b/internal/pkg/agent/application/filelock/locker.go index 9de204c9731..316301ace26 100644 --- a/internal/pkg/agent/application/filelock/locker.go +++ b/internal/pkg/agent/application/filelock/locker.go @@ -39,6 +39,7 @@ func (a *AppLocker) TryLock() error { if !locked { return 
ErrAppAlreadyRunning } + return nil } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 5381f64e704..e9354547f65 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -19,6 +19,7 @@ import ( "github.com/otiai10/copy" "go.elastic.co/apm/v2" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec" @@ -35,6 +36,7 @@ import ( "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/utils" agtversion "github.com/elastic/elastic-agent/pkg/version" currentagtversion "github.com/elastic/elastic-agent/version" ) @@ -194,7 +196,12 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m } // Upgrade upgrades running agent, function returns shutdown callback that must be called by reexec. 
-func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { +func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { + + if rollback { + return u.forceRollbackToPreviousVersion(version, action, det) + } + u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) currentVersion := agentVersion{ @@ -377,6 +384,83 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } +func (u *Upgrader) forceRollbackToPreviousVersion(version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { + // Formal checks for verifying we can rollback properly: + // 1. d.Metadata.RollbacksAvailable should contain the desired version with a valid TTL (it may need to be written by main agent process before starting watcher) + // 2. there has been at least the first restart with the new agent (i.e. we are not still downloading/extracting/rotating) + // 3. 
upgrade marker exists + // these should be revalidated after taking over watcher + err := u.PersistManualRollback() + if err != nil { + return nil, err + } + + // Invoke watcher again + _, err = InvokeWatcher(u.log, paths.BinaryPath(paths.VersionedHome(paths.Top()), agentName)) + if err != nil { + return nil, fmt.Errorf("invoking watcher: %w", err) + } + + return nil, nil + +} + +func (u *Upgrader) PersistManualRollback() error { + watcherApplock, err := u.takeOverWatcher() + if err != nil { + return fmt.Errorf("taking over watcher processes: %w", err) + } + defer func(watcherApplock *filelock.AppLocker) { + releaseWatcherAppLockerErr := watcherApplock.Unlock() + if releaseWatcherAppLockerErr != nil { + u.log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr) + } + }(watcherApplock) + + // read the upgrade marker + updateMarker, err := LoadMarker(paths.Data()) + if err != nil { + return fmt.Errorf("loading marker: %w", err) + } + updateMarker.DesiredOutcome = OUTCOME_ROLLBACK + err = SaveMarker(paths.Data(), updateMarker, true) + if err != nil { + return fmt.Errorf("saving marker: %w", err) + } + + return nil +} + +func (u *Upgrader) takeOverWatcher() (*filelock.AppLocker, error) { + pids, err := utils.GetWatcherPIDs() + if err != nil { + return nil, fmt.Errorf("listing watcher processes: %w", err) + } + + // this should be run continuously and concurrently attempting to get the app locker + for _, pid := range pids { + u.log.Debugf("attempting to kill watcher process with PID: %d", pid) + process, findProcErr := os.FindProcess(pid) + if findProcErr != nil { + return nil, fmt.Errorf("finding process with PID: %d: %w", pid, findProcErr) + } + killProcErr := process.Kill() + if killProcErr != nil { + return nil, fmt.Errorf("killing process with PID: %d: %w", pid, killProcErr) + } + u.log.Debugf("killed watcher process with PID: %d", pid) + } + + // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited + 
locker := filelock.NewAppLocker(paths.Top(), "watcher.lock") + err = locker.TryLock() + if err != nil { + return nil, fmt.Errorf("locking watcher applocker: %w", err) + } + + return locker, nil +} + func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { // check if the upgraded version is less than the previous (currently installed) version if current.parsedVersion.Less(*previous.parsedVersion) { diff --git a/internal/pkg/agent/cmd/upgrade_test.go b/internal/pkg/agent/cmd/upgrade_test.go index f9b98e5ec89..54f0510c25a 100644 --- a/internal/pkg/agent/cmd/upgrade_test.go +++ b/internal/pkg/agent/cmd/upgrade_test.go @@ -143,7 +143,7 @@ func TestUpgradeCmd(t *testing.T) { t.Run("proceed with upgrade if fleet managed, privileged, --force is set", func(t *testing.T) { mockClient := clientmocks.NewClient(t) mockClient.EXPECT().State(mock.Anything).Return(&client.AgentState{State: cproto.State_HEALTHY}, nil) - mockClient.EXPECT().Upgrade(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return("mockVersion", nil) + mockClient.EXPECT().Upgrade(mock.Anything, mock.Anything, false, mock.Anything, mock.Anything, mock.Anything).Return("mockVersion", nil) args := []string{"8.13.0"} // Version argument streams := cli.NewIOStreams() @@ -231,7 +231,7 @@ func TestUpgradeCmd(t *testing.T) { t.Run("proceed with upgrade if agent is standalone, user is privileged and skip-verify flag is set", func(t *testing.T) { mockClient := clientmocks.NewClient(t) mockClient.EXPECT().State(mock.Anything).Return(&client.AgentState{State: cproto.State_HEALTHY}, nil) - mockClient.EXPECT().Upgrade(mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return("mockVersion", nil) + mockClient.EXPECT().Upgrade(mock.Anything, mock.Anything, false, mock.Anything, mock.Anything, mock.Anything).Return("mockVersion", nil) args := []string{"8.13.0"} // Version argument streams := cli.NewIOStreams() diff --git 
a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index f203c1814f7..78632541a7c 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -102,6 +102,17 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher _ = locker.Unlock() }() + if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { + // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work + log.Info("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) + + err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) + if err != nil { + return fmt.Errorf("rolling back: %w", err) + } + return nil + } + isWithinGrace, tilGrace := gracePeriod(marker, cfg.GracePeriod) if isTerminalState(marker) || !isWithinGrace { stateString := "" diff --git a/internal/pkg/agent/cmd/watch_test.go b/internal/pkg/agent/cmd/watch_test.go index 9451c476543..afac6794129 100644 --- a/internal/pkg/agent/cmd/watch_test.go +++ b/internal/pkg/agent/cmd/watch_test.go @@ -268,6 +268,87 @@ func Test_watchCmd(t *testing.T) { }, wantErr: assert.NoError, }, + { + name: "Desired outcome is rollback, rollback immediately", + setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + dataDirPath := paths.DataFrom(tmpDir) + err := os.MkdirAll(dataDirPath, 0755) + require.NoError(t, err) + // upgrade started yesterday ;) + updatedOn := time.Now().Add(-1 * 24 * time.Hour) + err = upgrade.SaveMarker( + dataDirPath, + &upgrade.UpdateMarker{ + Version: "4.5.6", + Hash: "newver", + VersionedHome: "elastic-agent-4.5.6-newver", + UpdatedOn: updatedOn, + PrevVersion: "1.2.3", + PrevHash: "prvver", + PrevVersionedHome: "elastic-agent-prvver", + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "4.5.6", + State: 
details.StateWatching, + ActionID: "", + Metadata: details.Metadata{}, + }, + DesiredOutcome: upgrade.OUTCOME_ROLLBACK, + }, + true, + ) + require.NoError(t, err) + + installModifier.EXPECT(). + Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver"). + Return(nil) + }, + args: args{ + cfg: configuration.DefaultUpgradeConfig().Watcher, + }, + wantErr: assert.NoError, + }, + { + name: "Desired outcome is rollback no upgrade details, rollback immediately", + setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + dataDirPath := paths.DataFrom(tmpDir) + err := os.MkdirAll(dataDirPath, 0755) + require.NoError(t, err) + // upgrade started yesterday ;) + updatedOn := time.Now().Add(-1 * 24 * time.Hour) + err = upgrade.SaveMarker( + dataDirPath, + &upgrade.UpdateMarker{ + Version: "4.5.6", + Hash: "newver", + VersionedHome: "elastic-agent-4.5.6-newver", + UpdatedOn: updatedOn, + PrevVersion: "1.2.3", + PrevHash: "prvver", + PrevVersionedHome: "elastic-agent-prvver", + Acked: false, + Action: &fleetapi.ActionUpgrade{ + ActionID: "action-id", + ActionType: fleetapi.ActionTypeUpgrade, + Data: fleetapi.ActionUpgradeData{Version: "4.5.6"}, + }, + Details: nil, + DesiredOutcome: upgrade.OUTCOME_ROLLBACK, + }, + true, + ) + require.NoError(t, err) + + installModifier.EXPECT(). + Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver"). 
+ Return(nil) + }, + args: args{ + cfg: configuration.DefaultUpgradeConfig().Watcher, + }, + wantErr: assert.NoError, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -277,7 +358,10 @@ func Test_watchCmd(t *testing.T) { mockInstallModifier := cmdmocks.NewInstallationModifier(t) tt.setupUpgradeMarker(t, tmpDir, mockWatcher, mockInstallModifier) tt.wantErr(t, watchCmd(log, tmpDir, tt.args.cfg, mockWatcher, mockInstallModifier), fmt.Sprintf("watchCmd(%v, ...)", tt.args.cfg)) - t.Logf("watchCmd logs:\n%v", obs.All()) + t.Log("watchCmd logs:\n") + for _, osbLog := range obs.All() { + t.Logf("\t%s - %s - %v\n", osbLog.Level, osbLog.Message, osbLog.Context) + } }) } } From 623b65bbe21ea0d3eeabd0e22717428f6e6b930e Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 1 Jul 2025 17:26:44 +0200 Subject: [PATCH 04/38] Concurrently retry taking over watcher --- .../application/upgrade/details/state.go | 3 +- .../pkg/agent/application/upgrade/upgrade.go | 83 ++++++++++++------- internal/pkg/agent/cmd/upgrade.go | 2 +- internal/pkg/agent/cmd/watch.go | 35 +++++--- 4 files changed, 81 insertions(+), 42 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/details/state.go b/internal/pkg/agent/application/upgrade/details/state.go index 41a04698cdb..bcb46b1569c 100644 --- a/internal/pkg/agent/application/upgrade/details/state.go +++ b/internal/pkg/agent/application/upgrade/details/state.go @@ -21,5 +21,6 @@ const ( StateFailed State = "UPG_FAILED" // List of well-known reasons for state transitions - ReasonWatchFailed = "watch failed" + ReasonWatchFailed = "watch failed" + ReasonManualRollback = "manual rollback requested" ) diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index e9354547f65..e9e27177769 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -199,7 +199,7 @@ func checkUpgrade(log *logger.Logger, 
currentVersion, newVersion agentVersion, m func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { if rollback { - return u.forceRollbackToPreviousVersion(version, action, det) + return u.forceRollbackToPreviousVersion(ctx, version, action, det) } u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) @@ -384,13 +384,13 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } -func (u *Upgrader) forceRollbackToPreviousVersion(version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { +func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { // Formal checks for verifying we can rollback properly: // 1. d.Metadata.RollbacksAvailable should contain the desired version with a valid TTL (it may need to be written by main agent process before starting watcher) // 2. there has been at least the first restart with the new agent (i.e. we are not still downloading/extracting/rotating) // 3. 
upgrade marker exists // these should be revalidated after taking over watcher - err := u.PersistManualRollback() + err := u.PersistManualRollback(ctx) if err != nil { return nil, err } @@ -405,8 +405,8 @@ func (u *Upgrader) forceRollbackToPreviousVersion(version string, action *fleeta } -func (u *Upgrader) PersistManualRollback() error { - watcherApplock, err := u.takeOverWatcher() +func (u *Upgrader) PersistManualRollback(ctx context.Context) error { + watcherApplock, err := u.takeOverWatcher(ctx) if err != nil { return fmt.Errorf("taking over watcher processes: %w", err) } @@ -431,34 +431,59 @@ func (u *Upgrader) PersistManualRollback() error { return nil } -func (u *Upgrader) takeOverWatcher() (*filelock.AppLocker, error) { - pids, err := utils.GetWatcherPIDs() - if err != nil { - return nil, fmt.Errorf("listing watcher processes: %w", err) - } - - // this should be run continuously and concurrently attempting to get the app locker - for _, pid := range pids { - u.log.Debugf("attempting to kill watcher process with PID: %d", pid) - process, findProcErr := os.FindProcess(pid) - if findProcErr != nil { - return nil, fmt.Errorf("finding process with PID: %d: %w", pid, findProcErr) - } - killProcErr := process.Kill() - if killProcErr != nil { - return nil, fmt.Errorf("killing process with PID: %d: %w", pid, killProcErr) +func (u *Upgrader) takeOverWatcher(ctx context.Context) (*filelock.AppLocker, error) { + + takeoverCtx, takeoverCancel := context.WithTimeout(ctx, 30*time.Second) + defer takeoverCancel() + go func() { + killingTicker := time.NewTicker(500 * time.Millisecond) + defer killingTicker.Stop() + for { + select { + case <-takeoverCtx.Done(): + return + case <-killingTicker.C: + pids, err := utils.GetWatcherPIDs() + if err != nil { + u.log.Errorf("error listing watcher processes: %s", err) + continue + } + + // this should be run continuously and concurrently to attempting to get the app locker + for _, pid := range pids { + u.log.Debugf("attempting to 
kill watcher process with PID: %d", pid) + process, findProcErr := os.FindProcess(pid) + if findProcErr != nil { + u.log.Errorf("error finding process with PID: %d: %s", pid, findProcErr) + continue + } + killProcErr := process.Kill() + if killProcErr != nil { + u.log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) + } + u.log.Debugf("killed watcher process with PID: %d", pid) + } + } } - u.log.Debugf("killed watcher process with PID: %d", pid) - } + }() // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited - locker := filelock.NewAppLocker(paths.Top(), "watcher.lock") - err = locker.TryLock() - if err != nil { - return nil, fmt.Errorf("locking watcher applocker: %w", err) + takeOverTicker := time.NewTicker(100 * time.Millisecond) + defer takeOverTicker.Stop() + for { + select { + case <-takeoverCtx.Done(): + return nil, fmt.Errorf("timed out taking over watcher applocker") + case <-takeOverTicker.C: + locker := filelock.NewAppLocker(paths.Top(), "watcher.lock") + err := locker.TryLock() + if err != nil { + u.log.Errorf("error locking watcher applocker: %s", err) + continue + } + return locker, nil + } } - - return locker, nil } func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { diff --git a/internal/pkg/agent/cmd/upgrade.go b/internal/pkg/agent/cmd/upgrade.go index 62e90e57fff..400293f1616 100644 --- a/internal/pkg/agent/cmd/upgrade.go +++ b/internal/pkg/agent/cmd/upgrade.go @@ -188,7 +188,7 @@ func upgradeCmdWithClient(input *upgradeInput) error { if err != nil { return fmt.Errorf("failed to check if upgrade is already in progress: %w", err) } - if isBeingUpgraded { + if isBeingUpgraded && !rollback { return errors.New("an upgrade is already in progress; please try again later.") } diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 78632541a7c..4d05aacef2f 100644 --- a/internal/pkg/agent/cmd/watch.go +++ 
b/internal/pkg/agent/cmd/watch.go @@ -102,17 +102,6 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher _ = locker.Unlock() }() - if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { - // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work - log.Info("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) - - err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) - if err != nil { - return fmt.Errorf("rolling back: %w", err) - } - return nil - } - isWithinGrace, tilGrace := gracePeriod(marker, cfg.GracePeriod) if isTerminalState(marker) || !isWithinGrace { stateString := "" @@ -131,6 +120,30 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher return nil } + if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { + // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work + log.Info("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) + err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) + if err != nil { + return fmt.Errorf("rolling back: %w", err) + } + + if marker.Details == nil { + actionID := "" + if marker.Action != nil { + actionID = marker.Action.ActionID + } + marker.Details = details.NewDetails(marker.Version, details.StateRollback, actionID) + } + marker.Details.SetStateWithReason(details.StateRollback, details.ReasonManualRollback) + err := upgrade.SaveMarker(dataDir, marker, true) + if err != nil { + return fmt.Errorf("saving marker after rolling back: %w", err) + } + + return nil + } + // About to start watching the upgrade. Initialize upgrade details and save them in the // upgrade marker. 
saveMarkerFunc := func(marker *upgrade.UpdateMarker, b bool) error { From fe5f321d2bbf40bdbeb02d890086f2e979b3d695 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 7 Jul 2025 08:41:03 +0200 Subject: [PATCH 05/38] Gracefully shutdown agent watcher --- .../agent/application/upgrade/rollback_linux.go | 2 +- internal/pkg/agent/application/upgrade/upgrade.go | 7 ++++--- internal/pkg/agent/cmd/watch.go | 15 +++++++++++++-- pkg/core/process/process.go | 5 +++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback_linux.go b/internal/pkg/agent/application/upgrade/rollback_linux.go index bdaf918a2b6..b3388e2cb54 100644 --- a/internal/pkg/agent/application/upgrade/rollback_linux.go +++ b/internal/pkg/agent/application/upgrade/rollback_linux.go @@ -38,7 +38,7 @@ func invokeCmd(agentExecutable string) *exec.Cmd { Credential: cred, Setsid: true, // propagate sigint instead of sigkill so we can ignore it - Pdeathsig: syscall.SIGINT, + Pdeathsig: syscall.Signal(0x0), } cmd.SysProcAttr = sysproc return cmd diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index e9e27177769..f82007409ba 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -36,6 +36,7 @@ import ( "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/core/process" "github.com/elastic/elastic-agent/pkg/utils" agtversion "github.com/elastic/elastic-agent/pkg/version" currentagtversion "github.com/elastic/elastic-agent/version" @@ -449,15 +450,15 @@ func (u *Upgrader) takeOverWatcher(ctx context.Context) (*filelock.AppLocker, er continue } - // this should be run continuously and concurrently to attempting to get the app locker + // this should be run continuously and 
concurrently attempting to get the app locker for _, pid := range pids { u.log.Debugf("attempting to kill watcher process with PID: %d", pid) - process, findProcErr := os.FindProcess(pid) + watcherProcess, findProcErr := os.FindProcess(pid) if findProcErr != nil { u.log.Errorf("error finding process with PID: %d: %s", pid, findProcErr) continue } - killProcErr := process.Kill() + killProcErr := process.Terminate(watcherProcess) if killProcErr != nil { u.log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) } diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 4d05aacef2f..6c35be5f515 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -37,6 +37,8 @@ const ( watcherLockFile = "watcher.lock" ) +var ErrWatchCancelled = errors.New("watch cancelled") + func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command { cmd := &cobra.Command{ Use: "watch", @@ -154,6 +156,11 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher errorCheckInterval := cfg.ErrorCheck.Interval ctx := context.Background() if err := watcher.Watch(ctx, tilGrace, errorCheckInterval, log); err != nil { + if errors.Is(err, ErrWatchCancelled) { + // the watch has been cancelled prematurely, don't clean or rollback just yet + return nil + } + log.Error("Error detected, proceeding to rollback: %v", err) upgradeDetails.SetStateWithReason(details.StateRollback, details.ReasonWatchFailed) @@ -227,8 +234,12 @@ func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time. WATCHLOOP: for { select { - case <-signals: - // ignore + case s := <-signals: + log.Infof("received signal: (%d): %v during watch", s, s) + if s == syscall.SIGINT || s == syscall.SIGTERM { + log.Infof("received signal: (%d): %v. 
Exiting watch", s, s) + return ErrWatchCancelled + } continue case <-ctx.Done(): break WATCHLOOP diff --git a/pkg/core/process/process.go b/pkg/core/process/process.go index 97755a63480..8f562dd1985 100644 --- a/pkg/core/process/process.go +++ b/pkg/core/process/process.go @@ -172,3 +172,8 @@ func startContext(ctx context.Context, path string, uid, gid int, args []string, Stderr: stderr, }, err } + +// Terminate is a utility function to gracefully shutdown a process +func Terminate(proc *os.Process) error { + return terminateCmd(proc) +} From 27247d6500eebcefdfabbe3eb47b0cd07a73bfd9 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 9 Jul 2025 09:56:07 +0200 Subject: [PATCH 06/38] move desired outcome check before grace period evaluation --- internal/pkg/agent/cmd/watch.go | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 6c35be5f515..e55104ee2b0 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -104,24 +104,6 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher _ = locker.Unlock() }() - isWithinGrace, tilGrace := gracePeriod(marker, cfg.GracePeriod) - if isTerminalState(marker) || !isWithinGrace { - stateString := "" - if marker.Details != nil { - stateString = string(marker.Details.State) - } - log.Infof("not within grace [updatedOn %v] %v or agent have been rolled back [state: %s]", marker.UpdatedOn.String(), time.Since(marker.UpdatedOn).String(), stateString) - // if it is started outside of upgrade loop - // if we're not within grace and marker is still there it might mean - // that cleanup was not performed ok, cleanup everything except current version - // hash is the same as hash of agent which initiated watcher. 
- if err := installModifier.Cleanup(log, paths.Top(), paths.VersionedHome(topDir), release.ShortCommit(), true, false); err != nil { - log.Error("clean up of prior watcher run failed", err) - } - // exit nicely - return nil - } - if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work log.Info("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) @@ -146,6 +128,24 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher return nil } + isWithinGrace, tilGrace := gracePeriod(marker, cfg.GracePeriod) + if isTerminalState(marker) || !isWithinGrace { + stateString := "" + if marker.Details != nil { + stateString = string(marker.Details.State) + } + log.Infof("not within grace [updatedOn %v] %v or agent have been rolled back [state: %s]", marker.UpdatedOn.String(), time.Since(marker.UpdatedOn).String(), stateString) + // if it is started outside of upgrade loop + // if we're not within grace and marker is still there it might mean + // that cleanup was not performed ok, cleanup everything except current version + // hash is the same as hash of agent which initiated watcher. + if err := installModifier.Cleanup(log, paths.Top(), paths.VersionedHome(topDir), release.ShortCommit(), true, false); err != nil { + log.Error("clean up of prior watcher run failed", err) + } + // exit nicely + return nil + } + // About to start watching the upgrade. Initialize upgrade details and save them in the // upgrade marker. 
saveMarkerFunc := func(marker *upgrade.UpdateMarker, b bool) error { From bcb8c93fd047f12da7ec80e068316a2e92ae7f6d Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 11 Jul 2025 07:46:55 +0200 Subject: [PATCH 07/38] Add rollbacks available to upgrade marker --- internal/pkg/agent/application/application.go | 2 +- .../coordinator/coordinator_unit_test.go | 6 +-- .../application/upgrade/details/details.go | 13 +++++++ .../application/upgrade/rollback_test.go | 6 +-- .../application/upgrade/step_download_test.go | 8 ++-- .../agent/application/upgrade/step_mark.go | 13 ++++++- .../pkg/agent/application/upgrade/upgrade.go | 39 ++++++++++--------- internal/pkg/agent/cmd/watch.go | 4 ++ 8 files changed, 56 insertions(+), 35 deletions(-) diff --git a/internal/pkg/agent/application/application.go b/internal/pkg/agent/application/application.go index dcaa2ddc570..e48c8739340 100644 --- a/internal/pkg/agent/application/application.go +++ b/internal/pkg/agent/application/application.go @@ -120,7 +120,7 @@ func New( // monitoring is not supported in bootstrap mode https://github.com/elastic/elastic-agent/issues/1761 isMonitoringSupported := !disableMonitoring && cfg.Settings.V1MonitoringEnabled - upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, agentInfo) + upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, cfg.Settings.Upgrade, agentInfo) if err != nil { return nil, nil, nil, fmt.Errorf("failed to create upgrader: %w", err) } diff --git a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go index 17e1da95b54..5dc366bc5f9 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go @@ -462,11 +462,7 @@ func TestCoordinatorReportsInvalidPolicy(t *testing.T) { } }() - upgradeMgr, err := upgrade.NewUpgrader( - log, - &artifact.Config{}, - &info.AgentInfo{}, - ) 
+ upgradeMgr, err := upgrade.NewUpgrader(log, &artifact.Config{}, nil, &info.AgentInfo{}) require.NoError(t, err, "errored when creating a new upgrader") // Channels have buffer length 1, so we don't have to run on multiple diff --git a/internal/pkg/agent/application/upgrade/details/details.go b/internal/pkg/agent/application/upgrade/details/details.go index cd83c61855d..8e246c26f3c 100644 --- a/internal/pkg/agent/application/upgrade/details/details.go +++ b/internal/pkg/agent/application/upgrade/details/details.go @@ -59,6 +59,19 @@ type Metadata struct { // Reason is a string that may give out more information about transitioning to the current state. It has been // introduced initially to distinguish between manual and automatic rollbacks Reason string `json:"reason,omitempty" yaml:"reason,omitempty"` + + RollbacksAvailable []RollbackAvailable `json:"rollbacks_available,omitempty" yaml:"rollbacks_available,omitempty"` +} + +//rollback_available: +//- version: 8.16.0-SNAPSHOT, +//home: data/elastic-agent-8.16.0-SNAPSHOT-b65953 +//valid_until: "2024-11-21T14:42:21Z" + +type RollbackAvailable struct { + Version string `json:"version" yaml:"version"` + Home string `json:"home" yaml:"home"` + ValidUntil time.Time `json:"valid_until" yaml:"valid_until"` } func NewDetails(targetVersion string, initialState State, actionID string) *Details { diff --git a/internal/pkg/agent/application/upgrade/rollback_test.go b/internal/pkg/agent/application/upgrade/rollback_test.go index 3f9cc0a33ab..26c5813c542 100644 --- a/internal/pkg/agent/application/upgrade/rollback_test.go +++ b/internal/pkg/agent/application/upgrade/rollback_test.go @@ -504,10 +504,6 @@ func createUpdateMarker(t *testing.T, log *logger.Logger, topDir, newAgentVersio hash: oldAgentHash, versionedHome: oldAgentVersionedHome, } - err := markUpgrade(log, - paths.DataFrom(topDir), - newAgentInstall, - oldAgentInstall, - nil, nil, OUTCOME_UPGRADE) + err := markUpgrade(log, paths.DataFrom(topDir), newAgentInstall, 
oldAgentInstall, nil, nil, OUTCOME_UPGRADE, 0) require.NoError(t, err, "error writing fake update marker") } diff --git a/internal/pkg/agent/application/upgrade/step_download_test.go b/internal/pkg/agent/application/upgrade/step_download_test.go index f1e20427c25..66ee4712074 100644 --- a/internal/pkg/agent/application/upgrade/step_download_test.go +++ b/internal/pkg/agent/application/upgrade/step_download_test.go @@ -91,7 +91,7 @@ func TestDownloadWithRetries(t *testing.T) { return &mockDownloader{expectedDownloadPath, nil}, nil } - u, err := NewUpgrader(testLogger, &settings, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -141,7 +141,7 @@ func TestDownloadWithRetries(t *testing.T) { return nil, nil } - u, err := NewUpgrader(testLogger, &settings, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -196,7 +196,7 @@ func TestDownloadWithRetries(t *testing.T) { return nil, nil } - u, err := NewUpgrader(testLogger, &settings, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -241,7 +241,7 @@ func TestDownloadWithRetries(t *testing.T) { return &mockDownloader{"", errors.New("download failed")}, nil } - u, err := NewUpgrader(testLogger, &settings, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index 65b4e878a40..eb0e0a1f77f 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -197,7 +197,7 @@ type 
agentInstall struct { } // markUpgrade marks update happened so we can handle grace period -func markUpgrade(log *logger.Logger, dataDirPath string, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, desiredOutcome UpgradeOutcome) error { +func markUpgrade(log *logger.Logger, dataDirPath string, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, desiredOutcome UpgradeOutcome, rollbackWindow time.Duration) error { if len(previousAgent.hash) > hashLen { previousAgent.hash = previousAgent.hash[:hashLen] @@ -216,13 +216,22 @@ func markUpgrade(log *logger.Logger, dataDirPath string, agent, previousAgent ag DesiredOutcome: desiredOutcome, } + if rollbackWindow > 0 { + // if we have a not empty rollback window, write the prev version in the rollbacks_available field + upgradeDetails.Metadata.RollbacksAvailable = []details.RollbackAvailable{details.RollbackAvailable{ + Version: previousAgent.version, + Home: previousAgent.versionedHome, + ValidUntil: time.Now().Add(rollbackWindow), + }} + } + markerBytes, err := yaml.Marshal(newMarkerSerializer(marker)) if err != nil { return errors.New(err, errors.TypeConfig, "failed to parse marker file") } markerPath := markerFilePath(dataDirPath) - log.Infow("Writing upgrade marker file", "file.path", markerPath, "hash", marker.Hash, "prev_hash", marker.PrevHash) + log.Infow("Writing upgrade marker file", "file.path", markerPath, "hash", marker.Hash, "prev_hash", marker.PrevHash, "content", string(markerBytes)) if err := os.WriteFile(markerPath, markerBytes, 0600); err != nil { return errors.New(err, errors.TypeFilesystem, "failed to create update marker file", errors.M(errors.MetaKeyPath, markerPath)) } diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index f82007409ba..b84f2fa4b93 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ 
b/internal/pkg/agent/application/upgrade/upgrade.go @@ -73,12 +73,13 @@ func init() { // Upgrader performs an upgrade type Upgrader struct { - log *logger.Logger - settings *artifact.Config - agentInfo info.Agent - upgradeable bool - fleetServerURI string - markerWatcher MarkerWatcher + log *logger.Logger + settings *artifact.Config + upgradeSettings *configuration.UpgradeConfig + agentInfo info.Agent + upgradeable bool + fleetServerURI string + markerWatcher MarkerWatcher } // IsUpgradeable when agent is installed and running as a service or flag was provided. @@ -89,13 +90,14 @@ func IsUpgradeable() bool { } // NewUpgrader creates an upgrader which is capable of performing upgrade operation -func NewUpgrader(log *logger.Logger, settings *artifact.Config, agentInfo info.Agent) (*Upgrader, error) { +func NewUpgrader(log *logger.Logger, settings *artifact.Config, upgradeConfig *configuration.UpgradeConfig, agentInfo info.Agent) (*Upgrader, error) { return &Upgrader{ - log: log, - settings: settings, - agentInfo: agentInfo, - upgradeable: IsUpgradeable(), - markerWatcher: newMarkerFileWatcher(markerFilePath(paths.Data()), log), + log: log, + settings: settings, + upgradeSettings: upgradeConfig, + agentInfo: agentInfo, + upgradeable: IsUpgradeable(), + markerWatcher: newMarkerFileWatcher(markerFilePath(paths.Data()), log), }, nil } @@ -147,6 +149,8 @@ func (u *Upgrader) Reload(rawConfig *config.Config) error { } u.settings = cfg.Settings.DownloadConfig + u.upgradeSettings = cfg.Settings.Upgrade + return nil } @@ -346,12 +350,11 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s hash: release.Commit(), versionedHome: currentVersionedHome, } - - if err := markUpgrade(u.log, - paths.Data(), // data dir to place the marker in - current, // new agent version data - previous, // old agent version data - action, det, OUTCOME_UPGRADE); err != nil { + rollbackWindow := time.Duration(0) + if u.upgradeSettings != nil && u.upgradeSettings.Rollback 
!= nil { // TODO && target version supports manual rollback and deferred cleanup + rollbackWindow = u.upgradeSettings.Rollback.Window + } + if err := markUpgrade(u.log, paths.Data(), current, previous, action, det, OUTCOME_UPGRADE, rollbackWindow); err != nil { u.log.Errorw("Rolling back: marking upgrade failed", "error.message", err) rollbackErr := rollbackInstall(ctx, u.log, paths.Top(), hashedDir, currentVersionedHome) return nil, goerrors.Join(err, rollbackErr) diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index e55104ee2b0..0d28354a780 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -308,7 +308,11 @@ func getConfig(streams *cli.IOStreams) *configuration.Configuration { } func initUpgradeDetails(marker *upgrade.UpdateMarker, saveMarker func(*upgrade.UpdateMarker, bool) error, log *logp.Logger) *details.Details { + // FIXME this should edit details not rewrite them upgradeDetails := details.NewDetails(version.GetAgentPackageVersion(), details.StateWatching, marker.GetActionID()) + if marker.Details != nil { + upgradeDetails.Metadata.RollbacksAvailable = marker.Details.Metadata.RollbacksAvailable + } upgradeDetails.RegisterObserver(func(details *details.Details) { marker.Details = details if err := saveMarker(marker, true); err != nil { From 8736b846bffba2fbfcbb9ca3ff42dfe56792a440 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 14 Jul 2025 15:32:27 +0200 Subject: [PATCH 08/38] remove fakeAcker in favour of generated Acker mock --- .../agent/application/upgrade/upgrade_test.go | 54 ++++++------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index 17d19252f6e..8de7d8b21af 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -29,7 +29,6 @@ import ( 
"github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/config" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" - "github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker" "github.com/elastic/elastic-agent/internal/pkg/release" v1 "github.com/elastic/elastic-agent/pkg/api/v1" "github.com/elastic/elastic-agent/pkg/control/v2/client" @@ -37,7 +36,8 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" agtversion "github.com/elastic/elastic-agent/pkg/version" - mocks "github.com/elastic/elastic-agent/testing/mocks/pkg/control/v2/client" + ackermocks "github.com/elastic/elastic-agent/testing/mocks/internal_/pkg/fleetapi/acker" + clientmocks "github.com/elastic/elastic-agent/testing/mocks/pkg/control/v2/client" ) func Test_CopyFile(t *testing.T) { @@ -239,7 +239,7 @@ func TestIsInProgress(t *testing.T) { t.Run(name, func(t *testing.T) { // Expect client.State() call to be made only if no Upgrade Watcher PIDs // are returned (i.e. no Upgrade Watcher is found to be running). 
- mc := mocks.NewClient(t) + mc := clientmocks.NewClient(t) if test.watcherPIDsFetcher != nil { pids, _ := test.watcherPIDsFetcher() if len(pids) == 0 { @@ -293,37 +293,31 @@ func TestUpgraderAckAction(t *testing.T) { require.Nil(t, u.AckAction(t.Context(), nil, action)) }) t.Run("AckAction with acker", func(t *testing.T) { - acker := &fakeAcker{} - acker.On("Ack", mock.Anything, action).Return(nil) - acker.On("Commit", mock.Anything).Return(nil) + mockAcker := ackermocks.NewAcker(t) + mockAcker.EXPECT().Ack(mock.Anything, action).Return(nil) + mockAcker.EXPECT().Commit(mock.Anything).Return(nil) - require.Nil(t, u.AckAction(t.Context(), acker, action)) - acker.AssertCalled(t, "Ack", mock.Anything, action) - acker.AssertCalled(t, "Commit", mock.Anything) + require.Nil(t, u.AckAction(t.Context(), mockAcker, action)) }) t.Run("AckAction with acker - failing commit", func(t *testing.T) { - acker := &fakeAcker{} + mockAcker := ackermocks.NewAcker(t) errCommit := errors.New("failed commit") - acker.On("Ack", mock.Anything, action).Return(nil) - acker.On("Commit", mock.Anything).Return(errCommit) + mockAcker.EXPECT().Ack(mock.Anything, action).Return(nil) + mockAcker.EXPECT().Commit(mock.Anything).Return(errCommit) - require.ErrorIs(t, u.AckAction(t.Context(), acker, action), errCommit) - acker.AssertCalled(t, "Ack", mock.Anything, action) - acker.AssertCalled(t, "Commit", mock.Anything) + require.ErrorIs(t, u.AckAction(t.Context(), mockAcker, action), errCommit) }) t.Run("AckAction with acker - failed ack", func(t *testing.T) { - acker := &fakeAcker{} + mockAcker := ackermocks.NewAcker(t) errAck := errors.New("ack error") - acker.On("Ack", mock.Anything, action).Return(errAck) - acker.On("Commit", mock.Anything).Return(nil) - - require.ErrorIs(t, u.AckAction(t.Context(), acker, action), errAck) - acker.AssertCalled(t, "Ack", mock.Anything, action) - acker.AssertNotCalled(t, "Commit", mock.Anything) + mockAcker.EXPECT().Ack(mock.Anything, action).Return(errAck) + // no 
expectation on Commit() since it shouldn't be called after an error during Ack() + + require.ErrorIs(t, u.AckAction(t.Context(), mockAcker, action), errAck) }) } @@ -1276,19 +1270,3 @@ func TestIsSameReleaseVersion(t *testing.T) { }) } } - -var _ acker.Acker = &fakeAcker{} - -type fakeAcker struct { - mock.Mock -} - -func (f *fakeAcker) Ack(ctx context.Context, action fleetapi.Action) error { - args := f.Called(ctx, action) - return args.Error(0) -} - -func (f *fakeAcker) Commit(ctx context.Context) error { - args := f.Called(ctx) - return args.Error(0) -} From ca3aa08aae6a3e8246cbd9825be97c23a203dce2 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 15 Jul 2025 10:12:02 +0200 Subject: [PATCH 09/38] Introduce WatcherHelper --- .mockery.yaml | 10 + internal/pkg/agent/application/application.go | 2 +- .../coordinator/coordinator_unit_test.go | 2 +- .../agent/application/upgrade/mocks_test.go | 262 +++++++++++++ .../application/upgrade/step_download_test.go | 8 +- .../pkg/agent/application/upgrade/upgrade.go | 199 ++++------ .../agent/application/upgrade/upgrade_test.go | 367 ++++++------------ .../pkg/agent/application/upgrade/watcher.go | 130 +++++++ .../agent/application/upgrade/watcher_test.go | 243 ++++++++++++ magefile.go | 26 +- 10 files changed, 859 insertions(+), 390 deletions(-) create mode 100644 internal/pkg/agent/application/upgrade/mocks_test.go diff --git a/.mockery.yaml b/.mockery.yaml index ea5df1eebbe..ae6b4ef9cca 100644 --- a/.mockery.yaml +++ b/.mockery.yaml @@ -37,3 +37,13 @@ packages: installationModifier: config: mockname: "InstallationModifier" + github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade: + interfaces: + WatcherHelper: + config: + inpackage: True + with-expecter: True + dir: "{{.InterfaceDirRelative}}" + mockname: "{{.Mock}}{{.InterfaceName}}" + outpkg: "{{.PackageName}}" + filename: "mocks_test.go" \ No newline at end of file diff --git a/internal/pkg/agent/application/application.go 
b/internal/pkg/agent/application/application.go index e48c8739340..0cd825ad29a 100644 --- a/internal/pkg/agent/application/application.go +++ b/internal/pkg/agent/application/application.go @@ -120,7 +120,7 @@ func New( // monitoring is not supported in bootstrap mode https://github.com/elastic/elastic-agent/issues/1761 isMonitoringSupported := !disableMonitoring && cfg.Settings.V1MonitoringEnabled - upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, cfg.Settings.Upgrade, agentInfo) + upgrader, err := upgrade.NewUpgrader(log, cfg.Settings.DownloadConfig, cfg.Settings.Upgrade, agentInfo, new(upgrade.AgentWatcherHelper)) if err != nil { return nil, nil, nil, fmt.Errorf("failed to create upgrader: %w", err) } diff --git a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go index 5dc366bc5f9..972c2d1eb7f 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go @@ -462,7 +462,7 @@ func TestCoordinatorReportsInvalidPolicy(t *testing.T) { } }() - upgradeMgr, err := upgrade.NewUpgrader(log, &artifact.Config{}, nil, &info.AgentInfo{}) + upgradeMgr, err := upgrade.NewUpgrader(log, &artifact.Config{}, nil, &info.AgentInfo{}, new(upgrade.AgentWatcherHelper)) require.NoError(t, err, "errored when creating a new upgrader") // Channels have buffer length 1, so we don't have to run on multiple diff --git a/internal/pkg/agent/application/upgrade/mocks_test.go b/internal/pkg/agent/application/upgrade/mocks_test.go new file mode 100644 index 00000000000..43eb4002e60 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/mocks_test.go @@ -0,0 +1,262 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +// Code generated by mockery v2.53.4. DO NOT EDIT. + +package upgrade + +import ( + context "context" + exec "os/exec" + + logp "github.com/elastic/elastic-agent-libs/logp" + filelock "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" + + mock "github.com/stretchr/testify/mock" + + time "time" +) + +// MockWatcherHelper is an autogenerated mock type for the WatcherHelper type +type MockWatcherHelper struct { + mock.Mock +} + +type MockWatcherHelper_Expecter struct { + mock *mock.Mock +} + +func (_m *MockWatcherHelper) EXPECT() *MockWatcherHelper_Expecter { + return &MockWatcherHelper_Expecter{mock: &_m.Mock} +} + +// InvokeWatcher provides a mock function with given fields: log, agentExecutable +func (_m *MockWatcherHelper) InvokeWatcher(log *logp.Logger, agentExecutable string) (*exec.Cmd, error) { + ret := _m.Called(log, agentExecutable) + + if len(ret) == 0 { + panic("no return value specified for InvokeWatcher") + } + + var r0 *exec.Cmd + var r1 error + if rf, ok := ret.Get(0).(func(*logp.Logger, string) (*exec.Cmd, error)); ok { + return rf(log, agentExecutable) + } + if rf, ok := ret.Get(0).(func(*logp.Logger, string) *exec.Cmd); ok { + r0 = rf(log, agentExecutable) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*exec.Cmd) + } + } + + if rf, ok := ret.Get(1).(func(*logp.Logger, string) error); ok { + r1 = rf(log, agentExecutable) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockWatcherHelper_InvokeWatcher_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InvokeWatcher' +type MockWatcherHelper_InvokeWatcher_Call struct { + *mock.Call +} + +// InvokeWatcher is a helper method to define mock.On call +// - log *logp.Logger +// - agentExecutable string +func (_e *MockWatcherHelper_Expecter) InvokeWatcher(log interface{}, agentExecutable interface{}) 
*MockWatcherHelper_InvokeWatcher_Call { + return &MockWatcherHelper_InvokeWatcher_Call{Call: _e.mock.On("InvokeWatcher", log, agentExecutable)} +} + +func (_c *MockWatcherHelper_InvokeWatcher_Call) Run(run func(log *logp.Logger, agentExecutable string)) *MockWatcherHelper_InvokeWatcher_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*logp.Logger), args[1].(string)) + }) + return _c +} + +func (_c *MockWatcherHelper_InvokeWatcher_Call) Return(_a0 *exec.Cmd, _a1 error) *MockWatcherHelper_InvokeWatcher_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockWatcherHelper_InvokeWatcher_Call) RunAndReturn(run func(*logp.Logger, string) (*exec.Cmd, error)) *MockWatcherHelper_InvokeWatcher_Call { + _c.Call.Return(run) + return _c +} + +// SelectWatcherExecutable provides a mock function with given fields: topDir, previous, current +func (_m *MockWatcherHelper) SelectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { + ret := _m.Called(topDir, previous, current) + + if len(ret) == 0 { + panic("no return value specified for SelectWatcherExecutable") + } + + var r0 string + if rf, ok := ret.Get(0).(func(string, agentInstall, agentInstall) string); ok { + r0 = rf(topDir, previous, current) + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} + +// MockWatcherHelper_SelectWatcherExecutable_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SelectWatcherExecutable' +type MockWatcherHelper_SelectWatcherExecutable_Call struct { + *mock.Call +} + +// SelectWatcherExecutable is a helper method to define mock.On call +// - topDir string +// - previous agentInstall +// - current agentInstall +func (_e *MockWatcherHelper_Expecter) SelectWatcherExecutable(topDir interface{}, previous interface{}, current interface{}) *MockWatcherHelper_SelectWatcherExecutable_Call { + return &MockWatcherHelper_SelectWatcherExecutable_Call{Call: _e.mock.On("SelectWatcherExecutable", topDir, 
previous, current)} +} + +func (_c *MockWatcherHelper_SelectWatcherExecutable_Call) Run(run func(topDir string, previous agentInstall, current agentInstall)) *MockWatcherHelper_SelectWatcherExecutable_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(string), args[1].(agentInstall), args[2].(agentInstall)) + }) + return _c +} + +func (_c *MockWatcherHelper_SelectWatcherExecutable_Call) Return(_a0 string) *MockWatcherHelper_SelectWatcherExecutable_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockWatcherHelper_SelectWatcherExecutable_Call) RunAndReturn(run func(string, agentInstall, agentInstall) string) *MockWatcherHelper_SelectWatcherExecutable_Call { + _c.Call.Return(run) + return _c +} + +// TakeOverWatcher provides a mock function with given fields: ctx, log, topDir +func (_m *MockWatcherHelper) TakeOverWatcher(ctx context.Context, log *logp.Logger, topDir string) (*filelock.AppLocker, error) { + ret := _m.Called(ctx, log, topDir) + + if len(ret) == 0 { + panic("no return value specified for TakeOverWatcher") + } + + var r0 *filelock.AppLocker + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger, string) (*filelock.AppLocker, error)); ok { + return rf(ctx, log, topDir) + } + if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger, string) *filelock.AppLocker); ok { + r0 = rf(ctx, log, topDir) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*filelock.AppLocker) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *logp.Logger, string) error); ok { + r1 = rf(ctx, log, topDir) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockWatcherHelper_TakeOverWatcher_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'TakeOverWatcher' +type MockWatcherHelper_TakeOverWatcher_Call struct { + *mock.Call +} + +// TakeOverWatcher is a helper method to define mock.On call +// - ctx context.Context +// - log *logp.Logger +// - topDir string +func (_e 
*MockWatcherHelper_Expecter) TakeOverWatcher(ctx interface{}, log interface{}, topDir interface{}) *MockWatcherHelper_TakeOverWatcher_Call { + return &MockWatcherHelper_TakeOverWatcher_Call{Call: _e.mock.On("TakeOverWatcher", ctx, log, topDir)} +} + +func (_c *MockWatcherHelper_TakeOverWatcher_Call) Run(run func(ctx context.Context, log *logp.Logger, topDir string)) *MockWatcherHelper_TakeOverWatcher_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(*logp.Logger), args[2].(string)) + }) + return _c +} + +func (_c *MockWatcherHelper_TakeOverWatcher_Call) Return(_a0 *filelock.AppLocker, _a1 error) *MockWatcherHelper_TakeOverWatcher_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockWatcherHelper_TakeOverWatcher_Call) RunAndReturn(run func(context.Context, *logp.Logger, string) (*filelock.AppLocker, error)) *MockWatcherHelper_TakeOverWatcher_Call { + _c.Call.Return(run) + return _c +} + +// WaitForWatcher provides a mock function with given fields: ctx, log, markerFilePath, waitTime +func (_m *MockWatcherHelper) WaitForWatcher(ctx context.Context, log *logp.Logger, markerFilePath string, waitTime time.Duration) error { + ret := _m.Called(ctx, log, markerFilePath, waitTime) + + if len(ret) == 0 { + panic("no return value specified for WaitForWatcher") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger, string, time.Duration) error); ok { + r0 = rf(ctx, log, markerFilePath, waitTime) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockWatcherHelper_WaitForWatcher_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WaitForWatcher' +type MockWatcherHelper_WaitForWatcher_Call struct { + *mock.Call +} + +// WaitForWatcher is a helper method to define mock.On call +// - ctx context.Context +// - log *logp.Logger +// - markerFilePath string +// - waitTime time.Duration +func (_e *MockWatcherHelper_Expecter) WaitForWatcher(ctx interface{}, 
log interface{}, markerFilePath interface{}, waitTime interface{}) *MockWatcherHelper_WaitForWatcher_Call { + return &MockWatcherHelper_WaitForWatcher_Call{Call: _e.mock.On("WaitForWatcher", ctx, log, markerFilePath, waitTime)} +} + +func (_c *MockWatcherHelper_WaitForWatcher_Call) Run(run func(ctx context.Context, log *logp.Logger, markerFilePath string, waitTime time.Duration)) *MockWatcherHelper_WaitForWatcher_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(*logp.Logger), args[2].(string), args[3].(time.Duration)) + }) + return _c +} + +func (_c *MockWatcherHelper_WaitForWatcher_Call) Return(_a0 error) *MockWatcherHelper_WaitForWatcher_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockWatcherHelper_WaitForWatcher_Call) RunAndReturn(run func(context.Context, *logp.Logger, string, time.Duration) error) *MockWatcherHelper_WaitForWatcher_Call { + _c.Call.Return(run) + return _c +} + +// NewMockWatcherHelper creates a new instance of MockWatcherHelper. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewMockWatcherHelper(t interface { + mock.TestingT + Cleanup(func()) +}) *MockWatcherHelper { + mock := &MockWatcherHelper{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/pkg/agent/application/upgrade/step_download_test.go b/internal/pkg/agent/application/upgrade/step_download_test.go index 66ee4712074..852e45d83e7 100644 --- a/internal/pkg/agent/application/upgrade/step_download_test.go +++ b/internal/pkg/agent/application/upgrade/step_download_test.go @@ -91,7 +91,7 @@ func TestDownloadWithRetries(t *testing.T) { return &mockDownloader{expectedDownloadPath, nil}, nil } - u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}, new(AgentWatcherHelper)) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -141,7 +141,7 @@ func TestDownloadWithRetries(t *testing.T) { return nil, nil } - u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}, new(AgentWatcherHelper)) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -196,7 +196,7 @@ func TestDownloadWithRetries(t *testing.T) { return nil, nil } - u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}, new(AgentWatcherHelper)) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") @@ -241,7 +241,7 @@ func TestDownloadWithRetries(t *testing.T) { return &mockDownloader{"", errors.New("download failed")}, nil } - u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}) + u, err := NewUpgrader(testLogger, &settings, nil, &info.AgentInfo{}, new(AgentWatcherHelper)) require.NoError(t, err) parsedVersion, err := agtversion.ParseVersion("8.9.0") diff --git a/internal/pkg/agent/application/upgrade/upgrade.go 
b/internal/pkg/agent/application/upgrade/upgrade.go index b84f2fa4b93..931747a6a35 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -36,8 +36,6 @@ import ( "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger" - "github.com/elastic/elastic-agent/pkg/core/process" - "github.com/elastic/elastic-agent/pkg/utils" agtversion "github.com/elastic/elastic-agent/pkg/version" currentagtversion "github.com/elastic/elastic-agent/version" ) @@ -63,6 +61,7 @@ var ( ErrUpgradeSameVersion = errors.New("upgrade did not occur because it is the same version") ErrNonFipsToFips = errors.New("cannot switch to fips mode when upgrading") ErrFipsToNonFips = errors.New("cannot switch to non-fips mode when upgrading") + ErrNilUpdateMarker = errors.New("loaded a nil update marker") ) func init() { @@ -71,6 +70,23 @@ func init() { } } +// WatcherHelper is an abstraction of operations that Upgrader will trigger on elastic-agent watcher. 
+// This is defined to help with Upgrader testing and verify interactions with elastic-agent watcher +type WatcherHelper interface { + // InvokeWatcher invokes an elastic-agent watcher using the agentExecutable passed as argument + InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error) + // SelectWatcherExecutable will return the path to the newer elastic-agent executable that will be used to invoke the + // more recent watcher between the previous (the agent that started the upgrade) and current (the agent that will run after restart) + // agent installation + SelectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string + // WaitForWatcher will listen for changes to the update marker, waiting for the elastic-agent watcher to set UPG_WATCHING state + // in the upgrade details' metadata + WaitForWatcher(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration) error + // TakeOverWatcher will look for watcher processes and terminate them while at the same time trying to acquire the watcher AppLocker. + // It will return once it managed to get the AppLocker or with an error if the lock could not be acquired. + TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) +} + // Upgrader performs an upgrade type Upgrader struct { log *logger.Logger @@ -80,6 +96,7 @@ type Upgrader struct { upgradeable bool fleetServerURI string markerWatcher MarkerWatcher + watcherHelper WatcherHelper } // IsUpgradeable when agent is installed and running as a service or flag was provided. 
@@ -90,7 +107,7 @@ func IsUpgradeable() bool { } // NewUpgrader creates an upgrader which is capable of performing upgrade operation -func NewUpgrader(log *logger.Logger, settings *artifact.Config, upgradeConfig *configuration.UpgradeConfig, agentInfo info.Agent) (*Upgrader, error) { +func NewUpgrader(log *logger.Logger, settings *artifact.Config, upgradeConfig *configuration.UpgradeConfig, agentInfo info.Agent, watcherHelper WatcherHelper) (*Upgrader, error) { return &Upgrader{ log: log, settings: settings, @@ -98,6 +115,7 @@ func NewUpgrader(log *logger.Logger, settings *artifact.Config, upgradeConfig *c agentInfo: agentInfo, upgradeable: IsUpgradeable(), markerWatcher: newMarkerFileWatcher(markerFilePath(paths.Data()), log), + watcherHelper: watcherHelper, }, nil } @@ -204,7 +222,7 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { if rollback { - return u.forceRollbackToPreviousVersion(ctx, version, action, det) + return u.forceRollbackToPreviousVersion(ctx, paths.Top(), version, action, det) } u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) @@ -360,16 +378,16 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return nil, goerrors.Join(err, rollbackErr) } - watcherExecutable := selectWatcherExecutable(paths.Top(), previous, current) + watcherExecutable := u.watcherHelper.SelectWatcherExecutable(paths.Top(), previous, current) var watcherCmd *exec.Cmd - if watcherCmd, err = InvokeWatcher(u.log, watcherExecutable); err != nil { + if watcherCmd, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable); err != nil { u.log.Errorw("Rolling back: starting watcher failed", "error.message", err) rollbackErr := 
rollbackInstall(ctx, u.log, paths.Top(), hashedDir, currentVersionedHome) return nil, goerrors.Join(err, rollbackErr) } - watcherWaitErr := waitForWatcher(ctx, u.log, markerFilePath(paths.Data()), watcherMaxWaitTime) + watcherWaitErr := u.watcherHelper.WaitForWatcher(ctx, u.log, markerFilePath(paths.Data()), watcherMaxWaitTime) if watcherWaitErr != nil { killWatcherErr := watcherCmd.Process.Kill() rollbackErr := rollbackInstall(ctx, u.log, paths.Top(), hashedDir, currentVersionedHome) @@ -388,19 +406,32 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } -func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { - // Formal checks for verifying we can rollback properly: +func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { + // check that the upgrade marker exists and is accessible + updateMarkerPath := markerFilePath(paths.DataFrom(topDir)) + _, err := os.Stat(updateMarkerPath) + if err != nil { + return nil, fmt.Errorf("stat() on upgrade marker %q failed: %w", updateMarkerPath, err) + } + + // TODO Formal checks for verifying we can rollback properly: // 1. d.Metadata.RollbacksAvailable should contain the desired version with a valid TTL (it may need to be written by main agent process before starting watcher) // 2. there has been at least the first restart with the new agent (i.e. we are not still downloading/extracting/rotating) // 3. 
upgrade marker exists // these should be revalidated after taking over watcher - err := u.PersistManualRollback(ctx) + updateMarker, err := u.persistManualRollback(ctx, topDir) if err != nil { - return nil, err + return nil, fmt.Errorf("persisting rollback in update marker: %w", err) + } + + previous, current, err := extractAgentInstallsFromMarker(updateMarker) + if err != nil { + return nil, fmt.Errorf("extracting current and previous install details: %w", err) } // Invoke watcher again - _, err = InvokeWatcher(u.log, paths.BinaryPath(paths.VersionedHome(paths.Top()), agentName)) + watcherExecutable := u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) + _, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) if err != nil { return nil, fmt.Errorf("invoking watcher: %w", err) } @@ -409,10 +440,36 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, version s } -func (u *Upgrader) PersistManualRollback(ctx context.Context) error { - watcherApplock, err := u.takeOverWatcher(ctx) +func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentInstall, current agentInstall, err error) { + previousParsedVersion, err := agtversion.ParseVersion(updateMarker.PrevVersion) if err != nil { - return fmt.Errorf("taking over watcher processes: %w", err) + return previous, current, fmt.Errorf("parsing previous version %q: %w", updateMarker.PrevVersion, err) + } + previous = agentInstall{ + parsedVersion: previousParsedVersion, + version: updateMarker.PrevVersion, + hash: updateMarker.PrevHash, + versionedHome: updateMarker.PrevVersionedHome, + } + + currentParsedVersion, err := agtversion.ParseVersion(updateMarker.Version) + if err != nil { + return previous, current, fmt.Errorf("parsing current version %q: %w", updateMarker.Version, err) + } + current = agentInstall{ + parsedVersion: currentParsedVersion, + version: updateMarker.Version, + hash: updateMarker.Hash, + versionedHome: updateMarker.VersionedHome, + } + + 
return previous, current, nil +} + +func (u *Upgrader) persistManualRollback(ctx context.Context, topDir string) (*UpdateMarker, error) { + watcherApplock, err := u.watcherHelper.TakeOverWatcher(ctx, u.log, topDir) + if err != nil { + return nil, fmt.Errorf("taking over watcher processes: %w", err) } defer func(watcherApplock *filelock.AppLocker) { releaseWatcherAppLockerErr := watcherApplock.Unlock() @@ -422,118 +479,22 @@ func (u *Upgrader) PersistManualRollback(ctx context.Context) error { }(watcherApplock) // read the upgrade marker - updateMarker, err := LoadMarker(paths.Data()) - if err != nil { - return fmt.Errorf("loading marker: %w", err) - } - updateMarker.DesiredOutcome = OUTCOME_ROLLBACK - err = SaveMarker(paths.Data(), updateMarker, true) + updateMarker, err := LoadMarker(paths.DataFrom(topDir)) if err != nil { - return fmt.Errorf("saving marker: %w", err) + return nil, fmt.Errorf("loading marker: %w", err) } - return nil -} - -func (u *Upgrader) takeOverWatcher(ctx context.Context) (*filelock.AppLocker, error) { - - takeoverCtx, takeoverCancel := context.WithTimeout(ctx, 30*time.Second) - defer takeoverCancel() - go func() { - killingTicker := time.NewTicker(500 * time.Millisecond) - defer killingTicker.Stop() - for { - select { - case <-takeoverCtx.Done(): - return - case <-killingTicker.C: - pids, err := utils.GetWatcherPIDs() - if err != nil { - u.log.Errorf("error listing watcher processes: %s", err) - continue - } - - // this should be run continuously and concurrently attempting to get the app locker - for _, pid := range pids { - u.log.Debugf("attempting to kill watcher process with PID: %d", pid) - watcherProcess, findProcErr := os.FindProcess(pid) - if findProcErr != nil { - u.log.Errorf("error finding process with PID: %d: %s", pid, findProcErr) - continue - } - killProcErr := process.Terminate(watcherProcess) - if killProcErr != nil { - u.log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) - } - u.log.Debugf("killed 
watcher process with PID: %d", pid) - } - } - } - }() - - // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited - takeOverTicker := time.NewTicker(100 * time.Millisecond) - defer takeOverTicker.Stop() - for { - select { - case <-takeoverCtx.Done(): - return nil, fmt.Errorf("timed out taking over watcher applocker") - case <-takeOverTicker.C: - locker := filelock.NewAppLocker(paths.Top(), "watcher.lock") - err := locker.TryLock() - if err != nil { - u.log.Errorf("error locking watcher applocker: %s", err) - continue - } - return locker, nil - } - } -} - -func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { - // check if the upgraded version is less than the previous (currently installed) version - if current.parsedVersion.Less(*previous.parsedVersion) { - // use the current agent executable for watch, if downgrading the old agent doesn't understand the current agent's path structure. - return paths.BinaryPath(filepath.Join(topDir, previous.versionedHome), agentName) - } else { - // use the new agent executable as it should be able to parse the new update marker - return paths.BinaryPath(filepath.Join(topDir, current.versionedHome), agentName) + if updateMarker == nil { + return nil, ErrNilUpdateMarker } -} -func waitForWatcher(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration) error { - return waitForWatcherWithTimeoutCreationFunc(ctx, log, markerFilePath, waitTime, context.WithTimeout) -} - -type createContextWithTimeout func(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) - -func waitForWatcherWithTimeoutCreationFunc(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration, createTimeoutContext createContextWithTimeout) error { - // Wait for the watcher to be up and running - watcherContext, cancel := createTimeoutContext(ctx, waitTime) - defer cancel() - - markerWatcher := 
newMarkerFileWatcher(markerFilePath, log) - err := markerWatcher.Run(watcherContext) + updateMarker.DesiredOutcome = OUTCOME_ROLLBACK + err = SaveMarker(paths.DataFrom(topDir), updateMarker, true) if err != nil { - return fmt.Errorf("error starting update marker watcher: %w", err) + return updateMarker, fmt.Errorf("saving marker: %w", err) } - log.Infof("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching) - - for { - select { - case updMarker := <-markerWatcher.Watch(): - if updMarker.Details != nil && updMarker.Details.State == details.StateWatching { - // watcher started and it is watching, all good - log.Infof("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching) - return nil - } - - case <-watcherContext.Done(): - log.Errorf("upgrade watcher did not start watching within %s or context has expired", waitTime) - return goerrors.Join(ErrWatcherNotStarted, watcherContext.Err()) - } - } + return updateMarker, nil } // Ack acks last upgrade action diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index 8de7d8b21af..8b62e757b6a 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -8,10 +8,11 @@ import ( "context" "crypto/tls" "fmt" + "io/fs" "os" + "os/exec" "path/filepath" "runtime" - "sync" "testing" "time" @@ -19,13 +20,13 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" "github.com/elastic/elastic-agent-libs/transport/httpcommon" "github.com/elastic/elastic-agent-libs/transport/tlscommon" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" - 
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" + "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/config" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" @@ -36,6 +37,7 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" agtversion "github.com/elastic/elastic-agent/pkg/version" + infomocks "github.com/elastic/elastic-agent/testing/mocks/internal_/pkg/agent/application/info" ackermocks "github.com/elastic/elastic-agent/testing/mocks/internal_/pkg/fleetapi/acker" clientmocks "github.com/elastic/elastic-agent/testing/mocks/pkg/control/v2/client" ) @@ -316,7 +318,7 @@ func TestUpgraderAckAction(t *testing.T) { errAck := errors.New("ack error") mockAcker.EXPECT().Ack(mock.Anything, action).Return(errAck) // no expectation on Commit() since it shouldn't be called after an error during Ack() - + require.ErrorIs(t, u.AckAction(t.Context(), mockAcker, action), errAck) }) } @@ -953,242 +955,6 @@ func TestCheckUpgrade(t *testing.T) { } } -func TestWaitForWatcher(t *testing.T) { - wantErrWatcherNotStarted := func(t assert.TestingT, err error, i ...interface{}) bool { - return assert.ErrorIs(t, err, ErrWatcherNotStarted, i) - } - - tests := []struct { - name string - states []details.State - stateChangeInterval time.Duration - cancelWaitContext bool - wantErr assert.ErrorAssertionFunc - }{ - { - name: "Happy path: watcher is watching already", - states: []details.State{details.StateWatching}, - stateChangeInterval: 1 * time.Millisecond, - wantErr: assert.NoError, - }, - { - name: "Sad path: watcher is never starting", - states: []details.State{details.StateReplacing}, - stateChangeInterval: 1 * time.Millisecond, - cancelWaitContext: true, - wantErr: wantErrWatcherNotStarted, - }, - { - name: "Runaround path: marker is jumping around and 
landing on watching", - states: []details.State{ - details.StateRequested, - details.StateScheduled, - details.StateDownloading, - details.StateExtracting, - details.StateReplacing, - details.StateRestarting, - details.StateWatching, - }, - stateChangeInterval: 1 * time.Millisecond, - wantErr: assert.NoError, - }, - { - name: "Timeout: marker is never created", - states: nil, - stateChangeInterval: 1 * time.Millisecond, - cancelWaitContext: true, - wantErr: wantErrWatcherNotStarted, - }, - { - name: "Timeout2: state doesn't get there in time", - states: []details.State{ - details.StateRequested, - details.StateScheduled, - details.StateDownloading, - details.StateExtracting, - details.StateReplacing, - details.StateRestarting, - }, - - stateChangeInterval: 1 * time.Millisecond, - cancelWaitContext: true, - wantErr: wantErrWatcherNotStarted, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - deadline, ok := t.Deadline() - if !ok { - deadline = time.Now().Add(5 * time.Second) - } - testCtx, testCancel := context.WithDeadline(context.Background(), deadline) - defer testCancel() - - tmpDir := t.TempDir() - updMarkerFilePath := filepath.Join(tmpDir, markerFilename) - - waitContext, waitCancel := context.WithCancel(testCtx) - defer waitCancel() - - fakeTimeout := 30 * time.Second - - // in order to take timing out of the equation provide a context that we can cancel manually - // still assert that the parent context and timeout passed are correct - var createContextFunc createContextWithTimeout = func(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { - assert.Same(t, testCtx, ctx, "parent context should be the same as the waitForWatcherCall") - assert.Equal(t, fakeTimeout, timeout, "timeout used in new context should be the same as testcase") - - return waitContext, waitCancel - } - - if len(tt.states) > 0 { - initialState := tt.states[0] - writeState(t, updMarkerFilePath, initialState) - } - - wg := 
new(sync.WaitGroup) - - var furtherStates []details.State - if len(tt.states) > 1 { - // we have more states to produce - furtherStates = tt.states[1:] - } - - wg.Add(1) - - // worker goroutine: writes out additional states while the test is blocked on waitOnWatcher() call and expires - // the wait context if cancelWaitContext is set to true. Timing of the goroutine is driven by stateChangeInterval. - go func() { - defer wg.Done() - tick := time.NewTicker(tt.stateChangeInterval) - defer tick.Stop() - for _, state := range furtherStates { - select { - case <-testCtx.Done(): - return - case <-tick.C: - writeState(t, updMarkerFilePath, state) - } - } - if tt.cancelWaitContext { - <-tick.C - waitCancel() - } - }() - - log, _ := loggertest.New(tt.name) - - tt.wantErr(t, waitForWatcherWithTimeoutCreationFunc(testCtx, log, updMarkerFilePath, fakeTimeout, createContextFunc), fmt.Sprintf("waitForWatcher %s, %v, %s, %s)", updMarkerFilePath, tt.states, tt.stateChangeInterval, fakeTimeout)) - - // wait for goroutines to finish - wg.Wait() - }) - } -} - -func writeState(t *testing.T, path string, state details.State) { - ms := newMarkerSerializer(&UpdateMarker{ - Version: "version", - Hash: "hash", - VersionedHome: "versionedHome", - UpdatedOn: time.Now(), - PrevVersion: "prev_version", - PrevHash: "prev_hash", - PrevVersionedHome: "prev_versionedhome", - Acked: false, - Action: nil, - Details: &details.Details{ - TargetVersion: "version", - State: state, - ActionID: "", - Metadata: details.Metadata{}, - }, - }) - - bytes, err := yaml.Marshal(ms) - if assert.NoError(t, err, "error marshaling the test upgrade marker") { - err = os.WriteFile(path, bytes, 0770) - assert.NoError(t, err, "error writing out the test upgrade marker") - } -} - -func Test_selectWatcherExecutable(t *testing.T) { - type args struct { - previous agentInstall - current agentInstall - } - tests := []struct { - name string - args args - want string - }{ - { - name: "Simple upgrade, we should launch the new 
(current) watcher", - args: args{ - previous: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), - }, - current: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(4, 5, 6, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), - }, - }, - want: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), - }, - { - name: "Simple downgrade, we should launch the currently installed (previous) watcher", - args: args{ - previous: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(4, 5, 6, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), - }, - current: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), - }, - }, - want: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), - }, - { - name: "Upgrade from snapshot to released version, we should launch the new (current) watcher", - args: args{ - previous: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-somehash"), - }, - current: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-someotherhash"), - }, - }, - want: filepath.Join("data", "elastic-agent-1.2.3-someotherhash"), - }, - { - name: "Downgrade from released version to SNAPSHOT, we should launch the currently installed (previous) watcher", - args: args{ - previous: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), - }, - current: agentInstall{ - parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", ""), - versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-someotherhash"), - }, - }, 
- - want: filepath.Join("data", "elastic-agent-1.2.3-somehash"), - }, - } - // Just need a top dir path. This test does not make any operation on the filesystem, so a temp dir path is as good as any - fakeTopDir := filepath.Join(t.TempDir(), "Elastic", "Agent") - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equalf(t, paths.BinaryPath(filepath.Join(fakeTopDir, tt.want), agentName), selectWatcherExecutable(fakeTopDir, tt.args.previous, tt.args.current), "selectWatcherExecutable(%v, %v)", tt.args.previous, tt.args.current) - }) - } -} - func TestIsSameReleaseVersion(t *testing.T) { tests := []struct { name string @@ -1270,3 +1036,124 @@ func TestIsSameReleaseVersion(t *testing.T) { }) } } + +func TestManualRollback(t *testing.T) { + const updatemarkerwatching456 = ` + version: 4.5.6 + hash: newver + versioned_home: data/elastic-agent-4.5.6-newver + updated_on: 2025-07-11T10:11:12.131415Z + prev_version: 1.2.3 + prev_hash: oldver + prev_versioned_home: data/elastic-agent-1.2.3-oldver + acked: false + action: null + details: + target_version: 4.5.6 + state: UPG_WATCHING + metadata: + retry_until: null + rollbacks_available: + - version: 1.2.3 + home: data/elastic-agent-1.2.3-oldver + valid_until: 2025-07-18T10:11:12.131415Z + desired_outcome: UPGRADE + ` + parsed123Version, err := agtversion.ParseVersion("1.2.3") + require.NoError(t, err) + parsed456Version, err := agtversion.ParseVersion("4.5.6") + require.NoError(t, err) + + agentInstall123 := agentInstall{ + parsedVersion: parsed123Version, + version: "1.2.3", + hash: "oldver", + versionedHome: "data/elastic-agent-1.2.3-oldver", + } + + agentInstall456 := agentInstall{ + parsedVersion: parsed456Version, + version: "4.5.6", + hash: "newver", + versionedHome: "data/elastic-agent-4.5.6-newver", + } + + type setupF func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) + type postRollbackAssertionsF func(t *testing.T, topDir string) + type testcase struct 
{ + name string + setup setupF + artifactSettings *artifact.Config + upgradeSettings *configuration.UpgradeConfig + version string + wantErr assert.ErrorAssertionFunc + additionalAsserts postRollbackAssertionsF + } + + testcases := []testcase{ + { + name: "no update marker - rollback fails", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + //do not setup anything here, let the rollback fail + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "1.2.3", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, fs.ErrNotExist) + }, + additionalAsserts: nil, + }, + { + name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "1.2.3", + wantErr: assert.NoError, + additionalAsserts: func(t *testing.T, topDir string) { + marker, loadMarkerErr := 
LoadMarker(paths.DataFrom(topDir)) + require.NoError(t, loadMarkerErr, "error loading marker") + require.NotNil(t, marker, "marker is nil") + + assert.Equal(t, OUTCOME_ROLLBACK, marker.DesiredOutcome) + require.NotNil(t, marker.Details) + assert.NotEmpty(t, marker.Details.Metadata.RollbacksAvailable) + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + log, _ := loggertest.New(t.Name()) + mockAgentInfo := infomocks.NewAgent(t) + mockWatcherHelper := NewMockWatcherHelper(t) + topDir := t.TempDir() + err := os.MkdirAll(paths.DataFrom(topDir), 0777) + require.NoError(t, err, "error creating data directory in topDir %q", topDir) + + if tc.setup != nil { + tc.setup(t, topDir, mockAgentInfo, mockWatcherHelper) + } + + upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper) + require.NoError(t, err, "error instantiating upgrader") + + _, err = upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.version, nil, nil) + tc.wantErr(t, err, "unexpected error returned by forceRollbackToPreviousVersion()") + if tc.additionalAsserts != nil { + tc.additionalAsserts(t, topDir) + } + }) + } +} diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index df7ee03df70..1d54669a6fe 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -8,13 +8,20 @@ import ( "context" "errors" "fmt" + "os" + "os/exec" + "path/filepath" "time" "google.golang.org/grpc" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/core/process" + 
"github.com/elastic/elastic-agent/pkg/utils" ) const ( @@ -257,3 +264,126 @@ func (ch *AgentWatcher) checkFailures() bool { } return false } + +// Ensure that AgentWatcherHelper implements the WatcherHelper interface +var _ WatcherHelper = &AgentWatcherHelper{} + +type AgentWatcherHelper struct { +} + +func (a AgentWatcherHelper) InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error) { + return InvokeWatcher(log, agentExecutable) +} + +func (a AgentWatcherHelper) SelectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { + return selectWatcherExecutable(topDir, previous, current) +} + +func (a AgentWatcherHelper) WaitForWatcher(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration) error { + return waitForWatcher(ctx, log, markerFilePath, waitTime) +} + +func (a AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { + return takeOverWatcher(ctx, log, topDir) +} + +// Private functions providing implementation of AgentWatcherHelper +func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { + takeoverCtx, takeoverCancel := context.WithTimeout(ctx, 30*time.Second) + defer takeoverCancel() + go func() { + killingTicker := time.NewTicker(500 * time.Millisecond) + defer killingTicker.Stop() + for { + select { + case <-takeoverCtx.Done(): + return + case <-killingTicker.C: + pids, err := utils.GetWatcherPIDs() + if err != nil { + log.Errorf("error listing watcher processes: %s", err) + continue + } + + // this should be run continuously and concurrently attempting to get the app locker + for _, pid := range pids { + log.Debugf("attempting to kill watcher process with PID: %d", pid) + watcherProcess, findProcErr := os.FindProcess(pid) + if findProcErr != nil { + log.Errorf("error finding process with PID: %d: %s", pid, findProcErr) + continue + } + killProcErr := 
process.Terminate(watcherProcess) + if killProcErr != nil { + log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) + } + log.Debugf("killed watcher process with PID: %d", pid) + } + } + } + }() + + // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited + takeOverTicker := time.NewTicker(100 * time.Millisecond) + defer takeOverTicker.Stop() + for { + select { + case <-takeoverCtx.Done(): + return nil, fmt.Errorf("timed out taking over watcher applocker") + case <-takeOverTicker.C: + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err := locker.TryLock() + if err != nil { + log.Errorf("error locking watcher applocker: %s", err) + continue + } + return locker, nil + } + } +} + +func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { + // check if the upgraded version is less than the previous (currently installed) version + if current.parsedVersion.Less(*previous.parsedVersion) { + // use the current agent executable for watch, if downgrading the old agent doesn't understand the current agent's path structure. 
+ return paths.BinaryPath(filepath.Join(topDir, previous.versionedHome), agentName) + } else { + // use the new agent executable as it should be able to parse the new update marker + return paths.BinaryPath(filepath.Join(topDir, current.versionedHome), agentName) + } +} + +func waitForWatcher(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration) error { + return waitForWatcherWithTimeoutCreationFunc(ctx, log, markerFilePath, waitTime, context.WithTimeout) +} + +type createContextWithTimeout func(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) + +func waitForWatcherWithTimeoutCreationFunc(ctx context.Context, log *logger.Logger, markerFilePath string, waitTime time.Duration, createTimeoutContext createContextWithTimeout) error { + // Wait for the watcher to be up and running + watcherContext, cancel := createTimeoutContext(ctx, waitTime) + defer cancel() + + markerWatcher := newMarkerFileWatcher(markerFilePath, log) + err := markerWatcher.Run(watcherContext) + if err != nil { + return fmt.Errorf("error starting update marker watcher: %w", err) + } + + log.Infof("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching) + + for { + select { + case updMarker := <-markerWatcher.Watch(): + if updMarker.Details != nil && updMarker.Details.State == details.StateWatching { + // watcher started and it is watching, all good + log.Infof("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching) + return nil + } + + case <-watcherContext.Done(): + log.Errorf("upgrade watcher did not start watching within %s or context has expired", waitTime) + return errors.Join(ErrWatcherNotStarted, watcherContext.Err()) + } + } +} diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index b639df0b2f4..61a46522a69 100644 --- 
a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -8,16 +8,23 @@ import ( "context" "fmt" "net" + "os" + "path/filepath" + "sync" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" + "gopkg.in/yaml.v3" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" + agtversion "github.com/elastic/elastic-agent/pkg/version" ) func TestWatcher_CannotConnect(t *testing.T) { @@ -623,3 +630,239 @@ func (s *mockDaemon) Client() client.Client { func (s *mockDaemon) StateWatch(_ *cproto.Empty, srv cproto.ElasticAgentControl_StateWatchServer) error { return s.watch(srv) } + +func Test_selectWatcherExecutable(t *testing.T) { + type args struct { + previous agentInstall + current agentInstall + } + tests := []struct { + name string + args args + want string + }{ + { + name: "Simple upgrade, we should launch the new (current) watcher", + args: args{ + previous: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), + }, + current: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(4, 5, 6, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), + }, + }, + want: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), + }, + { + name: "Simple downgrade, we should launch the currently installed (previous) watcher", + args: args{ + previous: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(4, 5, 6, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), + }, + current: agentInstall{ + parsedVersion: 
agtversion.NewParsedSemVer(1, 2, 3, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), + }, + }, + want: filepath.Join("data", "elastic-agent-4.5.6-someotherhash"), + }, + { + name: "Upgrade from snapshot to released version, we should launch the new (current) watcher", + args: args{ + previous: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-somehash"), + }, + current: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-someotherhash"), + }, + }, + want: filepath.Join("data", "elastic-agent-1.2.3-someotherhash"), + }, + { + name: "Downgrade from released version to SNAPSHOT, we should launch the currently installed (previous) watcher", + args: args{ + previous: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-somehash"), + }, + current: agentInstall{ + parsedVersion: agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", ""), + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-someotherhash"), + }, + }, + + want: filepath.Join("data", "elastic-agent-1.2.3-somehash"), + }, + } + // Just need a top dir path. 
This test does not make any operation on the filesystem, so a temp dir path is as good as any + fakeTopDir := filepath.Join(t.TempDir(), "Elastic", "Agent") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, paths.BinaryPath(filepath.Join(fakeTopDir, tt.want), agentName), selectWatcherExecutable(fakeTopDir, tt.args.previous, tt.args.current), "selectWatcherExecutable(%v, %v)", tt.args.previous, tt.args.current) + }) + } +} + +func TestWaitForWatcher(t *testing.T) { + wantErrWatcherNotStarted := func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrWatcherNotStarted, i) + } + + tests := []struct { + name string + states []details.State + stateChangeInterval time.Duration + cancelWaitContext bool + wantErr assert.ErrorAssertionFunc + }{ + { + name: "Happy path: watcher is watching already", + states: []details.State{details.StateWatching}, + stateChangeInterval: 1 * time.Millisecond, + wantErr: assert.NoError, + }, + { + name: "Sad path: watcher is never starting", + states: []details.State{details.StateReplacing}, + stateChangeInterval: 1 * time.Millisecond, + cancelWaitContext: true, + wantErr: wantErrWatcherNotStarted, + }, + { + name: "Runaround path: marker is jumping around and landing on watching", + states: []details.State{ + details.StateRequested, + details.StateScheduled, + details.StateDownloading, + details.StateExtracting, + details.StateReplacing, + details.StateRestarting, + details.StateWatching, + }, + stateChangeInterval: 1 * time.Millisecond, + wantErr: assert.NoError, + }, + { + name: "Timeout: marker is never created", + states: nil, + stateChangeInterval: 1 * time.Millisecond, + cancelWaitContext: true, + wantErr: wantErrWatcherNotStarted, + }, + { + name: "Timeout2: state doesn't get there in time", + states: []details.State{ + details.StateRequested, + details.StateScheduled, + details.StateDownloading, + details.StateExtracting, + details.StateReplacing, + 
details.StateRestarting, + }, + + stateChangeInterval: 1 * time.Millisecond, + cancelWaitContext: true, + wantErr: wantErrWatcherNotStarted, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + deadline, ok := t.Deadline() + if !ok { + deadline = time.Now().Add(5 * time.Second) + } + testCtx, testCancel := context.WithDeadline(context.Background(), deadline) + defer testCancel() + + tmpDir := t.TempDir() + updMarkerFilePath := filepath.Join(tmpDir, markerFilename) + + waitContext, waitCancel := context.WithCancel(testCtx) + defer waitCancel() + + fakeTimeout := 30 * time.Second + + // in order to take timing out of the equation provide a context that we can cancel manually + // still assert that the parent context and timeout passed are correct + var createContextFunc createContextWithTimeout = func(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { + assert.Same(t, testCtx, ctx, "parent context should be the same as the waitForWatcherCall") + assert.Equal(t, fakeTimeout, timeout, "timeout used in new context should be the same as testcase") + + return waitContext, waitCancel + } + + if len(tt.states) > 0 { + initialState := tt.states[0] + writeState(t, updMarkerFilePath, initialState) + } + + wg := new(sync.WaitGroup) + + var furtherStates []details.State + if len(tt.states) > 1 { + // we have more states to produce + furtherStates = tt.states[1:] + } + + wg.Add(1) + + // worker goroutine: writes out additional states while the test is blocked on waitOnWatcher() call and expires + // the wait context if cancelWaitContext is set to true. Timing of the goroutine is driven by stateChangeInterval. 
+ go func() { + defer wg.Done() + tick := time.NewTicker(tt.stateChangeInterval) + defer tick.Stop() + for _, state := range furtherStates { + select { + case <-testCtx.Done(): + return + case <-tick.C: + writeState(t, updMarkerFilePath, state) + } + } + if tt.cancelWaitContext { + <-tick.C + waitCancel() + } + }() + + log, _ := loggertest.New(tt.name) + + tt.wantErr(t, waitForWatcherWithTimeoutCreationFunc(testCtx, log, updMarkerFilePath, fakeTimeout, createContextFunc), fmt.Sprintf("waitForWatcher %s, %v, %s, %s)", updMarkerFilePath, tt.states, tt.stateChangeInterval, fakeTimeout)) + + // wait for goroutines to finish + wg.Wait() + }) + } +} + +func writeState(t *testing.T, path string, state details.State) { + ms := newMarkerSerializer(&UpdateMarker{ + Version: "version", + Hash: "hash", + VersionedHome: "versionedHome", + UpdatedOn: time.Now(), + PrevVersion: "prev_version", + PrevHash: "prev_hash", + PrevVersionedHome: "prev_versionedhome", + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "version", + State: state, + ActionID: "", + Metadata: details.Metadata{}, + }, + }) + + bytes, err := yaml.Marshal(ms) + if assert.NoError(t, err, "error marshaling the test upgrade marker") { + err = os.WriteFile(path, bytes, 0770) + assert.NoError(t, err, "error writing out the test upgrade marker") + } +} diff --git a/magefile.go b/magefile.go index ede727682fe..a3942cd9e53 100644 --- a/magefile.go +++ b/magefile.go @@ -262,31 +262,7 @@ func (Dev) RegenerateMocks() error { return fmt.Errorf("generating mocks: %w", err) } - // change CWD - workingDir, err := os.Getwd() - if err != nil { - return fmt.Errorf("retrieving CWD: %w", err) - } - // restore the working directory when exiting the function - defer func() { - err := os.Chdir(workingDir) - if err != nil { - panic(fmt.Errorf("failed to restore working dir %q: %w", workingDir, err)) - } - }() - - mPath, err := mocksPath() - if err != nil { - return fmt.Errorf("retrieving mocks path: %w", err) 
- } - - err = os.Chdir(mPath) - if err != nil { - return fmt.Errorf("changing current directory to %q: %w", mPath, err) - } - - mg.Deps(devtools.AddLicenseHeaders) - mg.Deps(devtools.GoImports) + mg.Deps(devtools.Format) return nil } From f01bfe71594d6f66f6b973d04e1ba89d8133f654 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 16 Jul 2025 18:45:06 +0200 Subject: [PATCH 10/38] Add tests for available_rollbacks --- .../application/upgrade/rollback_test.go | 3 +- .../agent/application/upgrade/step_mark.go | 17 +- .../application/upgrade/step_mark_test.go | 184 ++++++++++++++++++ .../pkg/agent/application/upgrade/upgrade.go | 2 +- 4 files changed, 197 insertions(+), 9 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback_test.go b/internal/pkg/agent/application/upgrade/rollback_test.go index 26c5813c542..93ca32278f3 100644 --- a/internal/pkg/agent/application/upgrade/rollback_test.go +++ b/internal/pkg/agent/application/upgrade/rollback_test.go @@ -11,6 +11,7 @@ import ( "path/filepath" "runtime" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -504,6 +505,6 @@ func createUpdateMarker(t *testing.T, log *logger.Logger, topDir, newAgentVersio hash: oldAgentHash, versionedHome: oldAgentVersionedHome, } - err := markUpgrade(log, paths.DataFrom(topDir), newAgentInstall, oldAgentInstall, nil, nil, OUTCOME_UPGRADE, 0) + err := markUpgrade(log, paths.DataFrom(topDir), time.Now(), newAgentInstall, oldAgentInstall, nil, nil, OUTCOME_UPGRADE, 0) require.NoError(t, err, "error writing fake update marker") } diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index eb0e0a1f77f..9c87ef75fa2 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -197,7 +197,7 @@ type agentInstall struct { } // markUpgrade marks update happened so we can handle grace period -func markUpgrade(log 
*logger.Logger, dataDirPath string, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, desiredOutcome UpgradeOutcome, rollbackWindow time.Duration) error { +func markUpgrade(log *logger.Logger, dataDirPath string, updatedOn time.Time, agent, previousAgent agentInstall, action *fleetapi.ActionUpgrade, upgradeDetails *details.Details, desiredOutcome UpgradeOutcome, rollbackWindow time.Duration) error { if len(previousAgent.hash) > hashLen { previousAgent.hash = previousAgent.hash[:hashLen] @@ -207,7 +207,7 @@ func markUpgrade(log *logger.Logger, dataDirPath string, agent, previousAgent ag Version: agent.version, Hash: agent.hash, VersionedHome: agent.versionedHome, - UpdatedOn: time.Now(), + UpdatedOn: updatedOn, PrevVersion: previousAgent.version, PrevHash: previousAgent.hash, PrevVersionedHome: previousAgent.versionedHome, @@ -217,12 +217,15 @@ func markUpgrade(log *logger.Logger, dataDirPath string, agent, previousAgent ag } if rollbackWindow > 0 { + // if we have a not empty rollback window, write the prev version in the rollbacks_available field - upgradeDetails.Metadata.RollbacksAvailable = []details.RollbackAvailable{details.RollbackAvailable{ - Version: previousAgent.version, - Home: previousAgent.versionedHome, - ValidUntil: time.Now().Add(rollbackWindow), - }} + upgradeDetails.Metadata.RollbacksAvailable = []details.RollbackAvailable{ + { + Version: previousAgent.version, + Home: previousAgent.versionedHome, + ValidUntil: updatedOn.Add(rollbackWindow), + }, + } } markerBytes, err := yaml.Marshal(newMarkerSerializer(marker)) diff --git a/internal/pkg/agent/application/upgrade/step_mark_test.go b/internal/pkg/agent/application/upgrade/step_mark_test.go index fc1731e7b24..d3c975f749d 100644 --- a/internal/pkg/agent/application/upgrade/step_mark_test.go +++ b/internal/pkg/agent/application/upgrade/step_mark_test.go @@ -7,13 +7,17 @@ package upgrade import ( "os" "path/filepath" + "runtime" "testing" "time" + 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" + "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" + agtversion "github.com/elastic/elastic-agent/pkg/version" ) func TestSaveAndLoadMarker_NoLoss(t *testing.T) { @@ -260,3 +264,183 @@ desired_outcome: true }) } } + +func TestMarkUpgrade(t *testing.T) { + var parsed123SNAPSHOT = agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", "") + var parsed456SNAPSHOT = agtversion.NewParsedSemVer(4, 5, 6, "SNAPSHOT", "") + + // fix a timestamp (truncated to the second because of loss of precision during marshalling/unmarshalling) + updatedOnNow := time.Now().UTC().Truncate(time.Second) + + type args struct { + updatedOn time.Time + currentAgent agentInstall + previousAgent agentInstall + action *fleetapi.ActionUpgrade + details *details.Details + desiredOutcome UpgradeOutcome + rollbackWindow time.Duration + } + type workingDirHook func(t *testing.T, dataDir string) + + testcases := []struct { + name string + setupBeforeMark workingDirHook + args args + wantErr assert.ErrorAssertionFunc + assertAfterMark workingDirHook + }{ + { + name: "error writing update marker - check error", + setupBeforeMark: func(t *testing.T, dataDir string) { + + // read-only permissions on directories don't work on windows, skip + if runtime.GOOS == "windows" { + t.Skip("skipping test on windows since readonly permissions on directory don't work") + } + + err := os.Chmod(dataDir, 0555) + require.NoError(t, err, "error setting dataDir read-only") + }, + args: args{ + updatedOn: updatedOnNow, + currentAgent: agentInstall{ + parsedVersion: parsed456SNAPSHOT, + version: "4.5.6-SNAPSHOT", + hash: "curagt", + versionedHome: filepath.Join("data", "elastic-agent-4.5.6-SNAPSHOT-curagt"), + }, + previousAgent: agentInstall{ + parsedVersion: parsed123SNAPSHOT, + version: "1.2.3-SNAPSHOT", + 
hash: "prvagt", + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + }, + action: nil, + details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""), + desiredOutcome: OUTCOME_UPGRADE, + rollbackWindow: 0, + }, + wantErr: assert.Error, + }, + { + name: "no rollback window specified - no available rollbacks", + args: args{ + updatedOn: updatedOnNow, + currentAgent: agentInstall{ + parsedVersion: parsed456SNAPSHOT, + version: "4.5.6-SNAPSHOT", + hash: "curagt", + versionedHome: filepath.Join("data", "elastic-agent-4.5.6-SNAPSHOT-curagt"), + }, + previousAgent: agentInstall{ + parsedVersion: parsed123SNAPSHOT, + version: "1.2.3-SNAPSHOT", + hash: "prvagt", + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + }, + action: nil, + details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""), + desiredOutcome: OUTCOME_UPGRADE, + rollbackWindow: 0, + }, + wantErr: assert.NoError, + assertAfterMark: func(t *testing.T, dataDir string) { + actualMarker, err := LoadMarker(dataDir) + require.NoError(t, err, "error reading actualMarker content after writing") + + expectedMarker := &UpdateMarker{ + Version: "4.5.6-SNAPSHOT", + Hash: "curagt", + VersionedHome: filepath.Join("data", "elastic-agent-4.5.6-SNAPSHOT-curagt"), + UpdatedOn: updatedOnNow, + PrevVersion: "1.2.3-SNAPSHOT", + PrevHash: "prvagt", + PrevVersionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "4.5.6-SNAPSHOT", + State: "UPG_REPLACING", + ActionID: "", + Metadata: details.Metadata{}, + }, + DesiredOutcome: OUTCOME_UPGRADE, + } + assert.Equal(t, expectedMarker, actualMarker) + }, + }, + { + name: "rollback window specified - available rollbacks must be present", + args: args{ + updatedOn: updatedOnNow, + currentAgent: agentInstall{ + parsedVersion: parsed456SNAPSHOT, + version: "4.5.6-SNAPSHOT", + hash: "curagt", + versionedHome: 
filepath.Join("data", "elastic-agent-4.5.6-SNAPSHOT-curagt"), + }, + previousAgent: agentInstall{ + parsedVersion: parsed123SNAPSHOT, + version: "1.2.3-SNAPSHOT", + hash: "prvagt", + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + }, + action: nil, + details: details.NewDetails("4.5.6-SNAPSHOT", details.StateReplacing, ""), + desiredOutcome: OUTCOME_UPGRADE, + rollbackWindow: 7 * 24 * time.Hour, + }, + wantErr: assert.NoError, + assertAfterMark: func(t *testing.T, dataDir string) { + actualMarker, err := LoadMarker(dataDir) + require.NoError(t, err, "error reading actualMarker content after writing") + + expectedMarker := &UpdateMarker{ + Version: "4.5.6-SNAPSHOT", + Hash: "curagt", + VersionedHome: filepath.Join("data", "elastic-agent-4.5.6-SNAPSHOT-curagt"), + UpdatedOn: updatedOnNow, + PrevVersion: "1.2.3-SNAPSHOT", + PrevHash: "prvagt", + PrevVersionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "4.5.6-SNAPSHOT", + State: "UPG_REPLACING", + ActionID: "", + Metadata: details.Metadata{ + RollbacksAvailable: []details.RollbackAvailable{ + { + Version: "1.2.3-SNAPSHOT", + Home: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + ValidUntil: updatedOnNow.Add(7 * 24 * time.Hour), + }, + }, + }, + }, + DesiredOutcome: OUTCOME_UPGRADE, + } + assert.Equal(t, expectedMarker, actualMarker) + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + dataDir := t.TempDir() + log, _ := loggertest.New(t.Name()) + + if tc.setupBeforeMark != nil { + tc.setupBeforeMark(t, dataDir) + } + + err := markUpgrade(log, dataDir, tc.args.updatedOn, tc.args.currentAgent, tc.args.previousAgent, tc.args.action, tc.args.details, tc.args.desiredOutcome, tc.args.rollbackWindow) + tc.wantErr(t, err) + if tc.assertAfterMark != nil { + tc.assertAfterMark(t, dataDir) + } + }) + } +} diff --git 
a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 931747a6a35..ea969d9de34 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -372,7 +372,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s if u.upgradeSettings != nil && u.upgradeSettings.Rollback != nil { // TODO && target version supports manual rollback and deferred cleanup rollbackWindow = u.upgradeSettings.Rollback.Window } - if err := markUpgrade(u.log, paths.Data(), current, previous, action, det, OUTCOME_UPGRADE, rollbackWindow); err != nil { + if err := markUpgrade(u.log, paths.Data(), time.Now(), current, previous, action, det, OUTCOME_UPGRADE, rollbackWindow); err != nil { u.log.Errorw("Rolling back: marking upgrade failed", "error.message", err) rollbackErr := rollbackInstall(ctx, u.log, paths.Top(), hashedDir, currentVersionedHome) return nil, goerrors.Join(err, rollbackErr) From 903e4bfd45d673fd1c8a66714e7fb8b14a06539b Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 16 Jul 2025 18:56:23 +0200 Subject: [PATCH 11/38] Add tests for takeOverWatcher --- .../filelock/testlocker/.gitignore | 2 + .../application/filelock/testlocker/main.go | 64 ++++++++ .../pkg/agent/application/upgrade/watcher.go | 25 ++- .../agent/application/upgrade/watcher_test.go | 155 ++++++++++++++++++ magefile.go | 1 + pkg/core/process/process.go | 2 + 6 files changed, 240 insertions(+), 9 deletions(-) create mode 100644 internal/pkg/agent/application/filelock/testlocker/.gitignore create mode 100644 internal/pkg/agent/application/filelock/testlocker/main.go diff --git a/internal/pkg/agent/application/filelock/testlocker/.gitignore b/internal/pkg/agent/application/filelock/testlocker/.gitignore new file mode 100644 index 00000000000..1afe2659727 --- /dev/null +++ b/internal/pkg/agent/application/filelock/testlocker/.gitignore @@ -0,0 +1,2 @@ +# Ignore test binary 
+testlocker \ No newline at end of file diff --git a/internal/pkg/agent/application/filelock/testlocker/main.go b/internal/pkg/agent/application/filelock/testlocker/main.go new file mode 100644 index 00000000000..ed9817e4c58 --- /dev/null +++ b/internal/pkg/agent/application/filelock/testlocker/main.go @@ -0,0 +1,64 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +// This is a simple program that will lock an applocker using a file passed using the -lockfile option, used for testing file lock works properly. +// os.Interrupt or signal.SIGTERM will make the program release the lock and exit +package main + +import ( + "flag" + "log" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" +) + +const AcquiredLockLogFmt = "Acquired lock on file %s\n" + +const lockFileFlagName = "lockfile" +const ignoreSignalFlagName = "ignoresignals" + +var lockFile = flag.String(lockFileFlagName, "", "path to lock file") +var ignoreSignals = flag.Bool(ignoreSignalFlagName, false, "ignore signals") + +func main() { + + signalCh := make(chan os.Signal, 1) + signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) + + flag.Parse() + if *lockFile == "" { + log.Fatalf("No lockfile specified. 
Please run %s -%s ", os.Args[0], lockFileFlagName) + } + + appLocker := filelock.NewAppLocker(filepath.Dir(*lockFile), filepath.Base(*lockFile)) + + err := appLocker.TryLock() + if err != nil { + log.Fatalf("Error locking %s: %s", *lockFile, err.Error()) + } + + defer func(aLocker *filelock.AppLocker) { + + if unlockErr := aLocker.Unlock(); unlockErr != nil { + log.Printf("Error unlocking %s: %s", *lockFile, unlockErr.Error()) + } + }(appLocker) + + log.Printf(AcquiredLockLogFmt, *lockFile) + + for { + s := <-signalCh + if *ignoreSignals { + log.Printf("Received signal: %s, ignoring it...", s.String()) + continue + } + + log.Printf("Received signal: %s, exiting", s.String()) + break + } +} diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 1d54669a6fe..22d7fffbf9c 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -28,6 +28,8 @@ const ( statusCheckMissesAllowed = 4 // enable 2 minute start (30 second periods) statusLossesAllowed = 2 // enable connection lost to agent twice statusFailureFlipFlopsAllowed = 3 // no more than three failure flip-flops allowed + + watcherApplockerFileName = "watcher.lock" ) var ( @@ -284,22 +286,26 @@ func (a AgentWatcherHelper) WaitForWatcher(ctx context.Context, log *logger.Logg } func (a AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { - return takeOverWatcher(ctx, log, topDir) + return takeOverWatcher(ctx, log, topDir, utils.GetWatcherPIDs, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) } +// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. 
+// This will allow for easier testing of takeOverWatcher using fake binaries +type watcherPIDsFetcher func() ([]int, error) + // Private functions providing implementation of AgentWatcherHelper -func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { - takeoverCtx, takeoverCancel := context.WithTimeout(ctx, 30*time.Second) +func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, pidFetchFunc watcherPIDsFetcher, timeout time.Duration, watcherSweepInterval time.Duration, takeOverInterval time.Duration) (*filelock.AppLocker, error) { + takeoverCtx, takeoverCancel := context.WithTimeout(ctx, timeout) defer takeoverCancel() go func() { - killingTicker := time.NewTicker(500 * time.Millisecond) - defer killingTicker.Stop() + sweepTicker := time.NewTicker(watcherSweepInterval) + defer sweepTicker.Stop() for { select { case <-takeoverCtx.Done(): return - case <-killingTicker.C: - pids, err := utils.GetWatcherPIDs() + case <-sweepTicker.C: + pids, err := pidFetchFunc() if err != nil { log.Errorf("error listing watcher processes: %s", err) continue @@ -316,6 +322,7 @@ func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*f killProcErr := process.Terminate(watcherProcess) if killProcErr != nil { log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) + continue } log.Debugf("killed watcher process with PID: %d", pid) } @@ -324,14 +331,14 @@ func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*f }() // we should retry to take over the AppLocker for 30s, but AppLocker interface is limited - takeOverTicker := time.NewTicker(100 * time.Millisecond) + takeOverTicker := time.NewTicker(takeOverInterval) defer takeOverTicker.Stop() for { select { case <-takeoverCtx.Done(): return nil, fmt.Errorf("timed out taking over watcher applocker") case <-takeOverTicker.C: - locker := filelock.NewAppLocker(topDir, "watcher.lock") + locker := 
filelock.NewAppLocker(topDir, watcherApplockerFileName) err := locker.TryLock() if err != nil { log.Errorf("error locking watcher applocker: %s", err) diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 61a46522a69..5f28cdd05ec 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -10,6 +10,7 @@ import ( "net" "os" "path/filepath" + "runtime" "sync" "testing" "time" @@ -19,11 +20,13 @@ import ( "google.golang.org/grpc" "gopkg.in/yaml.v3" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" + "github.com/elastic/elastic-agent/pkg/core/process" agtversion "github.com/elastic/elastic-agent/pkg/version" ) @@ -866,3 +869,155 @@ func writeState(t *testing.T, path string, state details.State) { assert.NoError(t, err, "error writing out the test upgrade marker") } } + +// TestTakeOverWatcher verifies that takeOverWatcher behaves within expectations. +// This test cannot run in parallel because it deals with launching test processes and verifying their state. 
+// In case of aggressive PID reuse along with parallel execution, this test could kill "innocent" processes +func TestTakeOverWatcher(t *testing.T) { + testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") + if runtime.GOOS == "windows" { + testExecutablePath += ".exe" + } + testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) + require.NoError(t, err, "error calculating absolute test executable part") + + require.FileExists(t, testExecutableAbsolutePath, + "testlocker binary not found.\n"+ + "Check that:\n"+ + "- test binaries have been built with mage dev:buildtestbinaries\n"+ + "- the path of the executable is correct") + + returnCmdPIDsFetcher := func(cmds ...*process.Info) watcherPIDsFetcher { + return func() ([]int, error) { + pids := make([]int, 0, len(cmds)) + for _, c := range cmds { + if c.Process != nil { + pids = append(pids, c.Process.Pid) + } + } + + return pids, nil + } + } + + type setupFunc func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) + type assertFunc func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) + + testcases := []struct { + name string + setup setupFunc + wantErr assert.ErrorAssertionFunc + assertPostTakeover assertFunc + }{ + { + name: "no contention for watcher applocker", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) { + // nothing to do here, always return an empty list of pids + return func() ([]int, error) { + return nil, nil + }, nil + }, + wantErr: assert.NoError, + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, _ []*process.Info) { + assert.NotNil(t, appLocker, "appLocker should not be nil") + assert.FileExists(t, filepath.Join(workdir, watcherApplockerFileName)) + }, + }, + { + name: "contention with test binary listening to signals: test binary is terminated gracefully", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher,
[]*process.Info) { + cancelFunc, cmd := createTestlockerCommand(t.Context(), t, testExecutableAbsolutePath, workdir, false) + t.Cleanup(cancelFunc) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, watcherApplockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*process.Info{cmd} + }, + wantErr: assert.NoError, + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) { + assert.NotNil(t, appLocker, "appLocker should not be nil") + assert.FileExists(t, filepath.Join(workdir, watcherApplockerFileName)) + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess.Cmd, "test locker process info should have exec.Cmd set") + err = testlockerProcess.Cmd.Wait() + assert.NoError(t, err, "error waiting for testlocker process to terminate") + if assert.NotNil(t, testlockerProcess.Cmd.ProcessState, "test locker process should have completed and process state set") { + assert.True(t, testlockerProcess.Cmd.ProcessState.Success(), "test locker process should be successful") + } + }, + }, + { + name: "contention with test binary not listening to signals: test binary is not terminated and error is returned by takeOverWatcher", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) { + cancelFunc, cmd := createTestlockerCommand(t.Context(), t, testExecutableAbsolutePath, workdir, true) + t.Cleanup(cancelFunc) + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, 
filepath.Join(workdir, watcherApplockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*process.Info{cmd} + }, + wantErr: assert.Error, + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) { + assert.Nil(t, appLocker, "appLocker should be nil") + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess.Process, "testlocker process should not be nil") + assert.Nil(t, testlockerProcess.Cmd.ProcessState, "testlocker process should not have ProcessState set since it should still be running") + err := testlockerProcess.Process.Kill() + assert.NoError(t, err, "error killing testlocker process") + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + workDir := t.TempDir() + logger, logs := loggertest.New(t.Name()) + pidFetcher, cmds := tc.setup(t, workDir) + + appLocker, err := takeOverWatcher(t.Context(), logger, workDir, pidFetcher, 10*time.Second, 500*time.Millisecond, 100*time.Millisecond) + loggertest.PrintObservedLogs(logs.TakeAll(), t.Log) + + tc.wantErr(t, err) + if appLocker != nil { + defer func(appLocker *filelock.AppLocker) { + unlockErr := appLocker.Unlock() + assert.NoError(t, unlockErr, "error unlocking the app locker") + }(appLocker) + } + if tc.assertPostTakeover != nil { + tc.assertPostTakeover(t, workDir, appLocker, cmds) + } + }) + } + +} + +func createTestlockerCommand(ctx context.Context, t *testing.T, testExecutablePath string, workdir string, ignoreSignals bool) (context.CancelFunc, *process.Info) { + cmdCtx, cmdCancel := context.WithCancel(ctx) + args := []string{"-lockfile", filepath.Join(workdir, watcherApplockerFileName)} + if ignoreSignals { + args = 
append(args, "-ignoresignals") + } + proc, err := process.Start( + testExecutablePath, + process.WithArgs(args), + process.WithContext(cmdCtx), + ) + require.NoError(t, err, "error starting testlocker binary") + return cmdCancel, proc +} diff --git a/magefile.go b/magefile.go index a3942cd9e53..3e9ef701305 100644 --- a/magefile.go +++ b/magefile.go @@ -414,6 +414,7 @@ func getTestBinariesPath() ([]string, error) { filepath.Join(wd, "internal", "pkg", "agent", "install", "testblocking"), filepath.Join(wd, "pkg", "core", "process", "testsignal"), filepath.Join(wd, "internal", "pkg", "otel", "manager", "testing"), + filepath.Join(wd, "internal", "pkg", "agent", "application", "filelock", "testlocker"), } return testBinaryPkgs, nil } diff --git a/pkg/core/process/process.go b/pkg/core/process/process.go index 8f562dd1985..99c1be9fba5 100644 --- a/pkg/core/process/process.go +++ b/pkg/core/process/process.go @@ -18,6 +18,7 @@ type Info struct { Process *os.Process Stdin io.WriteCloser Stderr io.ReadCloser + Cmd *exec.Cmd } // CmdOption is an option func to change the underlying command @@ -170,6 +171,7 @@ func startContext(ctx context.Context, path string, uid, gid int, args []string, Process: cmd.Process, Stdin: stdin, Stderr: stderr, + Cmd: cmd, }, err } From 25db8f9abbece71fad78c2f6eb00f4063615103f Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 21 Jul 2025 10:23:58 +0200 Subject: [PATCH 12/38] add testlocker binary to sonar exclusions --- sonar-project.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/sonar-project.properties b/sonar-project.properties index d9486a3cd52..cc7181a1d95 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -5,6 +5,7 @@ sonar.sources=. 
sonar.exclusions=.git/**, dev-tools/**, /magefile.go, changelog/**, \ _meta/**, deploy/**, docs/**, img/**, specs/**, \ */*_test.go, pkg/testing/**, pkg/component/fake/**, testing/**, **/mocks/*.go, \ + internal/pkg/agent/application/filelock/testlocker/**, \ pkg/control/v1/proto/*.pb.go, pkg/control/v2/cproto/*.pb.go sonar.tests=. sonar.test.inclusions=**/*_test.go From cf8defd32606870ce9588a25721177e7cc515c80 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 25 Jul 2025 11:05:33 +0200 Subject: [PATCH 13/38] disable rollback window by default --- _meta/config/common.reference.p2.yml.tmpl | 2 +- elastic-agent.reference.yml | 2 +- internal/pkg/agent/configuration/upgrade.go | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/_meta/config/common.reference.p2.yml.tmpl b/_meta/config/common.reference.p2.yml.tmpl index 8429db51431..29ef3cfb9d8 100644 --- a/_meta/config/common.reference.p2.yml.tmpl +++ b/_meta/config/common.reference.p2.yml.tmpl @@ -123,7 +123,7 @@ inputs: # # rollback settings # rollback: # # duration in which an upgraded Agent may be manually rolled back. -# window: 168h +# window: 0 # agent.process: # # timeout for creating new processes. when process is not successfully created by this timeout diff --git a/elastic-agent.reference.yml b/elastic-agent.reference.yml index 41a4eb36150..7bc3a553867 100644 --- a/elastic-agent.reference.yml +++ b/elastic-agent.reference.yml @@ -129,7 +129,7 @@ inputs: # # rollback settings # rollback: # # duration in which an upgraded Agent may be manually rolled back. -# window: 168h +# window: 0 # agent.process: # # timeout for creating new processes. 
when process is not successfully created by this timeout diff --git a/internal/pkg/agent/configuration/upgrade.go b/internal/pkg/agent/configuration/upgrade.go index 405b405ec46..bd79fd3f94c 100644 --- a/internal/pkg/agent/configuration/upgrade.go +++ b/internal/pkg/agent/configuration/upgrade.go @@ -15,7 +15,9 @@ const ( // period during which an upgraded Agent can be asked to rollback to the previous // Agent version on disk. - defaultRollbackWindowDuration = 7 * 24 * time.Hour // 7 days + // this is temporarily set to 0 to disable the rollback window until manual rollback functionality is complete. + // defaultRollbackWindowDuration = 7 * 24 * time.Hour // 7 days + defaultRollbackWindowDuration = 0 ) // UpgradeConfig is the configuration related to Agent upgrades. From d0017d438eeb70382d8d5b39356a838bfd930e10 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 28 Jul 2025 10:13:41 +0200 Subject: [PATCH 14/38] Add formal checks to manual rollback arguments --- .../pkg/agent/application/upgrade/upgrade.go | 115 +++++++++++------ .../agent/application/upgrade/upgrade_test.go | 122 +++++++++++++++++- 2 files changed, 195 insertions(+), 42 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index ea969d9de34..58e9e1f9542 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -57,11 +57,13 @@ var agentArtifact = artifact.Artifact{ } var ( - ErrWatcherNotStarted = errors.New("watcher did not start in time") - ErrUpgradeSameVersion = errors.New("upgrade did not occur because it is the same version") - ErrNonFipsToFips = errors.New("cannot switch to fips mode when upgrading") - ErrFipsToNonFips = errors.New("cannot switch to non-fips mode when upgrading") - ErrNilUpdateMarker = errors.New("loaded a nil update marker") + ErrWatcherNotStarted = errors.New("watcher did not start in time") + ErrUpgradeSameVersion = 
errors.New("upgrade did not occur because it is the same version") + ErrNonFipsToFips = errors.New("cannot switch to fips mode when upgrading") + ErrFipsToNonFips = errors.New("cannot switch to non-fips mode when upgrading") + ErrNilUpdateMarker = errors.New("loaded a nil update marker") + ErrEmptyRollbackVersion = errors.New("rollback version is empty") + ErrNoRollbacksAvailable = errors.New("no rollbacks available") ) func init() { @@ -222,7 +224,7 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { if rollback { - return u.forceRollbackToPreviousVersion(ctx, paths.Top(), version, action, det) + return u.forceRollbackToPreviousVersion(ctx, paths.Top(), time.Now(), version, action) } u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) @@ -406,7 +408,11 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } -func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) { +func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) { + if version == "" { + return nil, ErrEmptyRollbackVersion + } + // check that the upgrade marker exists and is accessible updateMarkerPath := markerFilePath(paths.DataFrom(topDir)) _, err := os.Stat(updateMarkerPath) @@ -419,18 +425,53 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st // 2. there has been at least the first restart with the new agent (i.e. 
we are not still downloading/extracting/rotating) // 3. upgrade marker exists // these should be revalidated after taking over watcher - updateMarker, err := u.persistManualRollback(ctx, topDir) - if err != nil { - return nil, fmt.Errorf("persisting rollback in update marker: %w", err) - } - previous, current, err := extractAgentInstallsFromMarker(updateMarker) + watcherExecutable := "" + err = withTakeOverWatcher(ctx, u.log, topDir, u.watcherHelper, func() error { + // read the upgrade marker + updateMarker, err := LoadMarker(paths.DataFrom(topDir)) + if err != nil { + return fmt.Errorf("loading marker: %w", err) + } + + if updateMarker == nil { + return ErrNilUpdateMarker + } + + if updateMarker.Details == nil || len(updateMarker.Details.Metadata.RollbacksAvailable) == 0 { + return ErrNoRollbacksAvailable + } + var selectedRollback *details.RollbackAvailable + for _, rollback := range updateMarker.Details.Metadata.RollbacksAvailable { + if rollback.Version == version && now.Before(rollback.ValidUntil) { + selectedRollback = &rollback + break + } + } + if selectedRollback == nil { + return fmt.Errorf("version %q not listed among the available rollbacks: %w", version, ErrNoRollbacksAvailable) + } + + // write the desired outcome of the upgrade + err = u.persistManualRollback(topDir, updateMarker) + if err != nil { + return fmt.Errorf("persisting rollback in update marker: %w", err) + } + + // extract the agent installs involved in the upgrade and select the most appropriate watcher executable + previous, current, err := extractAgentInstallsFromMarker(updateMarker) + if err != nil { + return fmt.Errorf("extracting current and previous install details: %w", err) + } + watcherExecutable = u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) + return nil + }) + if err != nil { - return nil, fmt.Errorf("extracting current and previous install details: %w", err) + return nil, err } - // Invoke watcher again - watcherExecutable := 
u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) + // Invoke watcher again (now that we released the watcher applocks) _, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) if err != nil { return nil, fmt.Errorf("invoking watcher: %w", err) @@ -440,6 +481,21 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st } +func withTakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, watcherHelper WatcherHelper, f func() error) error { + watcherApplock, err := watcherHelper.TakeOverWatcher(ctx, log, topDir) + if err != nil { + return fmt.Errorf("taking over watcher processes: %w", err) + } + defer func(watcherApplock *filelock.AppLocker) { + releaseWatcherAppLockerErr := watcherApplock.Unlock() + if releaseWatcherAppLockerErr != nil { + log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr) + } + }(watcherApplock) + + return f() +} + func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentInstall, current agentInstall, err error) { previousParsedVersion, err := agtversion.ParseVersion(updateMarker.PrevVersion) if err != nil { @@ -466,35 +522,14 @@ func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentI return previous, current, nil } -func (u *Upgrader) persistManualRollback(ctx context.Context, topDir string) (*UpdateMarker, error) { - watcherApplock, err := u.watcherHelper.TakeOverWatcher(ctx, u.log, topDir) - if err != nil { - return nil, fmt.Errorf("taking over watcher processes: %w", err) - } - defer func(watcherApplock *filelock.AppLocker) { - releaseWatcherAppLockerErr := watcherApplock.Unlock() - if releaseWatcherAppLockerErr != nil { - u.log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr) - } - }(watcherApplock) - - // read the upgrade marker - updateMarker, err := LoadMarker(paths.DataFrom(topDir)) - if err != nil { - return nil, fmt.Errorf("loading marker: %w", err) - } - - if 
updateMarker == nil { - return nil, ErrNilUpdateMarker - } - +func (u *Upgrader) persistManualRollback(topDir string, updateMarker *UpdateMarker) error { updateMarker.DesiredOutcome = OUTCOME_ROLLBACK - err = SaveMarker(paths.DataFrom(topDir), updateMarker, true) + err := SaveMarker(paths.DataFrom(topDir), updateMarker, true) if err != nil { - return updateMarker, fmt.Errorf("saving marker: %w", err) + return fmt.Errorf("saving marker: %w", err) } - return updateMarker, nil + return nil } // Ack acks last upgrade action diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index 8b62e757b6a..a8fa35ee37f 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -1038,6 +1038,23 @@ func TestIsSameReleaseVersion(t *testing.T) { } func TestManualRollback(t *testing.T) { + const updatemarkerwatching456NoRollbackAvailable = ` + version: 4.5.6 + hash: newver + versioned_home: data/elastic-agent-4.5.6-newver + updated_on: 2025-07-11T10:11:12.131415Z + prev_version: 1.2.3 + prev_hash: oldver + prev_versioned_home: data/elastic-agent-1.2.3-oldver + acked: false + action: null + details: + target_version: 4.5.6 + state: UPG_WATCHING + metadata: + retry_until: null + desired_outcome: UPGRADE + ` const updatemarkerwatching456 = ` version: 4.5.6 hash: newver @@ -1059,6 +1076,7 @@ func TestManualRollback(t *testing.T) { valid_until: 2025-07-18T10:11:12.131415Z desired_outcome: UPGRADE ` + parsed123Version, err := agtversion.ParseVersion("1.2.3") require.NoError(t, err) parsed456Version, err := agtversion.ParseVersion("4.5.6") @@ -1078,6 +1096,13 @@ func TestManualRollback(t *testing.T) { versionedHome: "data/elastic-agent-4.5.6-newver", } + // this is the updated_on timestamp in the example + nowBeforeTTL, err := time.Parse(time.RFC3339, `2025-07-11T10:11:12Z`) + require.NoError(t, err, "error parsing nowBeforeTTL") + + // the update 
marker yaml assumes 7d TTL for rollbacks, let's make an extra day pass + nowAfterTTL := nowBeforeTTL.Add(8 * 24 * time.Hour) + type setupF func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) type postRollbackAssertionsF func(t *testing.T, topDir string) type testcase struct { @@ -1085,12 +1110,26 @@ func TestManualRollback(t *testing.T) { setup setupF artifactSettings *artifact.Config upgradeSettings *configuration.UpgradeConfig + now time.Time version string wantErr assert.ErrorAssertionFunc additionalAsserts postRollbackAssertionsF } testcases := []testcase{ + { + name: "no rollback version - rollback fails", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + //do not setup anything here, let the rollback fail + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrEmptyRollbackVersion) + }, + additionalAsserts: nil, + }, { name: "no update marker - rollback fails", setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { @@ -1104,6 +1143,85 @@ func TestManualRollback(t *testing.T) { }, additionalAsserts: nil, }, + { + name: "update marker ok but rollback available is empty - error", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456NoRollbackAvailable), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + }, + artifactSettings: artifact.DefaultConfig(), +
upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "2.3.4-unknown", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456NoRollbackAvailable, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok but version is not available for rollback - error", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "2.3.4-unknown", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") + }, + }, + { + name: "update marker ok but rollback is expired - error", + setup: func(t *testing.T, topDir string, agent 
*infomocks.Agent, watcherHelper *MockWatcherHelper) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + now: nowAfterTTL, + version: "1.2.3", + wantErr: func(t assert.TestingT, err error, i ...interface{}) bool { + return assert.ErrorIs(t, err, ErrNoRollbacksAvailable) + }, + additionalAsserts: func(t *testing.T, topDir string) { + // marker should be untouched + filePath := markerFilePath(paths.DataFrom(topDir)) + require.FileExists(t, filePath) + markerFileBytes, readMarkerErr := os.ReadFile(filePath) + require.NoError(t, readMarkerErr) + + assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched") + }, + }, { name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher", setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { @@ -1119,6 +1237,7 @@ func TestManualRollback(t *testing.T) { }, artifactSettings: artifact.DefaultConfig(), upgradeSettings: configuration.DefaultUpgradeConfig(), + now: nowBeforeTTL, version: "1.2.3", wantErr: assert.NoError, additionalAsserts: func(t *testing.T, topDir string) { @@ -1148,8 +1267,7 @@ func TestManualRollback(t *testing.T) { upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper) require.NoError(t, err, "error instantiating upgrader") - - _, err = upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.version, nil, nil) + _, err = 
upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil) tc.wantErr(t, err, "unexpected error returned by forceRollbackToPreviousVersion()") if tc.additionalAsserts != nil { tc.additionalAsserts(t, topDir) From c4a242ac394cebc79277cd76b7eb1a327176b3b5 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 28 Jul 2025 16:31:21 +0200 Subject: [PATCH 15/38] rename forceRollbackToPreviousVersion --- internal/pkg/agent/application/upgrade/upgrade.go | 10 ++-------- internal/pkg/agent/application/upgrade/upgrade_test.go | 4 ++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 58e9e1f9542..c5ede342690 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -224,7 +224,7 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) { if rollback { - return u.forceRollbackToPreviousVersion(ctx, paths.Top(), time.Now(), version, action) + return u.rollbackToPreviousVersion(ctx, paths.Top(), time.Now(), version, action) } u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI) @@ -408,7 +408,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s return cb, nil } -func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) { +func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) { if version == 
"" { return nil, ErrEmptyRollbackVersion } @@ -420,12 +420,6 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st return nil, fmt.Errorf("stat() on upgrade marker %q failed: %w", updateMarkerPath, err) } - // TODO Formal checks for verifying we can rollback properly: - // 1. d.Metadata.RollbacksAvailable should contain the desired version with a valid TTL (it may need to be written by main agent process before starting watcher) - // 2. there has been at least the first restart with the new agent (i.e. we are not still downloading/extracting/rotating) - // 3. upgrade marker exists - // these should be revalidated after taking over watcher - watcherExecutable := "" err = withTakeOverWatcher(ctx, u.log, topDir, u.watcherHelper, func() error { // read the upgrade marker diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index a8fa35ee37f..0f3027f36b9 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -1267,8 +1267,8 @@ func TestManualRollback(t *testing.T) { upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper) require.NoError(t, err, "error instantiating upgrader") - _, err = upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil) - tc.wantErr(t, err, "unexpected error returned by forceRollbackToPreviousVersion()") + _, err = upgrader.rollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil) + tc.wantErr(t, err, "unexpected error returned by rollbackToPreviousVersion()") if tc.additionalAsserts != nil { tc.additionalAsserts(t, topDir) } From e37bc9141f2454e083826d5c59c2c20b70ddc7d7 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 28 Jul 2025 17:58:50 +0200 Subject: [PATCH 16/38] test watchloop --- internal/pkg/agent/cmd/watch.go | 48 --------------- 
internal/pkg/agent/cmd/watch_impl.go | 50 ++++++++++++++++ internal/pkg/agent/cmd/watch_impl_test.go | 73 +++++++++++++++++++++++ 3 files changed, 123 insertions(+), 48 deletions(-) create mode 100644 internal/pkg/agent/cmd/watch_impl_test.go diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 0d28354a780..d25d56614df 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -8,9 +8,7 @@ import ( "context" "fmt" "os" - "os/signal" "runtime" - "syscall" "time" "github.com/spf13/cobra" @@ -211,52 +209,6 @@ func isWindows() bool { return runtime.GOOS == "windows" } -func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logger.Logger) error { - errChan := make(chan error) - - ctx, cancel := context.WithCancel(ctx) - - //cleanup - defer func() { - cancel() - close(errChan) - }() - - agentWatcher := upgrade.NewAgentWatcher(errChan, log, errorCheckInterval) - go agentWatcher.Run(ctx) - - signals := make(chan os.Signal, 1) - signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) - - t := time.NewTimer(tilGrace) - defer t.Stop() - -WATCHLOOP: - for { - select { - case s := <-signals: - log.Infof("received signal: (%d): %v during watch", s, s) - if s == syscall.SIGINT || s == syscall.SIGTERM { - log.Infof("received signal: (%d): %v. Exiting watch", s, s) - return ErrWatchCancelled - } - continue - case <-ctx.Done(): - break WATCHLOOP - // grace period passed, agent is considered stable - case <-t.C: - log.Info("Grace period passed, not watching") - break WATCHLOOP - // Agent in degraded state. - case err := <-errChan: - log.Errorf("Agent Error detected: %s", err.Error()) - return err - } - } - - return nil -} - // gracePeriod returns true if it is within grace period and time until grace period ends. 
// otherwise it returns false and 0 func gracePeriod(marker *upgrade.UpdateMarker, gracePeriodDuration time.Duration) (bool, time.Duration) { diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index 92e3118435c..2a198480209 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -6,6 +6,9 @@ package cmd import ( "context" + "os" + "os/signal" + "syscall" "time" "github.com/elastic/elastic-agent-libs/logp" @@ -29,3 +32,50 @@ func (a upgradeInstallationModifier) Cleanup(log *logger.Logger, topDirPath, cur func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string) error { return upgrade.Rollback(ctx, log, c, topDirPath, prevVersionedHome, prevHash) } + +func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logger.Logger) error { + errChan := make(chan error) + + ctx, cancel := context.WithCancel(ctx) + + //cleanup + defer func() { + cancel() + close(errChan) + }() + + agtWatcher := upgrade.NewAgentWatcher(errChan, log, errorCheckInterval) + go agtWatcher.Run(ctx) + + signals := make(chan os.Signal, 1) + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) + + graceTimer := time.NewTimer(tilGrace) + defer graceTimer.Stop() + + return watchLoop(ctx, log, signals, errChan, graceTimer.C) +} + +func watchLoop(ctx context.Context, log *logger.Logger, signals <-chan os.Signal, errChan <-chan error, graceTimer <-chan time.Time) error { + for { + select { + case s := <-signals: + log.Infof("received signal: (%d): %v during watch", s, s) + if s == syscall.SIGINT || s == syscall.SIGTERM { + log.Infof("received signal: (%d): %v. 
Exiting watch", s, s) + return ErrWatchCancelled + } + continue + case <-ctx.Done(): + return nil + // grace period passed, agent is considered stable + case <-graceTimer: + log.Info("Grace period passed, not watching") + return nil + // Agent in degraded state. + case err := <-errChan: + log.Errorf("Agent Error detected: %s", err.Error()) + return err + } + } +} diff --git a/internal/pkg/agent/cmd/watch_impl_test.go b/internal/pkg/agent/cmd/watch_impl_test.go new file mode 100644 index 00000000000..d9537b58c92 --- /dev/null +++ b/internal/pkg/agent/cmd/watch_impl_test.go @@ -0,0 +1,73 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package cmd + +import ( + "context" + "fmt" + "os" + "syscall" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" +) + +func Test_watchLoop(t *testing.T) { + + t.Run("watchloop returns when context expires - no error", func(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond) + defer cancel() + log, _ := loggertest.New(t.Name()) + signals := make(chan os.Signal, 1) + errChan := make(chan error, 1) + graceTimer := make(chan time.Time, 1) + err := watchLoop(ctx, log, signals, errChan, graceTimer) + require.NoError(t, err) + }) + + t.Run("watchloop returns when grace timer triggers - no error", func(t *testing.T) { + log, _ := loggertest.New(t.Name()) + signals := make(chan os.Signal, 1) + errChan := make(chan error, 1) + graceTimer := make(chan time.Time, 1) + graceTimer <- time.Now() + err := watchLoop(t.Context(), log, signals, errChan, graceTimer) + require.NoError(t, err) + }) + + t.Run("watchloop returns when error from AgentWatcher is received - error", func(t *testing.T) { + log, _ 
:= loggertest.New(t.Name()) + signals := make(chan os.Signal, 1) + errChan := make(chan error, 1) + graceTimer := make(chan time.Time, 1) + agentWatcherError := fmt.Errorf("some error") + errChan <- agentWatcherError + err := watchLoop(t.Context(), log, signals, errChan, graceTimer) + require.ErrorIs(t, err, agentWatcherError) + }) + + t.Run("watchloop returns when receiving signals - error", func(t *testing.T) { + testSignals := []syscall.Signal{ + syscall.SIGTERM, + syscall.SIGINT, + } + + for _, signal := range testSignals { + t.Run(signal.String(), func(t *testing.T) { + log, _ := loggertest.New(t.Name()) + signals := make(chan os.Signal, 1) + errChan := make(chan error, 1) + graceTimer := make(chan time.Time, 1) + signals <- signal + err := watchLoop(t.Context(), log, signals, errChan, graceTimer) + assert.ErrorIs(t, err, ErrWatchCancelled) + }) + } + }) +} From 3bd7b1015deb844fef00d8d3f4ef78da6d2fcc3c Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 1 Aug 2025 14:07:00 +0200 Subject: [PATCH 17/38] Re-invoke watcher after takeover --- .../pkg/agent/application/upgrade/upgrade.go | 36 ++++++++++++------- .../agent/application/upgrade/upgrade_test.go | 29 +++++++++++++++ 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index c5ede342690..fb4ccc2c866 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -420,10 +420,26 @@ func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, return nil, fmt.Errorf("stat() on upgrade marker %q failed: %w", updateMarkerPath, err) } - watcherExecutable := "" + // read the upgrade marker + updateMarker, err := LoadMarker(paths.DataFrom(topDir)) + if err != nil { + return nil, fmt.Errorf("loading marker: %w", err) + } + + if updateMarker == nil { + return nil, ErrNilUpdateMarker + } + + // extract the agent installs 
involved in the upgrade and select the most appropriate watcher executable + previous, current, err := extractAgentInstallsFromMarker(updateMarker) + if err != nil { + return nil, fmt.Errorf("extracting current and previous install details: %w", err) + } + watcherExecutable := u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) + err = withTakeOverWatcher(ctx, u.log, topDir, u.watcherHelper, func() error { // read the upgrade marker - updateMarker, err := LoadMarker(paths.DataFrom(topDir)) + updateMarker, err = LoadMarker(paths.DataFrom(topDir)) if err != nil { return fmt.Errorf("loading marker: %w", err) } @@ -452,23 +468,17 @@ func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, return fmt.Errorf("persisting rollback in update marker: %w", err) } - // extract the agent installs involved in the upgrade and select the most appropriate watcher executable - previous, current, err := extractAgentInstallsFromMarker(updateMarker) - if err != nil { - return fmt.Errorf("extracting current and previous install details: %w", err) - } - watcherExecutable = u.watcherHelper.SelectWatcherExecutable(topDir, previous, current) return nil }) - if err != nil { - return nil, err + // Invoke watcher again (now that we released the watcher applocks) + _, invokeWatcherErr := u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) + if invokeWatcherErr != nil { + return nil, goerrors.Join(err, fmt.Errorf("invoking watcher: %w", invokeWatcherErr)) } - // Invoke watcher again (now that we released the watcher applocks) - _, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable) if err != nil { - return nil, fmt.Errorf("invoking watcher: %w", err) + return nil, err } return nil, nil diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index 0f3027f36b9..9d6ec22f1c1 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ 
b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -1143,6 +1143,26 @@ func TestManualRollback(t *testing.T) { }, additionalAsserts: nil, }, + { + name: "update marker is malformed - rollback fails", + setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { + err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte("this is not a proper YAML file"), 0600) + require.NoError(t, err, "error setting up update marker") + locker := filelock.NewAppLocker(topDir, "watcher.lock") + err = locker.TryLock() + require.NoError(t, err, "error locking initial watcher AppLocker") + // there's no takeover watcher so no expectation on that or InvokeWatcher + t.Cleanup(func() { + unlockErr := locker.Unlock() + assert.NoError(t, unlockErr, "error unlocking initial watcher AppLocker") + }) + }, + artifactSettings: artifact.DefaultConfig(), + upgradeSettings: configuration.DefaultUpgradeConfig(), + version: "1.2.3", + wantErr: assert.Error, + additionalAsserts: nil, + }, { name: "update marker ok but rollback available is empty - error", setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) { @@ -1152,6 +1172,9 @@ func TestManualRollback(t *testing.T) { err = locker.TryLock() require.NoError(t, err, "error locking initial watcher AppLocker") watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) }, artifactSettings: artifact.DefaultConfig(), upgradeSettings: configuration.DefaultUpgradeConfig(), @@ -1178,6 +1201,9 @@ func TestManualRollback(t 
*testing.T) { err = locker.TryLock() require.NoError(t, err, "error locking initial watcher AppLocker") watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) }, artifactSettings: artifact.DefaultConfig(), upgradeSettings: configuration.DefaultUpgradeConfig(), @@ -1204,6 +1230,9 @@ func TestManualRollback(t *testing.T) { err = locker.TryLock() require.NoError(t, err, "error locking initial watcher AppLocker") watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil) + newerWatcherExecutable := filepath.Join(topDir, "data", "elastic-agent-4.5.6-newver", "elastic-agent") + watcherHelper.EXPECT().SelectWatcherExecutable(topDir, agentInstall123, agentInstall456).Return(newerWatcherExecutable) + watcherHelper.EXPECT().InvokeWatcher(mock.Anything, newerWatcherExecutable).Return(&exec.Cmd{Path: newerWatcherExecutable, Args: []string{"watch", "for realsies"}}, nil) }, artifactSettings: artifact.DefaultConfig(), upgradeSettings: configuration.DefaultUpgradeConfig(), From 6037333a18c98054988fb37776231e8294d6e545 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 1 Aug 2025 15:38:31 +0200 Subject: [PATCH 18/38] Add minimum version check for creating rollbacks entries in update marker --- .../agent/application/upgrade/step_mark.go | 5 +- .../application/upgrade/step_mark_test.go | 50 ++++++++++++++++++- .../pkg/agent/application/upgrade/upgrade.go | 5 +- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/step_mark.go 
b/internal/pkg/agent/application/upgrade/step_mark.go index 9c87ef75fa2..c376b334712 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -216,9 +216,10 @@ func markUpgrade(log *logger.Logger, dataDirPath string, updatedOn time.Time, ag DesiredOutcome: desiredOutcome, } - if rollbackWindow > 0 { - + if rollbackWindow > 0 && agent.parsedVersion != nil && !agent.parsedVersion.Less(*Version_9_2_0_SNAPSHOT) { // if we have a not empty rollback window, write the prev version in the rollbacks_available field + // we also need to check the destination version because the manual rollback and delayed cleanup will be + // handled by that version of agent, so it needs to be recent enough upgradeDetails.Metadata.RollbacksAvailable = []details.RollbackAvailable{ { Version: previousAgent.version, diff --git a/internal/pkg/agent/application/upgrade/step_mark_test.go b/internal/pkg/agent/application/upgrade/step_mark_test.go index d3c975f749d..abe3bac6c1a 100644 --- a/internal/pkg/agent/application/upgrade/step_mark_test.go +++ b/internal/pkg/agent/application/upgrade/step_mark_test.go @@ -268,7 +268,7 @@ desired_outcome: true func TestMarkUpgrade(t *testing.T) { var parsed123SNAPSHOT = agtversion.NewParsedSemVer(1, 2, 3, "SNAPSHOT", "") var parsed456SNAPSHOT = agtversion.NewParsedSemVer(4, 5, 6, "SNAPSHOT", "") - + var parsed920SNAPSHOT = agtversion.NewParsedSemVer(9, 2, 0, "SNAPSHOT", "") // fix a timestamp (truncated to the second because of loss of precision during marshalling/unmarshalling) updatedOnNow := time.Now().UTC().Truncate(time.Second) @@ -371,7 +371,7 @@ func TestMarkUpgrade(t *testing.T) { }, }, { - name: "rollback window specified - available rollbacks must be present", + name: "rollback window specified but new version is too low - no rollbacks", args: args{ updatedOn: updatedOnNow, currentAgent: agentInstall{ @@ -410,6 +410,52 @@ func TestMarkUpgrade(t *testing.T) { TargetVersion: 
"4.5.6-SNAPSHOT", State: "UPG_REPLACING", ActionID: "", + }, + DesiredOutcome: OUTCOME_UPGRADE, + } + assert.Equal(t, expectedMarker, actualMarker) + }, + }, + { + name: "rollback window specified and new version is at least 9.2.0-SNAPSHOT - available rollbacks must be present", + args: args{ + updatedOn: updatedOnNow, + currentAgent: agentInstall{ + parsedVersion: parsed920SNAPSHOT, + version: "9.2.0-SNAPSHOT", + hash: "newagt", + versionedHome: filepath.Join("data", "elastic-agent-9.2.0-SNAPSHOT-newagt"), + }, + previousAgent: agentInstall{ + parsedVersion: parsed123SNAPSHOT, + version: "1.2.3-SNAPSHOT", + hash: "prvagt", + versionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + }, + action: nil, + details: details.NewDetails("9.2.0-SNAPSHOT", details.StateReplacing, ""), + desiredOutcome: OUTCOME_UPGRADE, + rollbackWindow: 7 * 24 * time.Hour, + }, + wantErr: assert.NoError, + assertAfterMark: func(t *testing.T, dataDir string) { + actualMarker, err := LoadMarker(dataDir) + require.NoError(t, err, "error reading actualMarker content after writing") + + expectedMarker := &UpdateMarker{ + Version: "9.2.0-SNAPSHOT", + Hash: "newagt", + VersionedHome: filepath.Join("data", "elastic-agent-9.2.0-SNAPSHOT-newagt"), + UpdatedOn: updatedOnNow, + PrevVersion: "1.2.3-SNAPSHOT", + PrevHash: "prvagt", + PrevVersionedHome: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + Acked: false, + Action: nil, + Details: &details.Details{ + TargetVersion: "9.2.0-SNAPSHOT", + State: "UPG_REPLACING", + ActionID: "", Metadata: details.Metadata{ RollbacksAvailable: []details.RollbackAvailable{ { diff --git a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index fb4ccc2c866..00c91440457 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -64,6 +64,9 @@ var ( ErrNilUpdateMarker = errors.New("loaded a nil update marker") 
ErrEmptyRollbackVersion = errors.New("rollback version is empty") ErrNoRollbacksAvailable = errors.New("no rollbacks available") + + // Version_9_2_0_SNAPSHOT is the minimum version for manual rollback and rollback reason + Version_9_2_0_SNAPSHOT = agtversion.NewParsedSemVer(9, 2, 0, "SNAPSHOT", "") ) func init() { @@ -371,7 +374,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s versionedHome: currentVersionedHome, } rollbackWindow := time.Duration(0) - if u.upgradeSettings != nil && u.upgradeSettings.Rollback != nil { // TODO && target version supports manual rollback and deferred cleanup + if u.upgradeSettings != nil && u.upgradeSettings.Rollback != nil { rollbackWindow = u.upgradeSettings.Rollback.Window } if err := markUpgrade(u.log, paths.Data(), time.Now(), current, previous, action, det, OUTCOME_UPGRADE, rollbackWindow); err != nil { From ba53d11b2dce256149a62beb71622467a1925e09 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Sat, 2 Aug 2025 10:23:44 +0200 Subject: [PATCH 19/38] Add manual rollback integration test --- testing/integration/ess/repackage.go | 21 +-- .../integration/ess/upgrade_rollback_test.go | 135 ++++++++++++++++-- .../upgrade_standalone_same_commit_test.go | 13 +- 3 files changed, 138 insertions(+), 31 deletions(-) diff --git a/testing/integration/ess/repackage.go b/testing/integration/ess/repackage.go index 3e21c36f8f7..c042139e0c2 100644 --- a/testing/integration/ess/repackage.go +++ b/testing/integration/ess/repackage.go @@ -11,7 +11,6 @@ import ( "archive/zip" "bytes" "compress/gzip" - "context" "errors" "io" "os" @@ -25,19 +24,13 @@ import ( "github.com/elastic/elastic-agent/dev-tools/mage" v1 "github.com/elastic/elastic-agent/pkg/api/v1" - atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/version" agtversion "github.com/elastic/elastic-agent/version" ) -func repackageArchive(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, 
newVersionBuildMetadata string, currentVersion *version.ParsedSemVer, newPackageContainingDir string, parsedNewVersion *version.ParsedSemVer) (*version.ParsedSemVer, error) { - err := startFixture.EnsurePrepared(ctx) - require.NoErrorf(t, err, "fixture should be prepared") - - // retrieve the compressed package file location - srcPackage, err := startFixture.SrcPackage(ctx) - require.NoErrorf(t, err, "error retrieving start fixture source package") - +// repackageArchive will take a srcPackage elastic-agent package and create a modified copy that will present parsedNewVersion +// in package version file, manifest and relevant metadata. +func repackageArchive(t *testing.T, srcPackage string, newVersionBuildMetadata string, currentVersion *version.ParsedSemVer, parsedNewVersion *version.ParsedSemVer) (*version.ParsedSemVer, string, error) { originalPackageFileName := filepath.Base(srcPackage) // integration test fixtures and package names treat the version as a string including the "-SNAPSHOT" suffix @@ -54,8 +47,8 @@ func repackageArchive(ctx context.Context, t *testing.T, startFixture *atesting. // calculate the new package name newPackageFileName := strings.Replace(originalPackageFileName, currentVersion.String(), versionForFixture.String(), 1) t.Logf("originalPackageName: %q newPackageFileName: %q", originalPackageFileName, newPackageFileName) - - newPackageAbsPath := filepath.Join(newPackageContainingDir, newPackageFileName) + outDir := t.TempDir() + newPackageAbsPath := filepath.Join(outDir, newPackageFileName) // hack the package based on type ext := filepath.Ext(originalPackageFileName) @@ -76,9 +69,9 @@ func repackageArchive(ctx context.Context, t *testing.T, startFixture *atesting. 
} // Create hash file for the new package - err = mage.CreateSHA512File(newPackageAbsPath) + err := mage.CreateSHA512File(newPackageAbsPath) require.NoErrorf(t, err, "error creating .sha512 for file %q", newPackageAbsPath) - return versionForFixture, err + return versionForFixture, newPackageAbsPath, err } func repackageTarArchive(t *testing.T, srcPackagePath string, newPackagePath string, newVersion *version.ParsedSemVer) { diff --git a/testing/integration/ess/upgrade_rollback_test.go b/testing/integration/ess/upgrade_rollback_test.go index 6207639c455..e6e7045402b 100644 --- a/testing/integration/ess/upgrade_rollback_test.go +++ b/testing/integration/ess/upgrade_rollback_test.go @@ -10,6 +10,7 @@ import ( "context" "errors" "fmt" + "path/filepath" "runtime" "strings" "testing" @@ -24,6 +25,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/agent/install" + "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" @@ -39,6 +41,15 @@ agent.upgrade.watcher: error_check.interval: 5s ` +const fastWatcherCfgWithRollbackWindow = ` +agent.upgrade: + watcher: + grace_period: 2m + error_check.interval: 5s + rollback: + window: 10m +` + // TestStandaloneUpgradeRollback tests the scenario where upgrading to a new version // of Agent fails due to the new Agent binary reporting an unhealthy status. It checks // that the Agent is rolled back to the previous version. 
@@ -232,8 +243,6 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) { require.NoError(t, err) // Create a new package with a different version (IAR-style) - newPackageContainingDir := t.TempDir() - // modify the version with the "+buildYYYYMMDDHHMMSS" currentVersion, err := version.ParseVersion(define.Version()) require.NoErrorf(t, err, "define.Version() %q is not parsable.", define.Version()) @@ -241,12 +250,19 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) { newVersionBuildMetadata := "build" + time.Now().Format("20060102150405") parsedNewVersion := version.NewParsedSemVer(currentVersion.Major(), currentVersion.Minor(), currentVersion.Patch(), "", newVersionBuildMetadata) - versionForFixture, err := repackageArchive(t.Context(), t, fromFixture, newVersionBuildMetadata, currentVersion, newPackageContainingDir, parsedNewVersion) + err = fromFixture.EnsurePrepared(t.Context()) + require.NoErrorf(t, err, "fixture should be prepared") + + // retrieve the compressed package file location + srcPackage, err := fromFixture.SrcPackage(t.Context()) + require.NoErrorf(t, err, "error retrieving start fixture source package") + + versionForFixture, repackagedArchiveFile, err := repackageArchive(t, srcPackage, newVersionBuildMetadata, currentVersion, parsedNewVersion) require.NoError(t, err, "error repackaging the archive built from the same commit") // I wish I could just pass the location of the package on disk to the whole upgrade tests/fixture/fetcher code // but I would have to break too much code for that, when in Rome... 
add more code on top of inflexible code - repackagedLocalFetcher := atesting.LocalFetcher(newPackageContainingDir) + repackagedLocalFetcher := atesting.LocalFetcher(filepath.Dir(repackagedArchiveFile)) toFixture, err := atesting.NewFixture( t, versionForFixture.String(), @@ -323,6 +339,79 @@ func TestFleetManagedUpgradeRollbackOnRestarts(t *testing.T) { } } +// TestStandaloneUpgradeManualRollback tests the scenario where, after upgrading to a new version +// of Agent, a manual rollback is triggered. It checks that the Agent is rolled back to the previous version. +func TestStandaloneUpgradeManualRollback(t *testing.T) { + define.Require(t, define.Requirements{ + Group: integration.Upgrade, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + type fixturesSetupFunc func(t *testing.T) (from *atesting.Fixture, to *atesting.Fixture) + testcases := []struct { + name string + fixturesSetup fixturesSetupFunc + }{ + { + name: "upgrade to a repackaged agent built from the same commit", + fixturesSetup: func(t *testing.T) (from *atesting.Fixture, to *atesting.Fixture) { + // Upgrade from the current build to the same build as Independent Agent Release. + + // Start from the build under test. 
+ fromFixture, err := define.NewFixtureFromLocalBuild(t, define.Version()) + require.NoError(t, err) + + // Create a new package with a different version (IAR-style) + // modify the version with the "+buildYYYYMMDDHHMMSS" + currentVersion, err := version.ParseVersion(define.Version()) + require.NoErrorf(t, err, "define.Version() %q is not parsable.", define.Version()) + + newVersionBuildMetadata := "build" + time.Now().Format("20060102150405") + parsedNewVersion := version.NewParsedSemVer(currentVersion.Major(), currentVersion.Minor(), currentVersion.Patch(), "", newVersionBuildMetadata) + + err = fromFixture.EnsurePrepared(t.Context()) + require.NoErrorf(t, err, "fixture should be prepared") + + // retrieve the compressed package file location + srcPackage, err := fromFixture.SrcPackage(t.Context()) + require.NoErrorf(t, err, "error retrieving start fixture source package") + + versionForFixture, repackagedArchiveFile, err := repackageArchive(t, srcPackage, newVersionBuildMetadata, currentVersion, parsedNewVersion) + require.NoError(t, err, "error repackaging the archive built from the same commit") + + repackagedLocalFetcher := atesting.LocalFetcher(filepath.Dir(repackagedArchiveFile)) + toFixture, err := atesting.NewFixture( + t, + versionForFixture.String(), + atesting.WithFetcher(repackagedLocalFetcher), + ) + require.NoError(t, err) + + return fromFixture, toFixture + }, + }, + } + + // set up start fixture with a rollback window of 1h + rollbackWindowConfig := ` +agent.upgrade.rollback.window: 1h +` + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := testcontext.WithDeadline(t, t.Context(), time.Now().Add(10*time.Minute)) + defer cancel() + from, to := tc.fixturesSetup(t) + + err := from.Configure(ctx, []byte(rollbackWindowConfig)) + require.NoError(t, err, "error setting up rollback window") + standaloneManualRollbackTest(ctx, t, from, to) + }) + } + +} + func managedRollbackRestartTest(ctx context.Context, t 
*testing.T, info *define.Info, from *atesting.Fixture, to *atesting.Fixture) { startVersionInfo, err := from.ExecVersion(ctx) @@ -401,11 +490,29 @@ func managedRollbackRestartTest(ctx context.Context, t *testing.T, info *define. } func standaloneRollbackRestartTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture) { + standaloneRollbackTest(ctx, t, startFixture, endFixture, reallyFastWatcherCfg, details.ReasonWatchFailed, + func(t *testing.T, _ client.Client) { + restartAgentNTimes(t, 3, 10*time.Second) + }) +} + +func standaloneManualRollbackTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture) { + standaloneRollbackTest(ctx, t, startFixture, endFixture, fastWatcherCfgWithRollbackWindow, details.ReasonManualRollback, + func(t *testing.T, client client.Client) { + t.Logf("sending version=%s rollback=%v upgrade to agent", startFixture.Version(), true) + retVal, err := client.Upgrade(ctx, startFixture.Version(), true, "", false, false) + require.NoError(t, err, "error triggering manual rollback to version %s", startFixture.Version()) + t.Logf("received output %s from upgrade command", retVal) + }, + ) +} + +func standaloneRollbackTest(ctx context.Context, t *testing.T, startFixture *atesting.Fixture, endFixture *atesting.Fixture, customConfig string, rollbackReason string, rollbackTrigger func(t *testing.T, client client.Client)) { startVersionInfo, err := startFixture.ExecVersion(ctx) require.NoError(t, err, "failed to get start agent build version info") - endVersionInfo, err := startFixture.ExecVersion(ctx) + endVersionInfo, err := endFixture.ExecVersion(ctx) require.NoError(t, err, "failed to get end agent build version info") t.Logf("Testing Elastic Agent upgrade from %s to %s...", startFixture.Version(), endVersionInfo.Binary.String()) @@ -420,15 +527,19 @@ func standaloneRollbackRestartTest(ctx context.Context, t *testing.T, startFixtu err = 
upgradetest.PerformUpgrade( ctx, startFixture, endFixture, t, upgradetest.WithPostUpgradeHook(postUpgradeHook), - upgradetest.WithCustomWatcherConfig(reallyFastWatcherCfg), + upgradetest.WithCustomWatcherConfig(customConfig), upgradetest.WithDisableHashCheck(true)) if !errors.Is(err, ErrPostExit) { require.NoError(t, err) } - // A few seconds after the upgrade, deliberately restart upgraded Agent a - // couple of times to simulate Agent crashing. - restartAgentNTimes(t, 3, 10*time.Second) + elasticAgentClient := startFixture.Client() + err = elasticAgentClient.Connect(ctx) + require.NoError(t, err, "error connecting to installed elastic agent") + defer elasticAgentClient.Disconnect() + + // A few seconds after the upgrade, trigger a rollback using the passed trigger + rollbackTrigger(t, elasticAgentClient) // wait for the agent to be healthy and back at the start version err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 2*time.Minute, 10*time.Second, t) @@ -448,17 +559,15 @@ func standaloneRollbackRestartTest(ctx context.Context, t *testing.T, startFixtu require.NoError(t, err) if !startVersion.Less(*version.NewParsedSemVer(8, 12, 0, "", "")) { - client := startFixture.Client() - err = client.Connect(ctx) require.NoError(t, err) - state, err := client.State(ctx) + state, err := elasticAgentClient.State(ctx) require.NoError(t, err) require.NotNil(t, state.UpgradeDetails) assert.Equal(t, details.StateRollback, details.State(state.UpgradeDetails.State)) if !startVersion.Less(*upgradetest.Version_9_2_0_SNAPSHOT) { - assert.Equal(t, details.ReasonWatchFailed, state.UpgradeDetails.Metadata.Reason) + assert.Equal(t, rollbackReason, state.UpgradeDetails.Metadata.Reason) } } diff --git a/testing/integration/ess/upgrade_standalone_same_commit_test.go b/testing/integration/ess/upgrade_standalone_same_commit_test.go index 48e6e5993f6..cf4f854b7c8 100644 --- a/testing/integration/ess/upgrade_standalone_same_commit_test.go +++ 
b/testing/integration/ess/upgrade_standalone_same_commit_test.go @@ -9,6 +9,7 @@ package ess import ( "context" "fmt" + "path/filepath" "testing" "time" @@ -82,12 +83,16 @@ func TestStandaloneUpgradeSameCommit(t *testing.T) { newVersionBuildMetadata := "build" + time.Now().Format("20060102150405") parsedNewVersion := version.NewParsedSemVer(currentVersion.Major(), currentVersion.Minor(), currentVersion.Patch(), "", newVersionBuildMetadata) - newPackageContainingDir := t.TempDir() + err = startFixture.EnsurePrepared(t.Context()) + require.NoErrorf(t, err, "fixture should be prepared") - versionForFixture, err := repackageArchive(ctx, t, startFixture, newVersionBuildMetadata, currentVersion, newPackageContainingDir, parsedNewVersion) + // retrieve the compressed package file location + srcPackage, err := startFixture.SrcPackage(t.Context()) + require.NoErrorf(t, err, "error retrieving start fixture source package") - // I wish I could just pass the location of the package on disk to the whole upgrade tests/fixture/fetcher code - // but I would have to break too much code for that, when in Rome... 
add more code on top of inflexible code + versionForFixture, repackagedArchiveFile, err := repackageArchive(t, srcPackage, newVersionBuildMetadata, currentVersion, parsedNewVersion) + + newPackageContainingDir := filepath.Dir(repackagedArchiveFile) repackagedLocalFetcher := atesting.LocalFetcher(newPackageContainingDir) endFixture, err := atesting.NewFixture(t, versionForFixture.String(), atesting.WithFetcher(repackagedLocalFetcher)) From 09da0d7408c953013cf0877a68f093b7ac4a751f Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 6 Aug 2025 09:59:08 +0200 Subject: [PATCH 20/38] Create watcher subprocess with a new Console on windows --- .../pkg/agent/application/upgrade/rollback.go | 2 +- .../application/upgrade/rollback_windows.go | 21 +++++++++++++++++++ .../agent/application/upgrade/step_relink.go | 8 +++---- .../agent/application/upgrade/step_unpack.go | 2 +- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 90be1bbe2df..82126ba5fe0 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -161,7 +161,7 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error go func() { if err := cmd.Wait(); err != nil { - log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", "agent.process.pid", agentPID, upgradeWatcherPID, "error.message", err) + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", upgradeWatcherPID, "agent.process.pid", agentPID, "error.message", err) } }() diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index b7c273c9385..c4d9d46e7ed 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -8,8 +8,11 @@ package 
upgrade import ( "os/exec" + "syscall" "time" + "golang.org/x/sys/windows" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" ) @@ -25,5 +28,23 @@ func invokeCmd(agentExecutable string) *exec.Cmd { "--path.config", paths.Config(), "--path.home", paths.Top(), ) + + cmd.SysProcAttr = &syscall.SysProcAttr{ + // Signals are sent to process groups, and child processes are part of the + // parent's process group. So to send a signal to a + // child process and not have it also affect ourselves + // (the parent process), the child needs to be created in a new + // process group. + // + // Creating a child with CREATE_NEW_PROCESS_GROUP disables CTRL_C_EVENT + // handling for the child, so the only way to gracefully stop it is with + // a CTRL_BREAK_EVENT signal. + // https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags + // + // Watcher process will also need a console in order to receive CTRL_BREAK_EVENT on windows. + // Elastic Agent main process running as a service does not have a console allocated and the watcher process will also + // outlive its parent during an upgrade operation so we add the CREATE_NEW_CONSOLE flag. 
+ CreationFlags: windows.CREATE_NEW_PROCESS_GROUP | windows.CREATE_NEW_CONSOLE, + } return cmd } diff --git a/internal/pkg/agent/application/upgrade/step_relink.go b/internal/pkg/agent/application/upgrade/step_relink.go index f9256d9980d..d6fc9a6b9c1 100644 --- a/internal/pkg/agent/application/upgrade/step_relink.go +++ b/internal/pkg/agent/application/upgrade/step_relink.go @@ -15,14 +15,14 @@ import ( ) const ( - windows = "windows" - exe = ".exe" + windowsOSName = "windows" + exe = ".exe" ) func changeSymlink(log *logger.Logger, topDirPath, symlinkPath, newTarget string) error { // handle windows suffixes - if runtime.GOOS == windows { + if runtime.GOOS == windowsOSName { symlinkPath += exe newTarget += exe } @@ -47,7 +47,7 @@ func prevSymlinkPath(topDirPath string) string { agentPrevName := agentName + ".prev" // handle windows suffixes - if runtime.GOOS == windows { + if runtime.GOOS == windowsOSName { agentPrevName = agentName + ".exe.prev" } diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index 830fd1b0663..6d165722e6c 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -41,7 +41,7 @@ func (u *Upgrader) unpack(version, archivePath, dataDir string, flavor string) ( // or the extraction will be double nested var unpackRes UnpackResult var err error - if runtime.GOOS == windows { + if runtime.GOOS == windowsOSName { unpackRes, err = unzip(u.log, archivePath, dataDir, flavor) } else { unpackRes, err = untar(u.log, archivePath, dataDir, flavor) From ca2d07311be40201aeb472dad4539c55938441eb Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 6 Aug 2025 19:21:05 +0200 Subject: [PATCH 21/38] Gracefully terminate watcher process on windows --- .mockery.yaml | 15 +- .../upgrade/mock_watchergrappler_test.go | 89 +++++++++ ...cks_test.go => mock_watcherhelper_test.go} | 0 .../pkg/agent/application/upgrade/watcher.go | 
47 +++-- .../application/upgrade/watcher_notwindows.go | 27 +++ .../agent/application/upgrade/watcher_test.go | 176 +++++++++--------- .../application/upgrade/watcher_windows.go | 34 ++++ internal/pkg/agent/cmd/watch.go | 20 +- internal/pkg/agent/cmd/watch_impl.go | 6 +- .../pkg/agent/cmd/watch_impl_notwindows.go | 47 +++++ internal/pkg/agent/cmd/watch_impl_windows.go | 105 +++++++++++ 11 files changed, 446 insertions(+), 120 deletions(-) create mode 100644 internal/pkg/agent/application/upgrade/mock_watchergrappler_test.go rename internal/pkg/agent/application/upgrade/{mocks_test.go => mock_watcherhelper_test.go} (100%) create mode 100644 internal/pkg/agent/application/upgrade/watcher_notwindows.go create mode 100644 internal/pkg/agent/application/upgrade/watcher_windows.go create mode 100644 internal/pkg/agent/cmd/watch_impl_notwindows.go create mode 100644 internal/pkg/agent/cmd/watch_impl_windows.go diff --git a/.mockery.yaml b/.mockery.yaml index ae6b4ef9cca..845fe9cf74f 100644 --- a/.mockery.yaml +++ b/.mockery.yaml @@ -38,12 +38,13 @@ packages: config: mockname: "InstallationModifier" github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade: + config: + inpackage: True + with-expecter: True + dir: "{{.InterfaceDirRelative}}" + mockname: "{{.Mock}}{{.InterfaceName | firstUpper}}" + outpkg: "{{.PackageName}}" + filename: "{{.Mock | lower}}_{{.InterfaceName | lower}}_test.go" interfaces: WatcherHelper: - config: - inpackage: True - with-expecter: True - dir: "{{.InterfaceDirRelative}}" - mockname: "{{.Mock}}{{.InterfaceName}}" - outpkg: "{{.PackageName}}" - filename: "mocks_test.go" \ No newline at end of file + watcherGrappler: diff --git a/internal/pkg/agent/application/upgrade/mock_watchergrappler_test.go b/internal/pkg/agent/application/upgrade/mock_watchergrappler_test.go new file mode 100644 index 00000000000..b5e6a668262 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/mock_watchergrappler_test.go @@ -0,0 +1,89 @@ +// Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +// Code generated by mockery v2.53.4. DO NOT EDIT. + +package upgrade + +import ( + context "context" + + mock "github.com/stretchr/testify/mock" + + logp "github.com/elastic/elastic-agent-libs/logp" +) + +// mockWatcherGrappler is an autogenerated mock type for the watcherGrappler type +type mockWatcherGrappler struct { + mock.Mock +} + +type mockWatcherGrappler_Expecter struct { + mock *mock.Mock +} + +func (_m *mockWatcherGrappler) EXPECT() *mockWatcherGrappler_Expecter { + return &mockWatcherGrappler_Expecter{mock: &_m.Mock} +} + +// TakeDownWatcher provides a mock function with given fields: ctx, log +func (_m *mockWatcherGrappler) TakeDownWatcher(ctx context.Context, log *logp.Logger) error { + ret := _m.Called(ctx, log) + + if len(ret) == 0 { + panic("no return value specified for TakeDownWatcher") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger) error); ok { + r0 = rf(ctx, log) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// mockWatcherGrappler_TakeDownWatcher_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'TakeDownWatcher' +type mockWatcherGrappler_TakeDownWatcher_Call struct { + *mock.Call +} + +// TakeDownWatcher is a helper method to define mock.On call +// - ctx context.Context +// - log *logp.Logger +func (_e *mockWatcherGrappler_Expecter) TakeDownWatcher(ctx interface{}, log interface{}) *mockWatcherGrappler_TakeDownWatcher_Call { + return &mockWatcherGrappler_TakeDownWatcher_Call{Call: _e.mock.On("TakeDownWatcher", ctx, log)} +} + +func (_c *mockWatcherGrappler_TakeDownWatcher_Call) Run(run func(ctx context.Context, log *logp.Logger)) *mockWatcherGrappler_TakeDownWatcher_Call { + _c.Call.Run(func(args mock.Arguments) { + 
run(args[0].(context.Context), args[1].(*logp.Logger)) + }) + return _c +} + +func (_c *mockWatcherGrappler_TakeDownWatcher_Call) Return(_a0 error) *mockWatcherGrappler_TakeDownWatcher_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *mockWatcherGrappler_TakeDownWatcher_Call) RunAndReturn(run func(context.Context, *logp.Logger) error) *mockWatcherGrappler_TakeDownWatcher_Call { + _c.Call.Return(run) + return _c +} + +// newMockWatcherGrappler creates a new instance of mockWatcherGrappler. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func newMockWatcherGrappler(t interface { + mock.TestingT + Cleanup(func()) +}) *mockWatcherGrappler { + mock := &mockWatcherGrappler{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/pkg/agent/application/upgrade/mocks_test.go b/internal/pkg/agent/application/upgrade/mock_watcherhelper_test.go similarity index 100% rename from internal/pkg/agent/application/upgrade/mocks_test.go rename to internal/pkg/agent/application/upgrade/mock_watcherhelper_test.go diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 22d7fffbf9c..b90cab647f9 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "os" "os/exec" "path/filepath" "time" @@ -20,8 +19,6 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" - "github.com/elastic/elastic-agent/pkg/core/process" - "github.com/elastic/elastic-agent/pkg/utils" ) const ( @@ -286,17 +283,32 @@ func (a AgentWatcherHelper) WaitForWatcher(ctx context.Context, log *logger.Logg } func (a 
AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { - return takeOverWatcher(ctx, log, topDir, utils.GetWatcherPIDs, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) + return takeOverWatcher(ctx, log, new(commandWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) } -// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. -// This will allow for easier testing of takeOverWatcher using fake binaries -type watcherPIDsFetcher func() ([]int, error) +// watcherGrappler is an abstraction over the way elastic-agent main process should take down (stop, gracefully if possible) a watcher process +type watcherGrappler interface { + TakeDownWatcher(ctx context.Context, log *logger.Logger) error +} + +type commandWatcherGrappler struct{} + +func (c commandWatcherGrappler) TakeDownWatcher(ctx context.Context, log *logger.Logger) error { + cmd := createTakeDownWatcherCommand(ctx) + log.Debugf("launching takedown with %v", cmd.Args) + output, err := cmd.CombinedOutput() + log.Debugf("takedown output: %s", string(output)) + if err != nil { + return fmt.Errorf("watcher command takedown failed: %w", err) + } + return nil +} // Private functions providing implementation of AgentWatcherHelper -func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, pidFetchFunc watcherPIDsFetcher, timeout time.Duration, watcherSweepInterval time.Duration, takeOverInterval time.Duration) (*filelock.AppLocker, error) { +func takeOverWatcher(ctx context.Context, log *logger.Logger, watcherGrappler watcherGrappler, topDir string, timeout time.Duration, watcherSweepInterval time.Duration, takeOverInterval time.Duration) (*filelock.AppLocker, error) { takeoverCtx, takeoverCancel := context.WithTimeout(ctx, timeout) defer takeoverCancel() + go func() { sweepTicker := time.NewTicker(watcherSweepInterval) defer sweepTicker.Stop() @@ -305,27 
+317,12 @@ func takeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, pid case <-takeoverCtx.Done(): return case <-sweepTicker.C: - pids, err := pidFetchFunc() + err := watcherGrappler.TakeDownWatcher(takeoverCtx, log) if err != nil { - log.Errorf("error listing watcher processes: %s", err) + log.Errorf("error taking down watcher: %s", err) continue } - // this should be run continuously and concurrently attempting to get the app locker - for _, pid := range pids { - log.Debugf("attempting to kill watcher process with PID: %d", pid) - watcherProcess, findProcErr := os.FindProcess(pid) - if findProcErr != nil { - log.Errorf("error finding process with PID: %d: %s", pid, findProcErr) - continue - } - killProcErr := process.Terminate(watcherProcess) - if killProcErr != nil { - log.Errorf("error killing process with PID: %d: %s", pid, killProcErr) - continue - } - log.Debugf("killed watcher process with PID: %d", pid) - } } } }() diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go new file mode 100644 index 00000000000..8c5e8726108 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -0,0 +1,27 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build !windows + +package upgrade + +import ( + "context" + "os" + "os/exec" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" +) + +func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { + executable, _ := os.Executable() + + // #nosec G204 -- user cannot inject any parameters to this command + cmd := exec.CommandContext(ctx, executable, watcherSubcommand, + "--path.config", paths.Config(), + "--path.home", paths.Top(), + "--takedown", + ) + return cmd +} diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 5f28cdd05ec..04a86f1b341 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -10,16 +10,17 @@ import ( "net" "os" "path/filepath" - "runtime" "sync" "testing" "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "google.golang.org/grpc" "gopkg.in/yaml.v3" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" @@ -874,34 +875,34 @@ func writeState(t *testing.T, path string, state details.State) { // This test cannot run in parallel because it deals with launching test processes and verifying their state. 
// In case of aggressive PID reuse along with parallel execution, this test could kill "innocent" processes func TestTakeOverWatcher(t *testing.T) { - testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") - if runtime.GOOS == "windows" { - testExecutablePath += ".exe" - } - testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) - require.NoError(t, err, "error calculating absolute test executable part") - - require.FileExists(t, testExecutableAbsolutePath, - "testlocker binary not found.\n"+ - "Check that:\n"+ - "- test binaries have been built with mage dev:buildtestbinaries\n"+ - "- the path of the executable is correct") - - returnCmdPIDsFetcher := func(cmds ...*process.Info) watcherPIDsFetcher { - return func() ([]int, error) { - pids := make([]int, 0, len(cmds)) - for _, c := range cmds { - if c.Process != nil { - pids = append(pids, c.Process.Pid) - } - } - - return pids, nil - } - } - - type setupFunc func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) - type assertFunc func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) + //testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") + //if runtime.GOOS == "windows" { + // testExecutablePath += ".exe" + //} + //testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) + //require.NoError(t, err, "error calculating absolute test executable part") + // + //require.FileExists(t, testExecutableAbsolutePath, + // "testlocker binary not found.\n"+ + // "Check that:\n"+ + // "- test binaries have been built with mage dev:buildtestbinaries\n"+ + // "- the path of the executable is correct") + + //returnCmdPIDsFetcher := func(cmds ...*process.Info) watcherPIDsFetcher { + // return func() ([]int, error) { + // pids := make([]int, 0, len(cmds)) + // for _, c := range cmds { + // if c.Process != nil { + // pids = append(pids, c.Process.Pid) + // } + // } + // + // return pids, nil + // } + 
//} + + type setupFunc func(t *testing.T, workdir string, mockWatcherGrappler *mockWatcherGrappler) + type assertFunc func(t *testing.T, workdir string, appLocker *filelock.AppLocker) testcases := []struct { name string @@ -911,74 +912,81 @@ func TestTakeOverWatcher(t *testing.T) { }{ { name: "no contention for watcher applocker", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) { - // nothing to do here, always return and empty list of pids - return func() ([]int, error) { - return nil, nil - }, nil + setup: func(t *testing.T, workdir string, mockWatcherGrappler *mockWatcherGrappler) { + // nothing to do here }, wantErr: assert.NoError, - assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, _ []*process.Info) { + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker) { assert.NotNil(t, appLocker, "appLocker should not be nil") assert.FileExists(t, filepath.Join(workdir, watcherApplockerFileName)) }, }, { - name: "contention with test binary listening to signals: test binary is terminated gracefully", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) { - cancelFunc, cmd := createTestlockerCommand(t.Context(), t, testExecutableAbsolutePath, workdir, false) - t.Cleanup(cancelFunc) - require.NoError(t, err, "error starting testlocker binary") - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, filepath.Join(workdir, watcherApplockerFileName), "watcher applocker should have been created by the test binary") - }, 10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return returnCmdPIDsFetcher(cmd), []*process.Info{cmd} + name: "contention with a process that can be taken down: no error", + setup: func(t *testing.T, 
workdir string, mockWatcherGrappler *mockWatcherGrappler) { + // create and lock an applocker + locker := filelock.NewAppLocker(workdir, watcherApplockerFileName) + err := locker.TryLock() + require.NoError(t, err, "error setting up the applocker") + mockWatcherGrappler.EXPECT().TakeDownWatcher(mock.Anything, mock.Anything).Run(func(_ context.Context, _ *logp.Logger) { + unlockErr := locker.Unlock() + assert.NoError(t, unlockErr, "error unlocking the applocker") + }).Return(nil) + + // add a cleanup to unlock the applocker at the end of the test anyway in case of failures + t.Cleanup(func() { + locker.Unlock() + }) }, wantErr: assert.NoError, - assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) { + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker) { assert.NotNil(t, appLocker, "appLocker should not be nil") assert.FileExists(t, filepath.Join(workdir, watcherApplockerFileName)) - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess.Cmd, "test locker process info should have exec.Cmd set") - err = testlockerProcess.Cmd.Wait() - assert.NoError(t, err, "error waiting for testlocker process to terminate") - if assert.NotNil(t, testlockerProcess.Cmd.ProcessState, "test locker process should have completed and process state set") { - assert.True(t, testlockerProcess.Cmd.ProcessState.Success(), "test locker process should be successful") - } }, }, { - name: "contention with test binary not listening to signals: test binary is not terminated and error is returned by takeOverWatcher", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*process.Info) { - cancelFunc, cmd := createTestlockerCommand(t.Context(), t, testExecutableAbsolutePath, workdir, true) - t.Cleanup(cancelFunc) - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, 
filepath.Join(workdir, watcherApplockerFileName), "watcher applocker should have been created by the test binary") - }, 10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return returnCmdPIDsFetcher(cmd), []*process.Info{cmd} + name: "contention with a process that can be taken down with multiple attempts: no error", + setup: func(t *testing.T, workdir string, mockWatcherGrappler *mockWatcherGrappler) { + // create and lock an applocker + locker := filelock.NewAppLocker(workdir, watcherApplockerFileName) + err := locker.TryLock() + require.NoError(t, err, "error setting up the applocker") + mockWatcherGrappler.EXPECT().TakeDownWatcher(mock.Anything, mock.Anything).Return(fmt.Errorf("some takedown error")).Once() + mockWatcherGrappler.EXPECT().TakeDownWatcher(mock.Anything, mock.Anything).Run(func(_ context.Context, _ *logp.Logger) { + unlockErr := locker.Unlock() + assert.NoError(t, unlockErr, "error unlocking the applocker") + }).Return(nil) + + // add a cleanup to unlock the applocker at the end of the test anyway in case of failures + t.Cleanup(func() { + locker.Unlock() + }) + }, + wantErr: assert.NoError, + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker) { + assert.NotNil(t, appLocker, "appLocker should not be nil") + assert.FileExists(t, filepath.Join(workdir, watcherApplockerFileName)) + }, + }, + { + name: "contention with a process that cannot be taken down: error is returned by takeOverWatcher", + setup: func(t *testing.T, workdir string, mockWatcherGrappler *mockWatcherGrappler) { + // create and lock an applocker + locker := filelock.NewAppLocker(workdir, watcherApplockerFileName) + err := locker.TryLock() + require.NoError(t, err, "error setting up the applocker") + + // Expect the calls to applocker but do not release the lock + 
mockWatcherGrappler.EXPECT().TakeDownWatcher(mock.Anything, mock.Anything).Return(nil) + + // add a cleanup to unlock the applocker at the end of the test anyway + t.Cleanup(func() { + locker.Unlock() + }) }, wantErr: assert.Error, - assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker, cmds []*process.Info) { + assertPostTakeover: func(t *testing.T, workdir string, appLocker *filelock.AppLocker) { assert.Nil(t, appLocker, "appLocker should be nil") - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess.Process, "testlocker process should not be nil") - assert.Nil(t, testlockerProcess.Cmd.ProcessState, "testlocker process should not have ProcessState set since it should still be running") - err := testlockerProcess.Process.Kill() - assert.NoError(t, err, "error killing testlocker process") }, }, } @@ -987,9 +995,11 @@ func TestTakeOverWatcher(t *testing.T) { t.Run(tc.name, func(t *testing.T) { workDir := t.TempDir() logger, logs := loggertest.New(t.Name()) - pidFetcher, cmds := tc.setup(t, workDir) - appLocker, err := takeOverWatcher(t.Context(), logger, workDir, pidFetcher, 10*time.Second, 500*time.Millisecond, 100*time.Millisecond) + mockGrappler := newMockWatcherGrappler(t) + tc.setup(t, workDir, mockGrappler) + + appLocker, err := takeOverWatcher(t.Context(), logger, mockGrappler, workDir, 10*time.Second, 500*time.Millisecond, 100*time.Millisecond) loggertest.PrintObservedLogs(logs.TakeAll(), t.Log) tc.wantErr(t, err) @@ -1000,7 +1010,7 @@ func TestTakeOverWatcher(t *testing.T) { }(appLocker) } if tc.assertPostTakeover != nil { - tc.assertPostTakeover(t, workDir, appLocker, cmds) + tc.assertPostTakeover(t, workDir, appLocker) } }) } diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go new file mode 100644 index 00000000000..7a8a8e0d409 --- /dev/null +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ 
-0,0 +1,34 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build windows + +package upgrade + +import ( + "context" + "os" + "os/exec" + "syscall" + + "golang.org/x/sys/windows" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" +) + +func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { + executable, _ := os.Executable() + + // #nosec G204 -- user cannot inject any parameters to this command + cmd := exec.CommandContext(ctx, executable, watcherSubcommand, + "--path.config", paths.Config(), + "--path.home", paths.Top(), + "--takedown", + ) + cmd.SysProcAttr = &syscall.SysProcAttr{ + // https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags + CreationFlags: windows.DETACHED_PROCESS, + } + return cmd +} diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index d25d56614df..a1c8338dfb0 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -16,6 +16,7 @@ import ( "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/logp/configure" "github.com/elastic/elastic-agent/pkg/control/v2/client" + "github.com/elastic/elastic-agent/pkg/utils" "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" @@ -42,7 +43,7 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command Use: "watch", Short: "Watch the Elastic Agent for failures and initiate rollback", Long: `This command watches Elastic Agent for failures and initiates rollback if necessary.`, - Run: func(_ *cobra.Command, _ []string) { + Run: func(c *cobra.Command, _ []string) { cfg := getConfig(streams) log, err := configuredLogger(cfg, watcherName) if err 
!= nil { @@ -53,6 +54,16 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command // Make sure to flush any buffered logs before we're done. defer log.Sync() //nolint:errcheck // flushing buffered logs is best effort. + takedown, _ := c.Flags().GetBool("takedown") + if takedown { + err = takedownWatcher(log, utils.GetWatcherPIDs) + if err != nil { + log.Errorf("error taking down watcher: %v", err) + os.Exit(5) + } + return + } + if err := watchCmd(log, paths.Top(), cfg.Settings.Upgrade.Watcher, new(upgradeAgentWatcher), new(upgradeInstallationModifier)); err != nil { log.Errorw("Watch command failed", "error.message", err) fmt.Fprintf(streams.Err, "Watch command failed: %v\n%s\n", err, troubleshootMessage()) @@ -60,7 +71,8 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command } }, } - + cmd.Flags().BoolP("takedown", "t", false, "Take down the running watcher") + cmd.Flags().MarkHidden("takedown") //nolint:errcheck // not required return cmd } @@ -104,7 +116,7 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work - log.Info("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) + log.Infof("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) if err != nil { return fmt.Errorf("rolling back: %w", err) @@ -118,7 +130,7 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher marker.Details = details.NewDetails(marker.Version, details.StateRollback, actionID) } marker.Details.SetStateWithReason(details.StateRollback, details.ReasonManualRollback) - err := upgrade.SaveMarker(dataDir, marker, true) + err = 
upgrade.SaveMarker(dataDir, marker, true) if err != nil { return fmt.Errorf("saving marker after rolling back: %w", err) } diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index 2a198480209..5e6294c8bec 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -33,6 +33,10 @@ func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.L return upgrade.Rollback(ctx, log, c, topDirPath, prevVersionedHome, prevHash) } +// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. +// This will allow for easier testing of takeOverWatcher using fake binaries +type watcherPIDsFetcher func() ([]int, error) + func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logger.Logger) error { errChan := make(chan error) @@ -48,7 +52,7 @@ func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time. go agtWatcher.Run(ctx) signals := make(chan os.Signal, 1) - signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) graceTimer := time.NewTimer(tilGrace) defer graceTimer.Stop() diff --git a/internal/pkg/agent/cmd/watch_impl_notwindows.go b/internal/pkg/agent/cmd/watch_impl_notwindows.go new file mode 100644 index 00000000000..5295a7de94b --- /dev/null +++ b/internal/pkg/agent/cmd/watch_impl_notwindows.go @@ -0,0 +1,47 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build !windows + +package cmd + +import ( + "fmt" + "os" + "syscall" + + "github.com/elastic/elastic-agent/pkg/core/logger" +) + +func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() + if err != nil { + return fmt.Errorf("error listing watcher processes: %s", err) + } + + ownPID := os.Getpid() + + for _, pid := range pids { + + if pid == ownPID { + continue + } + + log.Debugf("attempting to terminate watcher process with PID: %d", pid) + + process, err := os.FindProcess(pid) + if err != nil { + log.Errorf("error finding watcher process with PID: %d: %s", pid, err) + continue + } + + err = process.Signal(syscall.SIGTERM) + if err != nil { + log.Errorf("error killing watcher process with PID: %d: %s", pid, err) + continue + } + + } + return nil +} diff --git a/internal/pkg/agent/cmd/watch_impl_windows.go b/internal/pkg/agent/cmd/watch_impl_windows.go new file mode 100644 index 00000000000..7e49b906633 --- /dev/null +++ b/internal/pkg/agent/cmd/watch_impl_windows.go @@ -0,0 +1,105 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build windows + +package cmd + +import ( + "fmt" + "os" + "unsafe" + + gowindows "golang.org/x/sys/windows" + + "github.com/elastic/elastic-agent/pkg/core/logger" +) + +var ( + kernel32API = gowindows.NewLazySystemDLL("kernel32.dll") + + freeConsoleProc = kernel32API.NewProc("FreeConsole") + attachConsoleProc = kernel32API.NewProc("AttachConsole") + procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") +) + +func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() + if err != nil { + return fmt.Errorf("error listing watcher processes: %s", err) + } + + ownPID := os.Getpid() + + for _, pid := range pids { + + if pid == ownPID { + continue + } + + log.Debugf("attempting to terminate watcher process with PID: %d", pid) + // define an anonymous function in order to leverage the defer() for freeing console and other housekeeping + func() { + + r1, _, consoleErr := freeConsoleProc.Call() + if r1 == 0 { + log.Errorf("error preemptively detaching from console: %s", consoleErr) + } + + r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) + if r1 == 0 { + log.Errorf("error attaching console to watcher process with PID: %d -> %s", pid, consoleErr) + return + } + log.Infof("successfully attached console with PID: %d", pid) + + defer func() { + r1, _, consoleErr = freeConsoleProc.Call() + if r1 == 0 { + log.Errorf("error detaching from console: %s", consoleErr) + } else { + log.Infof("successfully detached from console of PID: %d", pid) + } + }() + + list, consoleErr := GetConsoleProcessList() + if consoleErr != nil { + log.Errorf("error listing console processes: %s", consoleErr) + } + + log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) + + // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that + // the parent process of the watcher has already terminated, we have to hug it tightly and take it 
down with us + // by specifying processGroupID=0 + killProcErr := gowindows.GenerateConsoleCtrlEvent(gowindows.CTRL_BREAK_EVENT, 0) + + if killProcErr != nil { + log.Errorf("error terminating process with PID: %d: %s", pid, killProcErr) + return + } + }() + + } + return nil +} + +// GetConsoleProcessList retrieves the list of process IDs attached to the current console +func GetConsoleProcessList() ([]uint32, error) { + // Allocate a buffer for PIDs + const maxProcs = 64 + pids := make([]uint32, maxProcs) + + r1, _, err := procGetConsoleProcessList.Call( + uintptr(unsafe.Pointer(&pids[0])), + uintptr(maxProcs), + ) + + count := uint32(r1) + if count == 0 { + return nil, err + } + + return pids[:count], nil +} From 14ce6841caaa1236498051bd4e31a4b1aa44191c Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Thu, 7 Aug 2025 15:35:27 +0200 Subject: [PATCH 22/38] Add watcher takedown tests --- .../application/filelock/testlocker/main.go | 12 +- .../pkg/agent/application/upgrade/rollback.go | 9 + .../application/upgrade/rollback_darwin.go | 9 +- .../application/upgrade/rollback_linux.go | 9 +- .../application/upgrade/rollback_windows.go | 13 +- .../agent/application/upgrade/watcher_test.go | 41 ---- internal/pkg/agent/cmd/watch_impl_test.go | 178 ++++++++++++++++++ internal/pkg/agent/cmd/watch_impl_windows.go | 88 ++++----- 8 files changed, 246 insertions(+), 113 deletions(-) diff --git a/internal/pkg/agent/application/filelock/testlocker/main.go b/internal/pkg/agent/application/filelock/testlocker/main.go index ed9817e4c58..4e95ed3c6e1 100644 --- a/internal/pkg/agent/application/filelock/testlocker/main.go +++ b/internal/pkg/agent/application/filelock/testlocker/main.go @@ -26,9 +26,8 @@ var lockFile = flag.String(lockFileFlagName, "", "path to lock file") var ignoreSignals = flag.Bool(ignoreSignalFlagName, false, "ignore signals") func main() { - - signalCh := make(chan os.Signal, 1) - signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, 
syscall.SIGHUP) + signalChan := make(chan os.Signal, 1) + signal.Notify(signalChan, os.Interrupt, syscall.SIGINT, syscall.SIGTERM) flag.Parse() if *lockFile == "" { @@ -52,13 +51,14 @@ func main() { log.Printf(AcquiredLockLogFmt, *lockFile) for { - s := <-signalCh + + s := <-signalChan if *ignoreSignals { - log.Printf("Received signal: %s, ignoring it...", s.String()) + log.Printf("Received signal %v , ignoring it...", s) continue } - log.Printf("Received signal: %s, exiting", s.String()) + log.Printf("Received signal %v , exiting...", s) break } } diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 82126ba5fe0..20020614a4b 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -171,6 +171,15 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error } +func invokeCmd(agentExecutable string) *exec.Cmd { + return InvokeCmdWithArgs( + agentExecutable, + watcherSubcommand, + "--path.config", paths.Config(), + "--path.home", paths.Top(), + ) +} + func restartAgent(ctx context.Context, log *logger.Logger, c client.Client) error { restartViaDaemonFn := func(ctx context.Context) error { connectCtx, connectCancel := context.WithTimeout(ctx, 3*time.Second) diff --git a/internal/pkg/agent/application/upgrade/rollback_darwin.go b/internal/pkg/agent/application/upgrade/rollback_darwin.go index 041abf11b40..ca40a58d4d1 100644 --- a/internal/pkg/agent/application/upgrade/rollback_darwin.go +++ b/internal/pkg/agent/application/upgrade/rollback_darwin.go @@ -11,8 +11,6 @@ import ( "os/exec" "syscall" "time" - - "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" ) const ( @@ -21,12 +19,9 @@ const ( afterRestartDelay = 2 * time.Second ) -func invokeCmd(agentExecutable string) *exec.Cmd { +func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { // #nosec G204 -- user cannot inject any 
parameters to this command - cmd := exec.Command(agentExecutable, watcherSubcommand, - "--path.config", paths.Config(), - "--path.home", paths.Top(), - ) + cmd := exec.Command(executable, args...) var cred = &syscall.Credential{ Uid: uint32(os.Getuid()), diff --git a/internal/pkg/agent/application/upgrade/rollback_linux.go b/internal/pkg/agent/application/upgrade/rollback_linux.go index b3388e2cb54..694ab86d62f 100644 --- a/internal/pkg/agent/application/upgrade/rollback_linux.go +++ b/internal/pkg/agent/application/upgrade/rollback_linux.go @@ -11,8 +11,6 @@ import ( "os/exec" "syscall" "time" - - "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" ) const ( @@ -21,12 +19,9 @@ const ( afterRestartDelay = 2 * time.Second ) -func invokeCmd(agentExecutable string) *exec.Cmd { +func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { // #nosec G204 -- user cannot inject any parameters to this command - cmd := exec.Command(agentExecutable, watcherSubcommand, - "--path.config", paths.Config(), - "--path.home", paths.Top(), - ) + cmd := exec.Command(executable, args...) 
var cred = &syscall.Credential{ Uid: uint32(os.Getuid()), diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index c4d9d46e7ed..eb7056bf959 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -12,8 +12,6 @@ import ( "time" "golang.org/x/sys/windows" - - "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" ) const ( @@ -22,16 +20,13 @@ const ( afterRestartDelay = 20 * time.Second ) -func invokeCmd(agentExecutable string) *exec.Cmd { +func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { // #nosec G204 -- user cannot inject any parameters to this command - cmd := exec.Command(agentExecutable, watcherSubcommand, - "--path.config", paths.Config(), - "--path.home", paths.Top(), - ) + cmd := exec.Command(executable, args...) cmd.SysProcAttr = &syscall.SysProcAttr{ // Signals are sent to process groups, and child process are part of the - // parent's prcoess group. So to send a signal to a + // parent's process group. So to send a signal to a // child process and not have it also affect ourselves // (the parent process), the child needs to be created in a new // process group. @@ -44,7 +39,7 @@ func invokeCmd(agentExecutable string) *exec.Cmd { // Watcher process will also need a console in order to receive CTRL_BREAK_EVENT on windows. // Elastic Agent main process running as a service does not have a console allocated and the watcher process will also // outlive its parent during an upgrade operation so we add the CREATE_NEW_CONSOLE flag. 
- CreationFlags: windows.CREATE_NEW_PROCESS_GROUP & windows.CREATE_NEW_CONSOLE, + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP, } return cmd } diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 04a86f1b341..1a12fc2e844 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -27,7 +27,6 @@ import ( "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" - "github.com/elastic/elastic-agent/pkg/core/process" agtversion "github.com/elastic/elastic-agent/pkg/version" ) @@ -875,31 +874,6 @@ func writeState(t *testing.T, path string, state details.State) { // This test cannot run in parallel because it deals with launching test processes and verifying their state. // In case of aggressive PID reuse along with parallel execution, this test could kill "innocent" processes func TestTakeOverWatcher(t *testing.T) { - //testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") - //if runtime.GOOS == "windows" { - // testExecutablePath += ".exe" - //} - //testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) - //require.NoError(t, err, "error calculating absolute test executable part") - // - //require.FileExists(t, testExecutableAbsolutePath, - // "testlocker binary not found.\n"+ - // "Check that:\n"+ - // "- test binaries have been built with mage dev:buildtestbinaries\n"+ - // "- the path of the executable is correct") - - //returnCmdPIDsFetcher := func(cmds ...*process.Info) watcherPIDsFetcher { - // return func() ([]int, error) { - // pids := make([]int, 0, len(cmds)) - // for _, c := range cmds { - // if c.Process != nil { - // pids = append(pids, c.Process.Pid) - // } - // } - // - // return pids, nil - // } - //} type setupFunc func(t *testing.T, workdir 
string, mockWatcherGrappler *mockWatcherGrappler) type assertFunc func(t *testing.T, workdir string, appLocker *filelock.AppLocker) @@ -1016,18 +990,3 @@ func TestTakeOverWatcher(t *testing.T) { } } - -func createTestlockerCommand(ctx context.Context, t *testing.T, testExecutablePath string, workdir string, ignoreSignals bool) (context.CancelFunc, *process.Info) { - cmdCtx, cmdCancel := context.WithCancel(ctx) - args := []string{"-lockfile", filepath.Join(workdir, watcherApplockerFileName)} - if ignoreSignals { - args = append(args, "-ignoresignals") - } - proc, err := process.Start( - testExecutablePath, - process.WithArgs(args), - process.WithContext(cmdCtx), - ) - require.NoError(t, err, "error starting testlocker binary") - return cmdCancel, proc -} diff --git a/internal/pkg/agent/cmd/watch_impl_test.go b/internal/pkg/agent/cmd/watch_impl_test.go index d9537b58c92..4cc9587db5a 100644 --- a/internal/pkg/agent/cmd/watch_impl_test.go +++ b/internal/pkg/agent/cmd/watch_impl_test.go @@ -8,6 +8,9 @@ import ( "context" "fmt" "os" + "os/exec" + "path/filepath" + "runtime" "syscall" "testing" "time" @@ -15,9 +18,13 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" ) +const applockerFileName = "mocklocker.lock" + func Test_watchLoop(t *testing.T) { t.Run("watchloop returns when context expires - no error", func(t *testing.T) { @@ -71,3 +78,174 @@ func Test_watchLoop(t *testing.T) { } }) } + +func Test_takedownWatcher(t *testing.T) { + + testExecutablePath := filepath.Join("..", "application", "filelock", "testlocker", "testlocker") + if runtime.GOOS == "windows" { + testExecutablePath += ".exe" + } + testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) + require.NoError(t, err, "error calculating absolute 
test executable part") + + require.FileExists(t, testExecutableAbsolutePath, + "testlocker binary not found.\n"+ + "Check that:\n"+ + "- test binaries have been built with mage build:testbinaries\n"+ + "- the path of the executable is correct") + + returnCmdPIDsFetcher := func(cmds ...*exec.Cmd) watcherPIDsFetcher { + return func() ([]int, error) { + pids := make([]int, 0, len(cmds)) + for _, c := range cmds { + if c.Process != nil { + pids = append(pids, c.Process.Pid) + } + } + + return pids, nil + } + } + + type setupFunc func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) + type assertFunc func(t *testing.T, workdir string, cmds []*exec.Cmd) + + tests := []struct { + name string + setup setupFunc + wantErr assert.ErrorAssertionFunc + assertPostTakedown assertFunc + }{ + { + name: "no contention for watcher applocker", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + // nothing to do here, always return and empty list of pids + return func() ([]int, error) { + return nil, nil + }, nil + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, _ []*exec.Cmd) { + // we should be able to lock, no problem + locker := filelock.NewAppLocker(workdir, applockerFileName) + lockError := locker.TryLock() + t.Cleanup(func() { + _ = locker.Unlock() + }) + + assert.NoError(t, lockError) + + }, + }, + { + name: "contention with test binary listening to signals: test binary is terminated gracefully", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + cmd := createTestlockerCommand(t, testExecutableAbsolutePath, workdir, false) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) 
+ require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") + + err = testlockerProcess.Wait() + assert.NoError(t, err, "error waiting for testlocker process to terminate") + + if assert.NotNil(t, testlockerProcess.ProcessState, "test locker process should have completed and process state set") { + assert.True(t, testlockerProcess.ProcessState.Success(), "test locker process should be successful") + } + + assert.FileExists(t, filepath.Join(workdir, applockerFileName)) + testApplocker := filelock.NewAppLocker(workdir, applockerFileName) + testApplockerError := testApplocker.TryLock() + t.Cleanup(func() { + _ = testApplocker.Unlock() + }) + assert.NoError(t, testApplockerError, "error locking applocker") + }, + }, + { + name: "contention with test binary not listening to signals: test binary is not terminated", + setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + cmd := createTestlockerCommand(t, testExecutableAbsolutePath, workdir, true) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + }, + wantErr: assert.NoError, + 
assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") + + // check that the process is still running + assert.Nil(t, testlockerProcess.ProcessState, "testlocker process should not have ProcessState set since it should still be running") + assert.NotNil(t, testlockerProcess.Process, "testlocker process should have an os.Process set") + process, findProcessErr := os.FindProcess(testlockerProcess.Process.Pid) + require.NoErrorf(t, findProcessErr, "error finding test process with pid %d", testlockerProcess.Process.Pid) + require.NotNil(t, process, "test process should be found among the running processes") + if runtime.GOOS != "windows" { + // for unix systems we need an additional check since FindProcess will always return a *os.Process. + // Poke it with a stick (signal) + // see https://pkg.go.dev/os#FindProcess + signalErr := process.Signal(syscall.Signal(0)) + require.NoError(t, signalErr, "error signaling test process: this means it's not running") + } + err := testlockerProcess.Process.Kill() + assert.NoError(t, err, "error killing testlocker process") + assert.Nil(t, testlockerProcess.ProcessState, "testlocker process should not have ProcessState set since it should still be running") + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + workDir := t.TempDir() + log, obsLogs := loggertest.New(t.Name()) + pidFetcher, processInfos := tc.setup(t, workDir) + tc.wantErr(t, takedownWatcher(log, pidFetcher)) + t.Logf("takedown logs: %v", obsLogs) + if tc.assertPostTakedown != nil { + tc.assertPostTakedown(t, workDir, processInfos) + } + }) + } +} + +func createTestlockerCommand(t *testing.T, testExecutablePath string, workdir string, ignoreSignals bool) *exec.Cmd { + args := []string{"-lockfile", filepath.Join(workdir, applockerFileName)} + if ignoreSignals { + args = 
append(args, "-ignoresignals") + } + + // use the same invoke as the one used to launch a watcher + cmd := upgrade.InvokeCmdWithArgs(testExecutablePath, args...) + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + err := cmd.Start() + require.NoError(t, err, "error starting testlocker binary") + return cmd +} diff --git a/internal/pkg/agent/cmd/watch_impl_windows.go b/internal/pkg/agent/cmd/watch_impl_windows.go index 7e49b906633..0dfff30837e 100644 --- a/internal/pkg/agent/cmd/watch_impl_windows.go +++ b/internal/pkg/agent/cmd/watch_impl_windows.go @@ -7,6 +7,7 @@ package cmd import ( + "errors" "fmt" "os" "unsafe" @@ -32,6 +33,7 @@ func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error ownPID := os.Getpid() + var accumulatedSignalingErrors error for _, pid := range pids { if pid == ownPID { @@ -39,50 +41,10 @@ func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error } log.Debugf("attempting to terminate watcher process with PID: %d", pid) - // define an anonymous function in order to leverage the defer() for freeing console and other housekeeping - func() { - - r1, _, consoleErr := freeConsoleProc.Call() - if r1 == 0 { - log.Errorf("error preemptively detaching from console: %s", consoleErr) - } - - r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) - if r1 == 0 { - log.Errorf("error attaching console to watcher process with PID: %d -> %s", pid, consoleErr) - return - } - log.Infof("successfully attached console with PID: %d", pid) - - defer func() { - r1, _, consoleErr = freeConsoleProc.Call() - if r1 == 0 { - log.Errorf("error detaching from console: %s", consoleErr) - } else { - log.Infof("successfully detached from console of PID: %d", pid) - } - }() - - list, consoleErr := GetConsoleProcessList() - if consoleErr != nil { - log.Errorf("error listing console processes: %s", consoleErr) - } - - log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) - - // Normally 
we would want to send the Ctrl+Break event only to the watcher process but due to the fact that - // the parent process of the watcher has already terminated, we have to hug it tightly and take it down with us - // by specifying processGroupID=0 - killProcErr := gowindows.GenerateConsoleCtrlEvent(gowindows.CTRL_BREAK_EVENT, 0) - - if killProcErr != nil { - log.Errorf("error terminating process with PID: %d: %s", pid, killProcErr) - return - } - }() - + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, signalPID(log, pid)) } - return nil + + return accumulatedSignalingErrors } // GetConsoleProcessList retrieves the list of process IDs attached to the current console @@ -103,3 +65,43 @@ func GetConsoleProcessList() ([]uint32, error) { return pids[:count], nil } + +// signalPID takes care of signaling a given PID. It also leverages defer() for freeing console and other housekeeping +func signalPID(log *logger.Logger, pid int) error { + r1, _, consoleErr := freeConsoleProc.Call() + if r1 == 0 { + log.Warnf("error preemptively detaching from console: %s", consoleErr) + } + + r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) + if r1 == 0 { + return fmt.Errorf("error attaching console to watcher process with PID %d: %w", pid, consoleErr) + } + log.Infof("successfully attached console with PID: %d", pid) + + defer func() { + r1, _, consoleErr = freeConsoleProc.Call() + if r1 == 0 { + log.Errorf("error detaching from console: %s", consoleErr) + } else { + log.Infof("successfully detached from console of PID: %d", pid) + } + }() + + if list, consoleProcessListErr := GetConsoleProcessList(); consoleProcessListErr != nil { + log.Errorf("error listing console processes: %s", consoleProcessListErr) + } else { + log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) + } + + // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that + // the parent process of the 
watcher has already terminated, we have to hug it tightly and take it down with us + // by specifying processGroupID=0 + killProcErr := gowindows.GenerateConsoleCtrlEvent(gowindows.CTRL_BREAK_EVENT, uint32(pid)) + + if killProcErr != nil { + return fmt.Errorf("error signaling process with PID: %d: %w", pid, killProcErr) + } + + return nil +} From 8e401cd3167ed5faac1134ac8510d095dbb1799c Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 11 Aug 2025 08:32:22 +0200 Subject: [PATCH 23/38] Add in-process watcher grappler --- .../pkg/agent/application/upgrade/watcher.go | 13 ++- .../application/upgrade/watcher_notwindows.go | 40 ++++++++ .../application/upgrade/watcher_windows.go | 99 +++++++++++++++++++ internal/pkg/agent/cmd/watch_impl_windows.go | 3 + 4 files changed, 154 insertions(+), 1 deletion(-) diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index b90cab647f9..3469185477c 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -19,6 +19,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/utils" ) const ( @@ -283,9 +284,13 @@ func (a AgentWatcherHelper) WaitForWatcher(ctx context.Context, log *logger.Logg } func (a AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { - return takeOverWatcher(ctx, log, new(commandWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) + return takeOverWatcher(ctx, log, new(inProcessWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) } +// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. 
+// This will allow for easier testing of takeOverWatcher using fake binaries +type watcherPIDsFetcher func() ([]int, error) + // watcherGrappler is an abstraction over the way elastic-agent main process should take down (stop, gracefully if possible) a watcher process type watcherGrappler interface { TakeDownWatcher(ctx context.Context, log *logger.Logger) error @@ -346,6 +351,12 @@ func takeOverWatcher(ctx context.Context, log *logger.Logger, watcherGrappler wa } } +type inProcessWatcherGrappler struct{} + +func (i inProcessWatcherGrappler) TakeDownWatcher(ctx context.Context, log *logger.Logger) error { + return takedownWatcher(ctx, log, utils.GetWatcherPIDs) +} + func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { // check if the upgraded version is less than the previous (currently installed) version if current.parsedVersion.Less(*previous.parsedVersion) { diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go index 8c5e8726108..af4da8a56b7 100644 --- a/internal/pkg/agent/application/upgrade/watcher_notwindows.go +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -8,10 +8,14 @@ package upgrade import ( "context" + "errors" + "fmt" "os" "os/exec" + "syscall" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" ) func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { @@ -25,3 +29,39 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { ) return cmd } + +func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() + if err != nil { + return fmt.Errorf("error listing watcher processes: %s", err) + } + + ownPID := os.Getpid() + var accumulatedSignalingErrors error + for _, pid := range pids { + + if ctx.Err() != nil { + return ctx.Err() + } + + if pid 
== ownPID { + continue + } + + log.Debugf("attempting to terminate watcher process with PID: %d", pid) + + process, err := os.FindProcess(pid) + if err != nil { + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error finding watcher process with PID: %d: %s", pid, err)) + continue + } + + err = process.Signal(syscall.SIGTERM) + if err != nil { + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error killing watcher process with PID: %d: %s", pid, err)) + continue + } + + } + return accumulatedSignalingErrors +} diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index 7a8a8e0d409..92115dcecdf 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -8,13 +8,27 @@ package upgrade import ( "context" + "errors" + "fmt" "os" "os/exec" "syscall" + "unsafe" "golang.org/x/sys/windows" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/pkg/core/logger" +) + +var ( + kernel32API = windows.NewLazySystemDLL("kernel32.dll") + + freeConsoleProc = kernel32API.NewProc("FreeConsole") + attachConsoleProc = kernel32API.NewProc("AttachConsole") + procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") + procSetConsoleHandler = kernel32API.NewProc("SetConsoleCtrlHandler") + allocConsoleProc = kernel32API.NewProc("AllocConsole") ) func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { @@ -32,3 +46,88 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { } return cmd } + +func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() + if err != nil { + return fmt.Errorf("error listing watcher processes: %s", err) + } + + ownPID := os.Getpid() + + var accumulatedSignalingErrors error + for _, pid 
:= range pids { + + if ctx.Err() != nil { + return ctx.Err() + } + + if pid == ownPID { + continue + } + + log.Debugf("attempting to terminate watcher process with PID: %d", pid) + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, signalPID(log, pid)) + } + + return accumulatedSignalingErrors +} + +// GetConsoleProcessList retrieves the list of process IDs attached to the current console +func GetConsoleProcessList() ([]uint32, error) { + // Allocate a buffer for PIDs + const maxProcs = 64 + pids := make([]uint32, maxProcs) + + r1, _, err := procGetConsoleProcessList.Call( + uintptr(unsafe.Pointer(&pids[0])), + uintptr(maxProcs), + ) + + count := uint32(r1) + if count == 0 { + return nil, err + } + + return pids[:count], nil +} + +// signalPID takes care of signaling a given PID. It also leverages defer() for freeing console and other housekeeping +func signalPID(log *logger.Logger, pid int) error { + r1, _, consoleErr := freeConsoleProc.Call() + if r1 == 0 { + log.Warnf("error preemptively detaching from console: %s", consoleErr) + } + + r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) + if r1 == 0 { + return fmt.Errorf("error attaching console to watcher process with PID %d: %w", pid, consoleErr) + } + log.Infof("successfully attached console with PID: %d", pid) + + defer func() { + r1, _, consoleErr = freeConsoleProc.Call() + if r1 == 0 { + log.Errorf("error detaching from console: %s", consoleErr) + } else { + log.Infof("successfully detached from console of PID: %d", pid) + } + }() + + if list, consoleProcessListErr := GetConsoleProcessList(); consoleProcessListErr != nil { + log.Errorf("error listing console processes: %s", consoleProcessListErr) + } else { + log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) + } + + // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that + // the parent process of the watcher has already terminated, we 
have to hug it tightly and take it down with us + // by specifying processGroupID=0 + killProcErr := windows.GenerateConsoleCtrlEvent(windows.CTRL_BREAK_EVENT, uint32(pid)) + + if killProcErr != nil { + return fmt.Errorf("error signaling process with PID: %d: %w", pid, killProcErr) + } + + return nil +} diff --git a/internal/pkg/agent/cmd/watch_impl_windows.go b/internal/pkg/agent/cmd/watch_impl_windows.go index 0dfff30837e..0cc25c84b3e 100644 --- a/internal/pkg/agent/cmd/watch_impl_windows.go +++ b/internal/pkg/agent/cmd/watch_impl_windows.go @@ -23,9 +23,12 @@ var ( freeConsoleProc = kernel32API.NewProc("FreeConsole") attachConsoleProc = kernel32API.NewProc("AttachConsole") procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") + procSetConsoleHandler = kernel32API.NewProc("SetConsoleCtrlHandler") + allocConsoleProc = kernel32API.NewProc("AllocConsole") ) func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() if err != nil { return fmt.Errorf("error listing watcher processes: %s", err) From 98727ac22b212f9f575d63e8e140a6ad6448f6e2 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 11 Aug 2025 08:59:42 +0200 Subject: [PATCH 24/38] WIP use in-process grappler --- .../pkg/agent/application/upgrade/rollback.go | 20 +++------ .../upgrade/rollback_notwindows.go | 26 +++++++++++ .../application/upgrade/rollback_windows.go | 44 +++++++++++++++++++ .../integration/ess/upgrade_rollback_test.go | 3 +- 4 files changed, 79 insertions(+), 14 deletions(-) create mode 100644 internal/pkg/agent/application/upgrade/rollback_notwindows.go diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 20020614a4b..c8167a10c6e 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -149,29 +149,23 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error 
log.Info("agent is not upgradable, not starting watcher") return nil, nil } - - cmd := invokeCmd(agentExecutable) - log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) - if err := cmd.Start(); err != nil { - return nil, fmt.Errorf("failed to start Upgrade Watcher: %w", err) + // invokeWatcherCmd and StartWatcherCmd are platform-specific functions dealing with process launching details. + cmd, err := StartWatcherCmd(log, func() *exec.Cmd { return invokeWatcherCmd(agentExecutable) }) + if err != nil { + return nil, fmt.Errorf("starting watcher process: %w", err) } upgradeWatcherPID := cmd.Process.Pid agentPID := os.Getpid() - - go func() { - if err := cmd.Wait(); err != nil { - log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) - } - }() - log.Infow("Upgrade Watcher invoked", "agent.upgrade.watcher.process.pid", upgradeWatcherPID, "agent.process.pid", agentPID) return cmd, nil } -func invokeCmd(agentExecutable string) *exec.Cmd { +type cmdFactory func() *exec.Cmd + +func invokeWatcherCmd(agentExecutable string) *exec.Cmd { return InvokeCmdWithArgs( agentExecutable, watcherSubcommand, diff --git a/internal/pkg/agent/application/upgrade/rollback_notwindows.go b/internal/pkg/agent/application/upgrade/rollback_notwindows.go new file mode 100644 index 00000000000..5e9b3f4f3ba --- /dev/null +++ b/internal/pkg/agent/application/upgrade/rollback_notwindows.go @@ -0,0 +1,26 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build !windows + +package upgrade + +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory) (*exec.Cmd, error) { + cmd := createCmd() + log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start Upgrade Watcher: %w", err) + } + + upgradeWatcherPID := cmd.Process.Pid + agentPID := os.Getpid() + + go func() { + if err := cmd.Wait(); err != nil { + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) + } + }() + + return cmd, nil +} diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index eb7056bf959..9a1e494b146 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -7,11 +7,16 @@ package upgrade import ( + "errors" + "fmt" + "os" "os/exec" "syscall" "time" "golang.org/x/sys/windows" + + "github.com/elastic/elastic-agent/pkg/core/logger" ) const ( @@ -43,3 +48,42 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { } return cmd } + +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory) (*exec.Cmd, error) { + // allocConsole + r1, _, consoleErr := allocConsoleProc.Call() + if r1 == 0 { + if !errors.Is(consoleErr, windows.ERROR_ACCESS_DENIED) { + return nil, fmt.Errorf("error allocating console: %w", consoleErr) + } else { + log.Warnf("Already possessing a console") + } + + } + cmd := createCmd() + log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start Upgrade Watcher: %w", err) + } + list, consoleErr := GetConsoleProcessList() + if consoleErr != nil { + log.Errorf("failed to get console process list: %v", 
consoleErr) + } else { + log.Infof("Found console processes %v", list) + } + // free console + r1, _, consoleErr = freeConsoleProc.Call() + if r1 == 0 { + return nil, fmt.Errorf("error freeing console: %w", consoleErr) + } + upgradeWatcherPID := cmd.Process.Pid + agentPID := os.Getpid() + + go func() { + if err := cmd.Wait(); err != nil { + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) + } + }() + + return cmd, nil +} diff --git a/testing/integration/ess/upgrade_rollback_test.go b/testing/integration/ess/upgrade_rollback_test.go index e6e7045402b..fc2d64664f4 100644 --- a/testing/integration/ess/upgrade_rollback_test.go +++ b/testing/integration/ess/upgrade_rollback_test.go @@ -528,7 +528,8 @@ func standaloneRollbackTest(ctx context.Context, t *testing.T, startFixture *ate ctx, startFixture, endFixture, t, upgradetest.WithPostUpgradeHook(postUpgradeHook), upgradetest.WithCustomWatcherConfig(customConfig), - upgradetest.WithDisableHashCheck(true)) + upgradetest.WithDisableHashCheck(true), + ) if !errors.Is(err, ErrPostExit) { require.NoError(t, err) } From e7e7d4921910a73aae0a59b1cba54cf92007ce15 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 11 Aug 2025 11:31:07 +0200 Subject: [PATCH 25/38] remove in-process grappler in favor of commandGrappler --- .../pkg/agent/application/upgrade/rollback.go | 2 +- .../upgrade/rollback_notwindows.go | 23 ++- .../application/upgrade/rollback_windows.go | 2 +- .../pkg/agent/application/upgrade/watcher.go | 9 +- .../application/upgrade/watcher_notwindows.go | 2 +- .../agent/application/upgrade/watcher_test.go | 189 ++++++++++++++++++ .../application/upgrade/watcher_windows.go | 3 +- internal/pkg/agent/cmd/watch.go | 2 +- internal/pkg/agent/cmd/watch_impl.go | 4 - .../pkg/agent/cmd/watch_impl_notwindows.go | 47 ----- internal/pkg/agent/cmd/watch_impl_test.go | 178 ----------------- 
internal/pkg/agent/cmd/watch_impl_windows.go | 110 ---------- 12 files changed, 211 insertions(+), 360 deletions(-) delete mode 100644 internal/pkg/agent/cmd/watch_impl_notwindows.go delete mode 100644 internal/pkg/agent/cmd/watch_impl_windows.go diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index c8167a10c6e..727ff43e9b6 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -150,7 +150,7 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error return nil, nil } // invokeWatcherCmd and StartWatcherCmd are platform-specific functions dealing with process launching details. - cmd, err := StartWatcherCmd(log, func() *exec.Cmd { return invokeWatcherCmd(agentExecutable) }) + cmd, err := StartWatcherCmd(log, func() *exec.Cmd { return invokeWatcherCmd(agentExecutable) }, true) if err != nil { return nil, fmt.Errorf("starting watcher process: %w", err) } diff --git a/internal/pkg/agent/application/upgrade/rollback_notwindows.go b/internal/pkg/agent/application/upgrade/rollback_notwindows.go index 5e9b3f4f3ba..cb670fcbd5a 100644 --- a/internal/pkg/agent/application/upgrade/rollback_notwindows.go +++ b/internal/pkg/agent/application/upgrade/rollback_notwindows.go @@ -6,7 +6,15 @@ package upgrade -func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory) (*exec.Cmd, error) { +import ( + "fmt" + "os" + "os/exec" + + "github.com/elastic/elastic-agent/pkg/core/logger" +) + +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec.Cmd, error) { cmd := createCmd() log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) if err := cmd.Start(); err != nil { @@ -16,11 +24,12 @@ func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory) (*exec.Cmd, error upgradeWatcherPID := cmd.Process.Pid agentPID := os.Getpid() - go func() { - if err := 
cmd.Wait(); err != nil { - log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) - } - }() - + if wait { + go func() { + if err := cmd.Wait(); err != nil { + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) + } + }() + } return cmd, nil } diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index 9a1e494b146..5452bdd7a5c 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -49,7 +49,7 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { return cmd } -func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory) (*exec.Cmd, error) { +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec.Cmd, error) { // allocConsole r1, _, consoleErr := allocConsoleProc.Call() if r1 == 0 { diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 3469185477c..26f787fc3e4 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -19,7 +19,6 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" - "github.com/elastic/elastic-agent/pkg/utils" ) const ( @@ -284,7 +283,7 @@ func (a AgentWatcherHelper) WaitForWatcher(ctx context.Context, log *logger.Logg } func (a AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string) (*filelock.AppLocker, error) { - return takeOverWatcher(ctx, log, new(inProcessWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 
100*time.Millisecond) + return takeOverWatcher(ctx, log, new(commandWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) } // watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. @@ -351,12 +350,6 @@ func takeOverWatcher(ctx context.Context, log *logger.Logger, watcherGrappler wa } } -type inProcessWatcherGrappler struct{} - -func (i inProcessWatcherGrappler) TakeDownWatcher(ctx context.Context, log *logger.Logger) error { - return takedownWatcher(ctx, log, utils.GetWatcherPIDs) -} - func selectWatcherExecutable(topDir string, previous agentInstall, current agentInstall) string { // check if the upgraded version is less than the previous (currently installed) version if current.parsedVersion.Less(*previous.parsedVersion) { diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go index af4da8a56b7..02f43cb2d3d 100644 --- a/internal/pkg/agent/application/upgrade/watcher_notwindows.go +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -30,7 +30,7 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { return cmd } -func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { +func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { pids, err := pidFetchFunc() if err != nil { return fmt.Errorf("error listing watcher processes: %s", err) diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 1a12fc2e844..dc7fa2df7a1 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -9,7 +9,9 @@ import ( "fmt" "net" "os" + "os/exec" "path/filepath" + "runtime" "sync" "testing" "time" @@ -26,6 +28,7 @@ import ( 
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" agtversion "github.com/elastic/elastic-agent/pkg/version" ) @@ -990,3 +993,189 @@ func TestTakeOverWatcher(t *testing.T) { } } + +func Test_takedownWatcher(t *testing.T) { + + const applockerFileName = "mocklocker.lock" + + testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") + if runtime.GOOS == "windows" { + testExecutablePath += ".exe" + } + testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) + require.NoError(t, err, "error calculating absolute test executable part") + + require.FileExists(t, testExecutableAbsolutePath, + "testlocker binary not found.\n"+ + "Check that:\n"+ + "- test binaries have been built with mage build:testbinaries\n"+ + "- the path of the executable is correct") + + returnCmdPIDsFetcher := func(cmds ...*exec.Cmd) watcherPIDsFetcher { + return func() ([]int, error) { + pids := make([]int, 0, len(cmds)) + for _, c := range cmds { + if c.Process != nil { + pids = append(pids, c.Process.Pid) + } + } + + return pids, nil + } + } + + type setupFunc func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) + type assertFunc func(t *testing.T, workdir string, cmds []*exec.Cmd) + + tests := []struct { + name string + setup setupFunc + wantErr assert.ErrorAssertionFunc + assertPostTakedown assertFunc + }{ + { + name: "no contention for watcher applocker", + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + // nothing to do here, always return and empty list of pids + return func() ([]int, error) { + return nil, nil + }, nil + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, _ 
[]*exec.Cmd) { + // we should be able to lock, no problem + locker := filelock.NewAppLocker(workdir, applockerFileName) + lockError := locker.TryLock() + t.Cleanup(func() { + _ = locker.Unlock() + }) + + assert.NoError(t, lockError) + + }, + }, + { + name: "contention with test binary listening to signals: test binary is terminated gracefully", + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + cmd := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, false) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") + + go func() { + // reap the child process + waitErr := testlockerProcess.Wait() + assert.NoError(t, waitErr, "error waiting for test locker process to exit") + }() + + require.EventuallyWithT(t, func(t *assert.CollectT) { + require.NotNil(t, testlockerProcess.ProcessState, "test locker process should have completed and process state set") + assert.True(t, testlockerProcess.ProcessState.Exited(), "test locker process should have terminated") + assert.Equal(t, 0, testlockerProcess.ProcessState.ExitCode(), "test locker process should have a successful exit status") + }, 30*time.Second, 100*time.Millisecond, "test locker 
process should have exited gracefully") + + assert.FileExists(t, filepath.Join(workdir, applockerFileName)) + testApplocker := filelock.NewAppLocker(workdir, applockerFileName) + testApplockerError := testApplocker.TryLock() + t.Cleanup(func() { + _ = testApplocker.Unlock() + }) + assert.NoError(t, testApplockerError, "error locking applocker") + }, + }, + { + name: "contention with test binary not listening to signals: test binary is not terminated", + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + cmd := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, true) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") + + go func() { + // reap the child process + waitErr := testlockerProcess.Wait() + assert.Error(t, waitErr, "waiting for testlocker process should return error") + }() + + // check that the process is still running for a time + assert.Never(t, func() bool { + return testlockerProcess.ProcessState != nil && testlockerProcess.ProcessState.Exited() + }, 1*time.Second, 100*time.Millisecond, "test locker process should still be running for some time") + + err = testlockerProcess.Process.Kill() + assert.NoError(t, err, 
"error killing testlocker process") + + if assert.Nil(t, testlockerProcess.ProcessState, "test locker process should have been terminated") { + assert.NotEqual(t, 0, testlockerProcess.ProcessState.ExitCode(), "test locker process shouldnot return a successful exit code") + } + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + workDir := t.TempDir() + log, obsLogs := loggertest.New(t.Name()) + pidFetcher, processInfos := tc.setup(t, log, workDir) + tc.wantErr(t, TakedownWatcher(t.Context(), log, pidFetcher)) + t.Logf("test logs: %v", obsLogs) + if tc.assertPostTakedown != nil { + tc.assertPostTakedown(t, workDir, processInfos) + } + }) + } +} + +func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName string, testExecutablePath string, workdir string, ignoreSignals bool) *exec.Cmd { + + args := []string{"-lockfile", filepath.Join(workdir, applockerFileName)} + if ignoreSignals { + args = append(args, "-ignoresignals") + } + + // use the same invoke as the one used to launch a watcher + watcherCmd, err := StartWatcherCmd( + log, + func() *exec.Cmd { + cmd := InvokeCmdWithArgs(testExecutablePath, args...) 
+ + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd + }, + false, + ) + + require.NoError(t, err, "error starting testlocker binary") + return watcherCmd +} diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index 92115dcecdf..7ce2e76b8a0 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -27,7 +27,6 @@ var ( freeConsoleProc = kernel32API.NewProc("FreeConsole") attachConsoleProc = kernel32API.NewProc("AttachConsole") procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") - procSetConsoleHandler = kernel32API.NewProc("SetConsoleCtrlHandler") allocConsoleProc = kernel32API.NewProc("AllocConsole") ) @@ -47,7 +46,7 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { return cmd } -func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { +func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { pids, err := pidFetchFunc() if err != nil { return fmt.Errorf("error listing watcher processes: %s", err) diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index a1c8338dfb0..6b6155b37b7 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -56,7 +56,7 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command takedown, _ := c.Flags().GetBool("takedown") if takedown { - err = takedownWatcher(log, utils.GetWatcherPIDs) + err = upgrade.TakedownWatcher(context.Background(), log, utils.GetWatcherPIDs) if err != nil { log.Errorf("error taking down watcher: %v", err) os.Exit(5) diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index 5e6294c8bec..56c9565f44c 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -33,10 
+33,6 @@ func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.L return upgrade.Rollback(ctx, log, c, topDirPath, prevVersionedHome, prevHash) } -// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. -// This will allow for easier testing of takeOverWatcher using fake binaries -type watcherPIDsFetcher func() ([]int, error) - func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logger.Logger) error { errChan := make(chan error) diff --git a/internal/pkg/agent/cmd/watch_impl_notwindows.go b/internal/pkg/agent/cmd/watch_impl_notwindows.go deleted file mode 100644 index 5295a7de94b..00000000000 --- a/internal/pkg/agent/cmd/watch_impl_notwindows.go +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License 2.0; -// you may not use this file except in compliance with the Elastic License 2.0. 
- -//go:build !windows - -package cmd - -import ( - "fmt" - "os" - "syscall" - - "github.com/elastic/elastic-agent/pkg/core/logger" -) - -func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { - pids, err := pidFetchFunc() - if err != nil { - return fmt.Errorf("error listing watcher processes: %s", err) - } - - ownPID := os.Getpid() - - for _, pid := range pids { - - if pid == ownPID { - continue - } - - log.Debugf("attempting to terminate watcher process with PID: %d", pid) - - process, err := os.FindProcess(pid) - if err != nil { - log.Errorf("error finding watcher process with PID: %d: %s", pid, err) - continue - } - - err = process.Signal(syscall.SIGTERM) - if err != nil { - log.Errorf("error killing watcher process with PID: %d: %s", pid, err) - continue - } - - } - return nil -} diff --git a/internal/pkg/agent/cmd/watch_impl_test.go b/internal/pkg/agent/cmd/watch_impl_test.go index 4cc9587db5a..d9537b58c92 100644 --- a/internal/pkg/agent/cmd/watch_impl_test.go +++ b/internal/pkg/agent/cmd/watch_impl_test.go @@ -8,9 +8,6 @@ import ( "context" "fmt" "os" - "os/exec" - "path/filepath" - "runtime" "syscall" "testing" "time" @@ -18,13 +15,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" - "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" ) -const applockerFileName = "mocklocker.lock" - func Test_watchLoop(t *testing.T) { t.Run("watchloop returns when context expires - no error", func(t *testing.T) { @@ -78,174 +71,3 @@ func Test_watchLoop(t *testing.T) { } }) } - -func Test_takedownWatcher(t *testing.T) { - - testExecutablePath := filepath.Join("..", "application", "filelock", "testlocker", "testlocker") - if runtime.GOOS == "windows" { - testExecutablePath += ".exe" - } - testExecutableAbsolutePath, err := 
filepath.Abs(testExecutablePath) - require.NoError(t, err, "error calculating absolute test executable part") - - require.FileExists(t, testExecutableAbsolutePath, - "testlocker binary not found.\n"+ - "Check that:\n"+ - "- test binaries have been built with mage build:testbinaries\n"+ - "- the path of the executable is correct") - - returnCmdPIDsFetcher := func(cmds ...*exec.Cmd) watcherPIDsFetcher { - return func() ([]int, error) { - pids := make([]int, 0, len(cmds)) - for _, c := range cmds { - if c.Process != nil { - pids = append(pids, c.Process.Pid) - } - } - - return pids, nil - } - } - - type setupFunc func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) - type assertFunc func(t *testing.T, workdir string, cmds []*exec.Cmd) - - tests := []struct { - name string - setup setupFunc - wantErr assert.ErrorAssertionFunc - assertPostTakedown assertFunc - }{ - { - name: "no contention for watcher applocker", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { - // nothing to do here, always return and empty list of pids - return func() ([]int, error) { - return nil, nil - }, nil - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, _ []*exec.Cmd) { - // we should be able to lock, no problem - locker := filelock.NewAppLocker(workdir, applockerFileName) - lockError := locker.TryLock() - t.Cleanup(func() { - _ = locker.Unlock() - }) - - assert.NoError(t, lockError) - - }, - }, - { - name: "contention with test binary listening to signals: test binary is terminated gracefully", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { - cmd := createTestlockerCommand(t, testExecutableAbsolutePath, workdir, false) - require.NoError(t, err, "error starting testlocker binary") - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker 
should have been created by the test binary") - }, 10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { - - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") - - err = testlockerProcess.Wait() - assert.NoError(t, err, "error waiting for testlocker process to terminate") - - if assert.NotNil(t, testlockerProcess.ProcessState, "test locker process should have completed and process state set") { - assert.True(t, testlockerProcess.ProcessState.Success(), "test locker process should be successful") - } - - assert.FileExists(t, filepath.Join(workdir, applockerFileName)) - testApplocker := filelock.NewAppLocker(workdir, applockerFileName) - testApplockerError := testApplocker.TryLock() - t.Cleanup(func() { - _ = testApplocker.Unlock() - }) - assert.NoError(t, testApplockerError, "error locking applocker") - }, - }, - { - name: "contention with test binary not listening to signals: test binary is not terminated", - setup: func(t *testing.T, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { - cmd := createTestlockerCommand(t, testExecutableAbsolutePath, workdir, true) - require.NoError(t, err, "error starting testlocker binary") - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") - }, 10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return 
returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { - - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") - - // check that the process is still running - assert.Nil(t, testlockerProcess.ProcessState, "testlocker process should not have ProcessState set since it should still be running") - assert.NotNil(t, testlockerProcess.Process, "testlocker process should have an os.Process set") - process, findProcessErr := os.FindProcess(testlockerProcess.Process.Pid) - require.NoErrorf(t, findProcessErr, "error finding test process with pid %d", testlockerProcess.Process.Pid) - require.NotNil(t, process, "test process should be found among the running processes") - if runtime.GOOS != "windows" { - // for unix systems we need an additional check since FindProcess will always return a *os.Process. - // Poke it with a stick (signal) - // see https://pkg.go.dev/os#FindProcess - signalErr := process.Signal(syscall.Signal(0)) - require.NoError(t, signalErr, "error signaling test process: this means it's not running") - } - err := testlockerProcess.Process.Kill() - assert.NoError(t, err, "error killing testlocker process") - assert.Nil(t, testlockerProcess.ProcessState, "testlocker process should not have ProcessState set since it should still be running") - }, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - workDir := t.TempDir() - log, obsLogs := loggertest.New(t.Name()) - pidFetcher, processInfos := tc.setup(t, workDir) - tc.wantErr(t, takedownWatcher(log, pidFetcher)) - t.Logf("takedown logs: %v", obsLogs) - if tc.assertPostTakedown != nil { - tc.assertPostTakedown(t, workDir, processInfos) - } - }) - } -} - -func createTestlockerCommand(t *testing.T, testExecutablePath string, workdir string, ignoreSignals bool) *exec.Cmd { - args := 
[]string{"-lockfile", filepath.Join(workdir, applockerFileName)} - if ignoreSignals { - args = append(args, "-ignoresignals") - } - - // use the same invoke as the one used to launch a watcher - cmd := upgrade.InvokeCmdWithArgs(testExecutablePath, args...) - - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - - err := cmd.Start() - require.NoError(t, err, "error starting testlocker binary") - return cmd -} diff --git a/internal/pkg/agent/cmd/watch_impl_windows.go b/internal/pkg/agent/cmd/watch_impl_windows.go deleted file mode 100644 index 0cc25c84b3e..00000000000 --- a/internal/pkg/agent/cmd/watch_impl_windows.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License 2.0; -// you may not use this file except in compliance with the Elastic License 2.0. - -//go:build windows - -package cmd - -import ( - "errors" - "fmt" - "os" - "unsafe" - - gowindows "golang.org/x/sys/windows" - - "github.com/elastic/elastic-agent/pkg/core/logger" -) - -var ( - kernel32API = gowindows.NewLazySystemDLL("kernel32.dll") - - freeConsoleProc = kernel32API.NewProc("FreeConsole") - attachConsoleProc = kernel32API.NewProc("AttachConsole") - procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") - procSetConsoleHandler = kernel32API.NewProc("SetConsoleCtrlHandler") - allocConsoleProc = kernel32API.NewProc("AllocConsole") -) - -func takedownWatcher(log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { - - pids, err := pidFetchFunc() - if err != nil { - return fmt.Errorf("error listing watcher processes: %s", err) - } - - ownPID := os.Getpid() - - var accumulatedSignalingErrors error - for _, pid := range pids { - - if pid == ownPID { - continue - } - - log.Debugf("attempting to terminate watcher process with PID: %d", pid) - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, signalPID(log, pid)) - } - - return 
accumulatedSignalingErrors -} - -// GetConsoleProcessList retrieves the list of process IDs attached to the current console -func GetConsoleProcessList() ([]uint32, error) { - // Allocate a buffer for PIDs - const maxProcs = 64 - pids := make([]uint32, maxProcs) - - r1, _, err := procGetConsoleProcessList.Call( - uintptr(unsafe.Pointer(&pids[0])), - uintptr(maxProcs), - ) - - count := uint32(r1) - if count == 0 { - return nil, err - } - - return pids[:count], nil -} - -// signalPID takes care of signaling a given PID. It also leverages defer() for freeing console and other housekeeping -func signalPID(log *logger.Logger, pid int) error { - r1, _, consoleErr := freeConsoleProc.Call() - if r1 == 0 { - log.Warnf("error preemptively detaching from console: %s", consoleErr) - } - - r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) - if r1 == 0 { - return fmt.Errorf("error attaching console to watcher process with PID %d: %w", pid, consoleErr) - } - log.Infof("successfully attached console with PID: %d", pid) - - defer func() { - r1, _, consoleErr = freeConsoleProc.Call() - if r1 == 0 { - log.Errorf("error detaching from console: %s", consoleErr) - } else { - log.Infof("successfully detached from console of PID: %d", pid) - } - }() - - if list, consoleProcessListErr := GetConsoleProcessList(); consoleProcessListErr != nil { - log.Errorf("error listing console processes: %s", consoleProcessListErr) - } else { - log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) - } - - // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that - // the parent process of the watcher has already terminated, we have to hug it tightly and take it down with us - // by specifying processGroupID=0 - killProcErr := gowindows.GenerateConsoleCtrlEvent(gowindows.CTRL_BREAK_EVENT, uint32(pid)) - - if killProcErr != nil { - return fmt.Errorf("error signaling process with PID: %d: %w", pid, killProcErr) - 
} - - return nil -} From c4f24c067f889a0489e6d6981295414ccd071412 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Mon, 11 Aug 2025 18:04:09 +0200 Subject: [PATCH 26/38] Allow watcher to listen to signals only during watch loop --- internal/pkg/agent/cmd/watch.go | 6 ++++++ internal/pkg/agent/cmd/watch_impl.go | 2 ++ 2 files changed, 8 insertions(+) diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 6b6155b37b7..77203c8bcb1 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "os" + "os/signal" "runtime" "time" @@ -44,6 +45,11 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command Short: "Watch the Elastic Agent for failures and initiate rollback", Long: `This command watches Elastic Agent for failures and initiates rollback if necessary.`, Run: func(c *cobra.Command, _ []string) { + + // Initially ignore all signals + ignoredSignalsChannel := make(chan os.Signal, 1) + signal.Notify(ignoredSignalsChannel) + cfg := getConfig(streams) log, err := configuredLogger(cfg, watcherName) if err != nil { diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index 56c9565f44c..baf2270a73e 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -47,8 +47,10 @@ func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time. 
agtWatcher := upgrade.NewAgentWatcher(errChan, log, errorCheckInterval) go agtWatcher.Run(ctx) + // Allow for signals to interrupt the watch signals := make(chan os.Signal, 1) signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) + defer signal.Stop(signals) graceTimer := time.NewTimer(tilGrace) defer graceTimer.Stop() From 05778eb913cc3bcd6945b495f1b83819d199f6b4 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 12 Aug 2025 13:44:14 +0200 Subject: [PATCH 27/38] Add postWatchHook to watcher process start to keep race detector happy --- .../pkg/agent/application/upgrade/rollback.go | 23 ++- .../upgrade/rollback_notwindows.go | 20 +-- .../application/upgrade/rollback_windows.go | 8 +- .../agent/application/upgrade/watcher_test.go | 131 ++++++++++++------ 4 files changed, 128 insertions(+), 54 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 727ff43e9b6..7cb1ffeb030 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -150,7 +150,7 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error return nil, nil } // invokeWatcherCmd and StartWatcherCmd are platform-specific functions dealing with process launching details. 
- cmd, err := StartWatcherCmd(log, func() *exec.Cmd { return invokeWatcherCmd(agentExecutable) }, true) + cmd, err := StartWatcherCmd(log, func() *exec.Cmd { return invokeWatcherCmd(agentExecutable) }) if err != nil { return nil, fmt.Errorf("starting watcher process: %w", err) } @@ -163,6 +163,27 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error } +type WatcherInvocationOpt func(opts *watcherInvocationOptions) +type watcherHook func() + +type watcherInvocationOptions struct { + postWatchHook watcherHook +} + +func WithWatcherPostWaitHook(h watcherHook) WatcherInvocationOpt { + return func(opts *watcherInvocationOptions) { + opts.postWatchHook = h + } +} + +func applyWatcherInvocationOpts(opts ...WatcherInvocationOpt) *watcherInvocationOptions { + invocationOpts := new(watcherInvocationOptions) + for _, opt := range opts { + opt(invocationOpts) + } + return invocationOpts +} + type cmdFactory func() *exec.Cmd func invokeWatcherCmd(agentExecutable string) *exec.Cmd { diff --git a/internal/pkg/agent/application/upgrade/rollback_notwindows.go b/internal/pkg/agent/application/upgrade/rollback_notwindows.go index cb670fcbd5a..e73d85e818f 100644 --- a/internal/pkg/agent/application/upgrade/rollback_notwindows.go +++ b/internal/pkg/agent/application/upgrade/rollback_notwindows.go @@ -14,7 +14,10 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger" ) -func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec.Cmd, error) { +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, opts ...WatcherInvocationOpt) (*exec.Cmd, error) { + + invocationOpts := applyWatcherInvocationOpts(opts...) 
+ cmd := createCmd() log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) if err := cmd.Start(); err != nil { @@ -24,12 +27,13 @@ func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec upgradeWatcherPID := cmd.Process.Pid agentPID := os.Getpid() - if wait { - go func() { - if err := cmd.Wait(); err != nil { - log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) - } - }() - } + go func() { + if err := cmd.Wait(); err != nil { + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) + } + if invocationOpts.postWatchHook != nil { + invocationOpts.postWatchHook() + } + }() return cmd, nil } diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index 5452bdd7a5c..adc816766fa 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -49,7 +49,10 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { return cmd } -func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec.Cmd, error) { +func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, opts ...WatcherInvocationOpt) (*exec.Cmd, error) { + + invocationOpts := applyWatcherInvocationOpts(opts...) 
+ // allocConsole r1, _, consoleErr := allocConsoleProc.Call() if r1 == 0 { @@ -83,6 +86,9 @@ func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, wait bool) (*exec if err := cmd.Wait(); err != nil { log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", agentPID, "agent.process.pid", upgradeWatcherPID, "error.message", err) } + if invocationOpts.postWatchHook != nil { + invocationOpts.postWatchHook() + } }() return cmd, nil diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index dc7fa2df7a1..6ecf0107f68 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -13,6 +13,7 @@ import ( "path/filepath" "runtime" "sync" + "syscall" "testing" "time" @@ -1024,8 +1025,15 @@ func Test_takedownWatcher(t *testing.T) { } } - type setupFunc func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) - type assertFunc func(t *testing.T, workdir string, cmds []*exec.Cmd) + // create a struct with a *exec.Cmd and a channel that will be closed when Wait() returns for the exec.Cmd + // this should keep the data race detector happy. 
+ type testProcess struct { + cmd *exec.Cmd + waitChan chan struct{} + } + + type setupFunc func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) + type assertFunc func(t *testing.T, workdir string, cmds []testProcess) tests := []struct { name string @@ -1035,14 +1043,14 @@ func Test_takedownWatcher(t *testing.T) { }{ { name: "no contention for watcher applocker", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { // nothing to do here, always return and empty list of pids return func() ([]int, error) { return nil, nil }, nil }, wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, _ []*exec.Cmd) { + assertPostTakedown: func(t *testing.T, workdir string, _ []testProcess) { // we should be able to lock, no problem locker := filelock.NewAppLocker(workdir, applockerFileName) lockError := locker.TryLock() @@ -1056,8 +1064,9 @@ func Test_takedownWatcher(t *testing.T) { }, { name: "contention with test binary listening to signals: test binary is terminated gracefully", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { - cmd := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, false) + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { + + cmd, testChan := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, false) require.NoError(t, err, "error starting testlocker binary") // wait for test binary to acquire lock @@ -1068,26 +1077,28 @@ func Test_takedownWatcher(t *testing.T) { t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, waitChan: testChan}} }, wantErr: 
assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + assertPostTakedown: func(t *testing.T, workdir string, cmds []testProcess) { assert.Len(t, cmds, 1) testlockerProcess := cmds[0] require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") - go func() { - // reap the child process - waitErr := testlockerProcess.Wait() - assert.NoError(t, waitErr, "error waiting for test locker process to exit") - }() + require.Eventually(t, func() bool { + running, checkErr := isProcessRunning(testlockerProcess.cmd) + if checkErr != nil { + t.Logf("error checking for testlocker process running: %s", checkErr.Error()) + return false + } + return !running + }, 30*time.Second, 100*time.Millisecond, "test locker process should have exited") + + <-testlockerProcess.waitChan - require.EventuallyWithT(t, func(t *assert.CollectT) { - require.NotNil(t, testlockerProcess.ProcessState, "test locker process should have completed and process state set") - assert.True(t, testlockerProcess.ProcessState.Exited(), "test locker process should have terminated") - assert.Equal(t, 0, testlockerProcess.ProcessState.ExitCode(), "test locker process should have a successful exit status") - }, 30*time.Second, 100*time.Millisecond, "test locker process should have exited gracefully") + assert.True(t, testlockerProcess.cmd.ProcessState.Exited(), "test locker process should have terminated") + assert.Equal(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should have a successful exit status") assert.FileExists(t, filepath.Join(workdir, applockerFileName)) testApplocker := filelock.NewAppLocker(workdir, applockerFileName) @@ -1100,8 +1111,8 @@ func Test_takedownWatcher(t *testing.T) { }, { name: "contention with test binary not listening to signals: test binary is not terminated", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []*exec.Cmd) { - cmd := 
createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, true) + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { + cmd, waitChan := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, true) require.NoError(t, err, "error starting testlocker binary") // wait for test binary to acquire lock @@ -1112,31 +1123,33 @@ func Test_takedownWatcher(t *testing.T) { t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - return returnCmdPIDsFetcher(cmd), []*exec.Cmd{cmd} + return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, waitChan: waitChan}} }, wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []*exec.Cmd) { + assertPostTakedown: func(t *testing.T, workdir string, cmds []testProcess) { assert.Len(t, cmds, 1) testlockerProcess := cmds[0] require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") - go func() { - // reap the child process - waitErr := testlockerProcess.Wait() - assert.Error(t, waitErr, "waiting for testlocker process should return error") - }() - // check that the process is still running for a time assert.Never(t, func() bool { - return testlockerProcess.ProcessState != nil && testlockerProcess.ProcessState.Exited() + running, checkErr := isProcessRunning(testlockerProcess.cmd) + if checkErr != nil { + t.Logf("error checking for testlocker process running: %s", checkErr.Error()) + return false + } + return !running }, 1*time.Second, 100*time.Millisecond, "test locker process should still be running for some time") - err = testlockerProcess.Process.Kill() + // Kill the process explicitly + err = testlockerProcess.cmd.Process.Kill() assert.NoError(t, err, "error killing testlocker process") - if assert.Nil(t, testlockerProcess.ProcessState, "test locker process should have been terminated") { - assert.NotEqual(t, 0, testlockerProcess.ProcessState.ExitCode(), 
"test locker process shouldnot return a successful exit code") + <-testlockerProcess.waitChan + + if assert.NotNil(t, testlockerProcess.cmd.ProcessState, "test locker process should have been terminated") { + assert.NotEqual(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process shouldnot return a successful exit code") } }, }, @@ -1156,7 +1169,9 @@ func Test_takedownWatcher(t *testing.T) { } } -func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName string, testExecutablePath string, workdir string, ignoreSignals bool) *exec.Cmd { +func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName string, testExecutablePath string, workdir string, ignoreSignals bool) (*exec.Cmd, chan struct{}) { + + watchTerminated := make(chan struct{}) args := []string{"-lockfile", filepath.Join(workdir, applockerFileName)} if ignoreSignals { @@ -1164,18 +1179,46 @@ func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName } // use the same invoke as the one used to launch a watcher - watcherCmd, err := StartWatcherCmd( - log, - func() *exec.Cmd { - cmd := InvokeCmdWithArgs(testExecutablePath, args...) - - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd - }, - false, + watcherCmd, err := StartWatcherCmd(log, func() *exec.Cmd { + cmd := InvokeCmdWithArgs(testExecutablePath, args...) 
+ + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd + }, + WithWatcherPostWaitHook(func() { + close(watchTerminated) + }), ) require.NoError(t, err, "error starting testlocker binary") - return watcherCmd + return watcherCmd, watchTerminated +} + +func isProcessRunning(cmd *exec.Cmd) (bool, error) { + if cmd.Process == nil { + return false, nil + } + + // search for the pid on the running processes + process, err := os.FindProcess(cmd.Process.Pid) + if err != nil { + return false, err + } + + if process == nil { + return false, nil + } + // if process is not nil we need to split between unix and non-unix OSes + if runtime.GOOS == "windows" { + return true, nil + } else { + // on unix system we always get a process back, we need to do some further checks + signalErr := cmd.Process.Signal(syscall.Signal(0)) + if signalErr != nil { + return false, nil + } else { + return true, nil + } + } } From 7e18b868df93884b30794ff8232ee2e52c6a8dbd Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 12 Aug 2025 15:03:02 +0200 Subject: [PATCH 28/38] fix lint errors --- .../pkg/agent/application/upgrade/rollback_darwin.go | 4 ++-- .../pkg/agent/application/upgrade/rollback_linux.go | 4 ++-- .../agent/application/upgrade/watcher_notwindows.go | 6 +++--- internal/pkg/agent/application/upgrade/watcher_test.go | 10 +++++----- .../pkg/agent/application/upgrade/watcher_windows.go | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback_darwin.go b/internal/pkg/agent/application/upgrade/rollback_darwin.go index ca40a58d4d1..00b4b8cde56 100644 --- a/internal/pkg/agent/application/upgrade/rollback_darwin.go +++ b/internal/pkg/agent/application/upgrade/rollback_darwin.go @@ -24,8 +24,8 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { cmd := exec.Command(executable, args...) 
var cred = &syscall.Credential{ - Uid: uint32(os.Getuid()), - Gid: uint32(os.Getgid()), + Uid: uint32(os.Getuid()), //nolint:gosec // int -> uint32 no overflow is possible since os.Getuid() should return a value compatible with uint32 + Gid: uint32(os.Getgid()), //nolint:gosec // int -> uint32 no overflow is possible since os.Getgid() should return a value compatible with uint32 Groups: nil, NoSetGroups: true, } diff --git a/internal/pkg/agent/application/upgrade/rollback_linux.go b/internal/pkg/agent/application/upgrade/rollback_linux.go index 694ab86d62f..602de94ceb7 100644 --- a/internal/pkg/agent/application/upgrade/rollback_linux.go +++ b/internal/pkg/agent/application/upgrade/rollback_linux.go @@ -24,8 +24,8 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { cmd := exec.Command(executable, args...) var cred = &syscall.Credential{ - Uid: uint32(os.Getuid()), - Gid: uint32(os.Getgid()), + Uid: uint32(os.Getuid()), //nolint:gosec // int -> uint32 no overflow is possible since os.Getuid() should return a value compatible with uint32 + Gid: uint32(os.Getgid()), //nolint:gosec // int -> uint32 no overflow is possible since os.Getgid() should return a value compatible with uint32 Groups: nil, NoSetGroups: true, } diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go index 02f43cb2d3d..30b83140f77 100644 --- a/internal/pkg/agent/application/upgrade/watcher_notwindows.go +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -33,7 +33,7 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { pids, err := pidFetchFunc() if err != nil { - return fmt.Errorf("error listing watcher processes: %s", err) + return fmt.Errorf("error listing watcher processes: %w", err) } ownPID := os.Getpid() @@ -52,13 +52,13 @@ func TakedownWatcher(ctx 
context.Context, log *logger.Logger, pidFetchFunc watch process, err := os.FindProcess(pid) if err != nil { - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error finding watcher process with PID: %d: %s", pid, err)) + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error finding watcher process with PID: %d: %w", pid, err)) continue } err = process.Signal(syscall.SIGTERM) if err != nil { - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error killing watcher process with PID: %d: %s", pid, err)) + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error killing watcher process with PID: %d: %w", pid, err)) continue } diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 6ecf0107f68..6ec28832a0b 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -913,7 +913,7 @@ func TestTakeOverWatcher(t *testing.T) { // add a cleanup to unlock the applocker at the end of the test anyway in case of failures t.Cleanup(func() { - locker.Unlock() + _ = locker.Unlock() }) }, wantErr: assert.NoError, @@ -937,7 +937,7 @@ func TestTakeOverWatcher(t *testing.T) { // add a cleanup to unlock the applocker at the end of the test anyway in case of failures t.Cleanup(func() { - locker.Unlock() + _ = locker.Unlock() }) }, wantErr: assert.NoError, @@ -959,7 +959,7 @@ func TestTakeOverWatcher(t *testing.T) { // add a cleanup to unlock the applocker at the end of the test anyway t.Cleanup(func() { - locker.Unlock() + _ = locker.Unlock() }) }, wantErr: assert.Error, @@ -1149,7 +1149,7 @@ func Test_takedownWatcher(t *testing.T) { <-testlockerProcess.waitChan if assert.NotNil(t, testlockerProcess.cmd.ProcessState, "test locker process should have been terminated") { - assert.NotEqual(t, 0, 
testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process shouldnot return a successful exit code") + assert.NotEqual(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should not return a successful exit code") } }, }, @@ -1216,7 +1216,7 @@ func isProcessRunning(cmd *exec.Cmd) (bool, error) { // on unix system we always get a process back, we need to do some further checks signalErr := cmd.Process.Signal(syscall.Signal(0)) if signalErr != nil { - return false, nil + return false, nil //nolint:nilerr // if we receive an error it means that the process is not running, so the check completed without errors } else { return true, nil } diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index 7ce2e76b8a0..adbc5fce793 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -49,7 +49,7 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { pids, err := pidFetchFunc() if err != nil { - return fmt.Errorf("error listing watcher processes: %s", err) + return fmt.Errorf("error listing watcher processes: %w", err) } ownPID := os.Getpid() From f1a0a99a31548e95004cfdc4270d3dd9aa945b1d Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 12 Aug 2025 17:03:00 +0200 Subject: [PATCH 29/38] Fix data races in unit tests --- .../application/upgrade/watcher_notwindows.go | 9 ++++ .../agent/application/upgrade/watcher_test.go | 48 ++++++++++--------- .../application/upgrade/watcher_windows.go | 22 +++++++++ 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go index 30b83140f77..4337855eef3 100644 --- 
a/internal/pkg/agent/application/upgrade/watcher_notwindows.go +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -65,3 +65,12 @@ func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watch } return accumulatedSignalingErrors } + +func isProcessLive(process *os.Process) (bool, error) { + signalErr := process.Signal(syscall.Signal(0)) + if signalErr != nil { + return false, nil //nolint:nilerr // if we receive an error it means that the process is not running, so the check completed without errors + } else { + return true, nil + } +} diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 6ec28832a0b..6e8369b4b8c 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -12,8 +12,8 @@ import ( "os/exec" "path/filepath" "runtime" + "strings" "sync" - "syscall" "testing" "time" @@ -1043,7 +1043,7 @@ func Test_takedownWatcher(t *testing.T) { }{ { name: "no contention for watcher applocker", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { + setup: func(_ *testing.T, _ *logger.Logger, _ string) (watcherPIDsFetcher, []testProcess) { // nothing to do here, always return and empty list of pids return func() ([]int, error) { return nil, nil @@ -1065,8 +1065,7 @@ func Test_takedownWatcher(t *testing.T) { { name: "contention with test binary listening to signals: test binary is terminated gracefully", setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { - - cmd, testChan := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, false) + cmd, testChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, false) require.NoError(t, err, "error starting testlocker binary") // wait for test binary to acquire 
lock @@ -1087,7 +1086,7 @@ func Test_takedownWatcher(t *testing.T) { require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") require.Eventually(t, func() bool { - running, checkErr := isProcessRunning(testlockerProcess.cmd) + running, checkErr := isProcessRunning(t, testlockerProcess.cmd) if checkErr != nil { t.Logf("error checking for testlocker process running: %s", checkErr.Error()) return false @@ -1112,7 +1111,7 @@ func Test_takedownWatcher(t *testing.T) { { name: "contention with test binary not listening to signals: test binary is not terminated", setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { - cmd, waitChan := createTestlockerCommand(t, log, applockerFileName, testExecutableAbsolutePath, workdir, true) + cmd, waitChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, true) require.NoError(t, err, "error starting testlocker binary") // wait for test binary to acquire lock @@ -1134,7 +1133,7 @@ func Test_takedownWatcher(t *testing.T) { // check that the process is still running for a time assert.Never(t, func() bool { - running, checkErr := isProcessRunning(testlockerProcess.cmd) + running, checkErr := isProcessRunning(t, testlockerProcess.cmd) if checkErr != nil { t.Logf("error checking for testlocker process running: %s", checkErr.Error()) return false @@ -1159,9 +1158,12 @@ func Test_takedownWatcher(t *testing.T) { t.Run(tc.name, func(t *testing.T) { workDir := t.TempDir() log, obsLogs := loggertest.New(t.Name()) + t.Cleanup(func() { + // however it ends, try to print out the logs of TakedownWatcher + loggertest.PrintObservedLogs(obsLogs.All(), t.Log) + }) pidFetcher, processInfos := tc.setup(t, log, workDir) - tc.wantErr(t, TakedownWatcher(t.Context(), log, pidFetcher)) - t.Logf("test logs: %v", obsLogs) + tc.wantErr(t, TakedownWatcher(t.Context(), log.Named("TakedownWatcher"), pidFetcher)) if 
tc.assertPostTakedown != nil { tc.assertPostTakedown(t, workDir, processInfos) } @@ -1195,30 +1197,30 @@ func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName return watcherCmd, watchTerminated } -func isProcessRunning(cmd *exec.Cmd) (bool, error) { +func isProcessRunning(t *testing.T, cmd *exec.Cmd) (bool, error) { if cmd.Process == nil { return false, nil } - + t.Logf("checking if pid %d is still running", cmd.Process.Pid) // search for the pid on the running processes process, err := os.FindProcess(cmd.Process.Pid) if err != nil { + t.Logf("error string: %q", err.Error()) + if runtime.GOOS == "windows" && strings.Contains(err.Error(), "The parameter is incorrect") { + // in windows, noone can hear you scream + // invalid parameter means that the process object cannot be found + t.Logf("pid %d is not running because on windows we got an incorrect parameter error", cmd.Process.Pid) + return false, nil + } + + t.Logf("error finding process: %T %v", err, err) return false, err } if process == nil { + t.Logf("pid %d is not running because os.GetProcess returned a nil process", cmd.Process.Pid) return false, nil } - // if process is not nil we need to split between unix and non-unix OSes - if runtime.GOOS == "windows" { - return true, nil - } else { - // on unix system we always get a process back, we need to do some further checks - signalErr := cmd.Process.Signal(syscall.Signal(0)) - if signalErr != nil { - return false, nil //nolint:nilerr // if we receive an error it means that the process is not running, so the check completed without errors - } else { - return true, nil - } - } + + return isProcessLive(cmd.Process) } diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index adbc5fce793..1633c1baa40 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -130,3 +130,25 @@ func 
signalPID(log *logger.Logger, pid int) error { return nil } + +func isProcessLive(process *os.Process) (bool, error) { + //exitCodeStillActive according to https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getexitcodeprocess + const exitCodeStillActive = 259 + // Open the process with PROCESS_QUERY_LIMITED_INFORMATION access + handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(process.Pid)) + if err != nil { + return false, fmt.Errorf("OpenProcess failed: %w", err) + } + + defer func(handle windows.Handle) { + _ = windows.CloseHandle(handle) + }(handle) + + var exitCode uint32 + err = windows.GetExitCodeProcess(handle, &exitCode) + if err != nil { + return false, fmt.Errorf("getting process exit code: %w", err) + } + + return exitCode == exitCodeStillActive, nil +} From e6cff5c5349d8d7702bea98b0d5f0fc87073675b Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 15 Aug 2025 12:00:40 +0200 Subject: [PATCH 30/38] make watcher rollback only if the agent has not been already rolled back --- internal/pkg/agent/cmd/watch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 77203c8bcb1..661f3a4982e 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -120,7 +120,7 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher _ = locker.Unlock() }() - if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK { + if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK && marker.Details.State != details.StateRollback { // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work log.Infof("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) 
From dac5bb214f7e5fcedbd0460ca4ac08f3a15d2277 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 15 Aug 2025 12:27:30 +0200 Subject: [PATCH 31/38] fix lint --- internal/pkg/agent/application/upgrade/watcher_windows.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index 1633c1baa40..4d87503449c 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -122,6 +122,7 @@ func signalPID(log *logger.Logger, pid int) error { // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that // the parent process of the watcher has already terminated, we have to hug it tightly and take it down with us // by specifying processGroupID=0 + //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) killProcErr := windows.GenerateConsoleCtrlEvent(windows.CTRL_BREAK_EVENT, uint32(pid)) if killProcErr != nil { @@ -135,6 +136,7 @@ func isProcessLive(process *os.Process) (bool, error) { //exitCodeStillActive according to https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getexitcodeprocess const exitCodeStillActive = 259 // Open the process with PROCESS_QUERY_LIMITED_INFORMATION access + //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(process.Pid)) if err != 
nil { return false, fmt.Errorf("OpenProcess failed: %w", err) From 282248618c45ac0587ceb7b9a093647756f97c86 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 15 Aug 2025 12:01:30 +0200 Subject: [PATCH 32/38] Add a pre-restart hook to Rollback operation --- .mockery.yaml | 11 +- .../pkg/agent/application/upgrade/rollback.go | 42 +++++ .../pkg/agent/cmd/mock_agentwatcher_test.go | 34 ++-- .../cmd/mock_installationmodifier_test.go | 147 ++++++++++++++++++ internal/pkg/agent/cmd/watch.go | 36 +++-- internal/pkg/agent/cmd/watch_impl.go | 8 +- internal/pkg/agent/cmd/watch_test.go | 29 ++-- .../agent/cmd/installation_modifier_mock.go | 146 ----------------- 8 files changed, 254 insertions(+), 199 deletions(-) rename testing/mocks/internal_/pkg/agent/cmd/agent_watcher_mock.go => internal/pkg/agent/cmd/mock_agentwatcher_test.go (50%) create mode 100644 internal/pkg/agent/cmd/mock_installationmodifier_test.go delete mode 100644 testing/mocks/internal_/pkg/agent/cmd/installation_modifier_mock.go diff --git a/.mockery.yaml b/.mockery.yaml index 845fe9cf74f..2b09e2b4f57 100644 --- a/.mockery.yaml +++ b/.mockery.yaml @@ -30,13 +30,16 @@ packages: interfaces: Agent: github.com/elastic/elastic-agent/internal/pkg/agent/cmd: + config: + inpackage: True + with-expecter: True + dir: "{{.InterfaceDirRelative}}" + mockname: "{{.Mock}}{{.InterfaceName | firstUpper}}" + outpkg: "{{.PackageName}}" + filename: "{{.Mock | lower}}_{{.InterfaceName | lower}}_test.go" interfaces: agentWatcher: - config: - mockname: "AgentWatcher" installationModifier: - config: - mockname: "InstallationModifier" github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade: config: inpackage: True diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 7cb1ffeb030..8f69f8fdcc5 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -35,6 +35,36 @@ const ( // 
Rollback rollbacks to previous version which was functioning before upgrade. func Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string) error { + return RollbackWithOpts(ctx, log, c, topDirPath, prevVersionedHome, prevHash) +} + +var FatalRollbackError = errors.New("Fatal rollback error") + +type RollbackHook func(ctx context.Context, log *logger.Logger, topDirPath string) error +type rollbackSettings struct { + preRestartHook RollbackHook +} + +func newRollbackSettings(opts ...RollbackOpt) *rollbackSettings { + rs := new(rollbackSettings) + for _, opt := range opts { + opt(rs) + } + return rs +} + +type RollbackOpt func(*rollbackSettings) + +func WithPreRestartHook(h RollbackHook) RollbackOpt { + return func(s *rollbackSettings) { + s.preRestartHook = h + } +} + +func RollbackWithOpts(ctx context.Context, log *logger.Logger, c client.Client, topDirPath string, prevVersionedHome string, prevHash string, opts ...RollbackOpt) error { + + settings := newRollbackSettings(opts...) 
+ symlinkPath := filepath.Join(topDirPath, agentName) var symlinkTarget string @@ -56,6 +86,18 @@ func Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPa return err } + // Hook + if settings.preRestartHook != nil { + hookErr := settings.preRestartHook(ctx, log, topDirPath) + if hookErr != nil { + if errors.Is(hookErr, FatalRollbackError) { + return fmt.Errorf("pre-restart hook failed: %w", hookErr) + } else { + log.Warnf("pre-restart hook failed: %v", hookErr) + } + } + } + // Restart log.Info("Restarting the agent after rollback") if err := restartAgent(ctx, log, c); err != nil { diff --git a/testing/mocks/internal_/pkg/agent/cmd/agent_watcher_mock.go b/internal/pkg/agent/cmd/mock_agentwatcher_test.go similarity index 50% rename from testing/mocks/internal_/pkg/agent/cmd/agent_watcher_mock.go rename to internal/pkg/agent/cmd/mock_agentwatcher_test.go index 4541e5d9aa0..e3685a86fb5 100644 --- a/testing/mocks/internal_/pkg/agent/cmd/agent_watcher_mock.go +++ b/internal/pkg/agent/cmd/mock_agentwatcher_test.go @@ -16,21 +16,21 @@ import ( time "time" ) -// AgentWatcher is an autogenerated mock type for the agentWatcher type -type AgentWatcher struct { +// mockAgentWatcher is an autogenerated mock type for the agentWatcher type +type mockAgentWatcher struct { mock.Mock } -type AgentWatcher_Expecter struct { +type mockAgentWatcher_Expecter struct { mock *mock.Mock } -func (_m *AgentWatcher) EXPECT() *AgentWatcher_Expecter { - return &AgentWatcher_Expecter{mock: &_m.Mock} +func (_m *mockAgentWatcher) EXPECT() *mockAgentWatcher_Expecter { + return &mockAgentWatcher_Expecter{mock: &_m.Mock} } // Watch provides a mock function with given fields: ctx, tilGrace, errorCheckInterval, log -func (_m *AgentWatcher) Watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logp.Logger) error { +func (_m *mockAgentWatcher) Watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logp.Logger) 
error { ret := _m.Called(ctx, tilGrace, errorCheckInterval, log) if len(ret) == 0 { @@ -47,8 +47,8 @@ func (_m *AgentWatcher) Watch(ctx context.Context, tilGrace time.Duration, error return r0 } -// AgentWatcher_Watch_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Watch' -type AgentWatcher_Watch_Call struct { +// mockAgentWatcher_Watch_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Watch' +type mockAgentWatcher_Watch_Call struct { *mock.Call } @@ -57,34 +57,34 @@ type AgentWatcher_Watch_Call struct { // - tilGrace time.Duration // - errorCheckInterval time.Duration // - log *logp.Logger -func (_e *AgentWatcher_Expecter) Watch(ctx interface{}, tilGrace interface{}, errorCheckInterval interface{}, log interface{}) *AgentWatcher_Watch_Call { - return &AgentWatcher_Watch_Call{Call: _e.mock.On("Watch", ctx, tilGrace, errorCheckInterval, log)} +func (_e *mockAgentWatcher_Expecter) Watch(ctx interface{}, tilGrace interface{}, errorCheckInterval interface{}, log interface{}) *mockAgentWatcher_Watch_Call { + return &mockAgentWatcher_Watch_Call{Call: _e.mock.On("Watch", ctx, tilGrace, errorCheckInterval, log)} } -func (_c *AgentWatcher_Watch_Call) Run(run func(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logp.Logger)) *AgentWatcher_Watch_Call { +func (_c *mockAgentWatcher_Watch_Call) Run(run func(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logp.Logger)) *mockAgentWatcher_Watch_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(context.Context), args[1].(time.Duration), args[2].(time.Duration), args[3].(*logp.Logger)) }) return _c } -func (_c *AgentWatcher_Watch_Call) Return(_a0 error) *AgentWatcher_Watch_Call { +func (_c *mockAgentWatcher_Watch_Call) Return(_a0 error) *mockAgentWatcher_Watch_Call { _c.Call.Return(_a0) return _c } -func (_c *AgentWatcher_Watch_Call) RunAndReturn(run 
func(context.Context, time.Duration, time.Duration, *logp.Logger) error) *AgentWatcher_Watch_Call { +func (_c *mockAgentWatcher_Watch_Call) RunAndReturn(run func(context.Context, time.Duration, time.Duration, *logp.Logger) error) *mockAgentWatcher_Watch_Call { _c.Call.Return(run) return _c } -// NewAgentWatcher creates a new instance of AgentWatcher. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// newMockAgentWatcher creates a new instance of mockAgentWatcher. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. -func NewAgentWatcher(t interface { +func newMockAgentWatcher(t interface { mock.TestingT Cleanup(func()) -}) *AgentWatcher { - mock := &AgentWatcher{} +}) *mockAgentWatcher { + mock := &mockAgentWatcher{} mock.Mock.Test(t) t.Cleanup(func() { mock.AssertExpectations(t) }) diff --git a/internal/pkg/agent/cmd/mock_installationmodifier_test.go b/internal/pkg/agent/cmd/mock_installationmodifier_test.go new file mode 100644 index 00000000000..7581a521000 --- /dev/null +++ b/internal/pkg/agent/cmd/mock_installationmodifier_test.go @@ -0,0 +1,147 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +// Code generated by mockery v2.53.4. DO NOT EDIT. 
+ +package cmd + +import ( + context "context" + + client "github.com/elastic/elastic-agent/pkg/control/v2/client" + + logp "github.com/elastic/elastic-agent-libs/logp" + + mock "github.com/stretchr/testify/mock" +) + +// mockInstallationModifier is an autogenerated mock type for the installationModifier type +type mockInstallationModifier struct { + mock.Mock +} + +type mockInstallationModifier_Expecter struct { + mock *mock.Mock +} + +func (_m *mockInstallationModifier) EXPECT() *mockInstallationModifier_Expecter { + return &mockInstallationModifier_Expecter{mock: &_m.Mock} +} + +// Cleanup provides a mock function with given fields: log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs +func (_m *mockInstallationModifier) Cleanup(log *logp.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool) error { + ret := _m.Called(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) + + if len(ret) == 0 { + panic("no return value specified for Cleanup") + } + + var r0 error + if rf, ok := ret.Get(0).(func(*logp.Logger, string, string, string, bool, bool) error); ok { + r0 = rf(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// mockInstallationModifier_Cleanup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Cleanup' +type mockInstallationModifier_Cleanup_Call struct { + *mock.Call +} + +// Cleanup is a helper method to define mock.On call +// - log *logp.Logger +// - topDirPath string +// - currentVersionedHome string +// - currentHash string +// - removeMarker bool +// - keepLogs bool +func (_e *mockInstallationModifier_Expecter) Cleanup(log interface{}, topDirPath interface{}, currentVersionedHome interface{}, currentHash interface{}, removeMarker interface{}, keepLogs interface{}) *mockInstallationModifier_Cleanup_Call { + return 
&mockInstallationModifier_Cleanup_Call{Call: _e.mock.On("Cleanup", log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs)} +} + +func (_c *mockInstallationModifier_Cleanup_Call) Run(run func(log *logp.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool)) *mockInstallationModifier_Cleanup_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*logp.Logger), args[1].(string), args[2].(string), args[3].(string), args[4].(bool), args[5].(bool)) + }) + return _c +} + +func (_c *mockInstallationModifier_Cleanup_Call) Return(_a0 error) *mockInstallationModifier_Cleanup_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *mockInstallationModifier_Cleanup_Call) RunAndReturn(run func(*logp.Logger, string, string, string, bool, bool) error) *mockInstallationModifier_Cleanup_Call { + _c.Call.Return(run) + return _c +} + +// Rollback provides a mock function with given fields: ctx, log, c, topDirPath, prevVersionedHome, prevHash, preRestart +func (_m *mockInstallationModifier) Rollback(ctx context.Context, log *logp.Logger, c client.Client, topDirPath string, prevVersionedHome string, prevHash string, preRestart rollbackHook) error { + ret := _m.Called(ctx, log, c, topDirPath, prevVersionedHome, prevHash, preRestart) + + if len(ret) == 0 { + panic("no return value specified for Rollback") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger, client.Client, string, string, string, rollbackHook) error); ok { + r0 = rf(ctx, log, c, topDirPath, prevVersionedHome, prevHash, preRestart) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// mockInstallationModifier_Rollback_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Rollback' +type mockInstallationModifier_Rollback_Call struct { + *mock.Call +} + +// Rollback is a helper method to define mock.On call +// - ctx context.Context +// - log *logp.Logger +// - c 
client.Client +// - topDirPath string +// - prevVersionedHome string +// - prevHash string +// - preRestart rollbackHook +func (_e *mockInstallationModifier_Expecter) Rollback(ctx interface{}, log interface{}, c interface{}, topDirPath interface{}, prevVersionedHome interface{}, prevHash interface{}, preRestart interface{}) *mockInstallationModifier_Rollback_Call { + return &mockInstallationModifier_Rollback_Call{Call: _e.mock.On("Rollback", ctx, log, c, topDirPath, prevVersionedHome, prevHash, preRestart)} +} + +func (_c *mockInstallationModifier_Rollback_Call) Run(run func(ctx context.Context, log *logp.Logger, c client.Client, topDirPath string, prevVersionedHome string, prevHash string, preRestart rollbackHook)) *mockInstallationModifier_Rollback_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(*logp.Logger), args[2].(client.Client), args[3].(string), args[4].(string), args[5].(string), args[6].(rollbackHook)) + }) + return _c +} + +func (_c *mockInstallationModifier_Rollback_Call) Return(_a0 error) *mockInstallationModifier_Rollback_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *mockInstallationModifier_Rollback_Call) RunAndReturn(run func(context.Context, *logp.Logger, client.Client, string, string, string, rollbackHook) error) *mockInstallationModifier_Rollback_Call { + _c.Call.Return(run) + return _c +} + +// newMockInstallationModifier creates a new instance of mockInstallationModifier. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func newMockInstallationModifier(t interface { + mock.TestingT + Cleanup(func()) +}) *mockInstallationModifier { + mock := &mockInstallationModifier{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 661f3a4982e..4a08f4cd8a5 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -86,9 +86,11 @@ type agentWatcher interface { Watch(ctx context.Context, tilGrace, errorCheckInterval time.Duration, log *logp.Logger) error } +type rollbackHook func(ctx context.Context, log *logger.Logger, topDirPath string) error + type installationModifier interface { Cleanup(log *logger.Logger, topDirPath, currentVersionedHome, currentHash string, removeMarker, keepLogs bool) error - Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string) error + Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string, preRestart rollbackHook) error } func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcherConfig, watcher agentWatcher, installModifier installationModifier) error { @@ -120,25 +122,29 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher _ = locker.Unlock() }() - if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK && marker.Details.State != details.StateRollback { + if marker.DesiredOutcome == upgrade.OUTCOME_ROLLBACK && marker.Details != nil && marker.Details.State != details.StateRollback { // TODO: there should be some sanity check in rollback functions like the installation we are going back to should exist and work log.Infof("rolling back because of DesiredOutcome=%s", marker.DesiredOutcome.String()) - err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) - if err != nil { - return 
fmt.Errorf("rolling back: %w", err) - } - if marker.Details == nil { - actionID := "" - if marker.Action != nil { - actionID = marker.Action.ActionID + updateMarkerAndDetails := func(_ context.Context, _ *logger.Logger, _ string) error { + if marker.Details == nil { + actionID := "" + if marker.Action != nil { + actionID = marker.Action.ActionID + } + marker.Details = details.NewDetails(marker.Version, details.StateRollback, actionID) } - marker.Details = details.NewDetails(marker.Version, details.StateRollback, actionID) + marker.Details.SetStateWithReason(details.StateRollback, details.ReasonManualRollback) + err = upgrade.SaveMarker(dataDir, marker, true) + if err != nil { + return fmt.Errorf("saving marker after rolling back: %w", err) + } + return nil } - marker.Details.SetStateWithReason(details.StateRollback, details.ReasonManualRollback) - err = upgrade.SaveMarker(dataDir, marker, true) + + err = installModifier.Rollback(context.Background(), log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash, updateMarkerAndDetails) if err != nil { - return fmt.Errorf("saving marker after rolling back: %w", err) + return fmt.Errorf("rolling back: %w", err) } return nil @@ -180,7 +186,7 @@ func watchCmd(log *logp.Logger, topDir string, cfg *configuration.UpgradeWatcher log.Error("Error detected, proceeding to rollback: %v", err) upgradeDetails.SetStateWithReason(details.StateRollback, details.ReasonWatchFailed) - err = installModifier.Rollback(ctx, log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash) + err = installModifier.Rollback(ctx, log, client.New(), paths.Top(), marker.PrevVersionedHome, marker.PrevHash, nil) if err != nil { log.Error("rollback failed", err) upgradeDetails.Fail(err) diff --git a/internal/pkg/agent/cmd/watch_impl.go b/internal/pkg/agent/cmd/watch_impl.go index baf2270a73e..8fac2ac315e 100644 --- a/internal/pkg/agent/cmd/watch_impl.go +++ b/internal/pkg/agent/cmd/watch_impl.go @@ -29,8 +29,12 @@ func (a 
upgradeInstallationModifier) Cleanup(log *logger.Logger, topDirPath, cur return upgrade.Cleanup(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) } -func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string) error { - return upgrade.Rollback(ctx, log, c, topDirPath, prevVersionedHome, prevHash) +func (a upgradeInstallationModifier) Rollback(ctx context.Context, log *logger.Logger, c client.Client, topDirPath, prevVersionedHome, prevHash string, preRestart rollbackHook) error { + var opts []upgrade.RollbackOpt + if preRestart != nil { + opts = append(opts, upgrade.WithPreRestartHook(upgrade.RollbackHook(preRestart))) + } + return upgrade.RollbackWithOpts(ctx, log, c, topDirPath, prevVersionedHome, prevHash, opts...) } func watch(ctx context.Context, tilGrace time.Duration, errorCheckInterval time.Duration, log *logger.Logger) error { diff --git a/internal/pkg/agent/cmd/watch_test.go b/internal/pkg/agent/cmd/watch_test.go index afac6794129..3727a7a211b 100644 --- a/internal/pkg/agent/cmd/watch_test.go +++ b/internal/pkg/agent/cmd/watch_test.go @@ -26,7 +26,6 @@ import ( "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" - cmdmocks "github.com/elastic/elastic-agent/testing/mocks/internal_/pkg/agent/cmd" ) func TestInitUpgradeDetails(t *testing.T) { @@ -91,13 +90,13 @@ func Test_watchCmd(t *testing.T) { } tests := []struct { name string - setupUpgradeMarker func(t *testing.T, tmpDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) + setupUpgradeMarker func(t *testing.T, tmpDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) args args wantErr assert.ErrorAssertionFunc }{ { name: "no upgrade marker, no party", - setupUpgradeMarker: func(t *testing.T, topDir string, watcher *cmdmocks.AgentWatcher, 
installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, topDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(topDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -109,7 +108,7 @@ func Test_watchCmd(t *testing.T) { }, { name: "happy path: no error watching, cleanup prev install", - setupUpgradeMarker: func(t *testing.T, topDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, topDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(topDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -150,7 +149,7 @@ func Test_watchCmd(t *testing.T) { }, { name: "unhappy path: error watching, rollback to previous install", - setupUpgradeMarker: func(t *testing.T, topDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, topDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(topDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -177,7 +176,7 @@ func Test_watchCmd(t *testing.T) { Watch(mock.Anything, mock.Anything, mock.Anything, mock.Anything). Return(errors.New("some watch error due to agent misbehaving")) installModifier.EXPECT(). - Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver"). + Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver", mock.MatchedBy(func(hook rollbackHook) bool { return hook == nil })). 
Return(nil) }, args: args{ @@ -187,7 +186,7 @@ func Test_watchCmd(t *testing.T) { }, { name: "upgrade rolled back: no watching, cleanup must be called", - setupUpgradeMarker: func(t *testing.T, topDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, topDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(topDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -228,7 +227,7 @@ func Test_watchCmd(t *testing.T) { }, { name: "after grace period: no watching, cleanup must be called", - setupUpgradeMarker: func(t *testing.T, topDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, topDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(topDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -270,7 +269,7 @@ func Test_watchCmd(t *testing.T) { }, { name: "Desired outcome is rollback, rollback immediately", - setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(tmpDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -301,7 +300,7 @@ func Test_watchCmd(t *testing.T) { require.NoError(t, err) installModifier.EXPECT(). - Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver"). + Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver", mock.Anything). 
Return(nil) }, args: args{ @@ -310,8 +309,8 @@ func Test_watchCmd(t *testing.T) { wantErr: assert.NoError, }, { - name: "Desired outcome is rollback no upgrade details, rollback immediately", - setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *cmdmocks.AgentWatcher, installModifier *cmdmocks.InstallationModifier) { + name: "Desired outcome is rollback no upgrade details, no rollback and simple cleanup", + setupUpgradeMarker: func(t *testing.T, tmpDir string, watcher *mockAgentWatcher, installModifier *mockInstallationModifier) { dataDirPath := paths.DataFrom(tmpDir) err := os.MkdirAll(dataDirPath, 0755) require.NoError(t, err) @@ -341,7 +340,7 @@ func Test_watchCmd(t *testing.T) { require.NoError(t, err) installModifier.EXPECT(). - Rollback(mock.Anything, mock.Anything, mock.Anything, paths.Top(), "elastic-agent-prvver", "prvver"). + Cleanup(mock.Anything, paths.Top(), paths.VersionedHome(tmpDir), release.ShortCommit(), true, false). Return(nil) }, args: args{ @@ -354,8 +353,8 @@ func Test_watchCmd(t *testing.T) { t.Run(tt.name, func(t *testing.T) { log, obs := loggertest.New(t.Name()) tmpDir := t.TempDir() - mockWatcher := cmdmocks.NewAgentWatcher(t) - mockInstallModifier := cmdmocks.NewInstallationModifier(t) + mockWatcher := newMockAgentWatcher(t) + mockInstallModifier := newMockInstallationModifier(t) tt.setupUpgradeMarker(t, tmpDir, mockWatcher, mockInstallModifier) tt.wantErr(t, watchCmd(log, tmpDir, tt.args.cfg, mockWatcher, mockInstallModifier), fmt.Sprintf("watchCmd(%v, ...)", tt.args.cfg)) t.Log("watchCmd logs:\n") diff --git a/testing/mocks/internal_/pkg/agent/cmd/installation_modifier_mock.go b/testing/mocks/internal_/pkg/agent/cmd/installation_modifier_mock.go deleted file mode 100644 index 14f73d912a8..00000000000 --- a/testing/mocks/internal_/pkg/agent/cmd/installation_modifier_mock.go +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one -// or more contributor license agreements. Licensed under the Elastic License 2.0; -// you may not use this file except in compliance with the Elastic License 2.0. - -// Code generated by mockery v2.53.4. DO NOT EDIT. - -package cmd - -import ( - client "github.com/elastic/elastic-agent/pkg/control/v2/client" - - context "context" - - logp "github.com/elastic/elastic-agent-libs/logp" - - mock "github.com/stretchr/testify/mock" -) - -// InstallationModifier is an autogenerated mock type for the installationModifier type -type InstallationModifier struct { - mock.Mock -} - -type InstallationModifier_Expecter struct { - mock *mock.Mock -} - -func (_m *InstallationModifier) EXPECT() *InstallationModifier_Expecter { - return &InstallationModifier_Expecter{mock: &_m.Mock} -} - -// Cleanup provides a mock function with given fields: log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs -func (_m *InstallationModifier) Cleanup(log *logp.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool) error { - ret := _m.Called(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) - - if len(ret) == 0 { - panic("no return value specified for Cleanup") - } - - var r0 error - if rf, ok := ret.Get(0).(func(*logp.Logger, string, string, string, bool, bool) error); ok { - r0 = rf(log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// InstallationModifier_Cleanup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Cleanup' -type InstallationModifier_Cleanup_Call struct { - *mock.Call -} - -// Cleanup is a helper method to define mock.On call -// - log *logp.Logger -// - topDirPath string -// - currentVersionedHome string -// - currentHash string -// - removeMarker bool -// - keepLogs bool -func (_e *InstallationModifier_Expecter) Cleanup(log interface{}, topDirPath 
interface{}, currentVersionedHome interface{}, currentHash interface{}, removeMarker interface{}, keepLogs interface{}) *InstallationModifier_Cleanup_Call { - return &InstallationModifier_Cleanup_Call{Call: _e.mock.On("Cleanup", log, topDirPath, currentVersionedHome, currentHash, removeMarker, keepLogs)} -} - -func (_c *InstallationModifier_Cleanup_Call) Run(run func(log *logp.Logger, topDirPath string, currentVersionedHome string, currentHash string, removeMarker bool, keepLogs bool)) *InstallationModifier_Cleanup_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*logp.Logger), args[1].(string), args[2].(string), args[3].(string), args[4].(bool), args[5].(bool)) - }) - return _c -} - -func (_c *InstallationModifier_Cleanup_Call) Return(_a0 error) *InstallationModifier_Cleanup_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *InstallationModifier_Cleanup_Call) RunAndReturn(run func(*logp.Logger, string, string, string, bool, bool) error) *InstallationModifier_Cleanup_Call { - _c.Call.Return(run) - return _c -} - -// Rollback provides a mock function with given fields: ctx, log, c, topDirPath, prevVersionedHome, prevHash -func (_m *InstallationModifier) Rollback(ctx context.Context, log *logp.Logger, c client.Client, topDirPath string, prevVersionedHome string, prevHash string) error { - ret := _m.Called(ctx, log, c, topDirPath, prevVersionedHome, prevHash) - - if len(ret) == 0 { - panic("no return value specified for Rollback") - } - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *logp.Logger, client.Client, string, string, string) error); ok { - r0 = rf(ctx, log, c, topDirPath, prevVersionedHome, prevHash) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// InstallationModifier_Rollback_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Rollback' -type InstallationModifier_Rollback_Call struct { - *mock.Call -} - -// Rollback is a helper method to define mock.On call -// - ctx 
context.Context -// - log *logp.Logger -// - c client.Client -// - topDirPath string -// - prevVersionedHome string -// - prevHash string -func (_e *InstallationModifier_Expecter) Rollback(ctx interface{}, log interface{}, c interface{}, topDirPath interface{}, prevVersionedHome interface{}, prevHash interface{}) *InstallationModifier_Rollback_Call { - return &InstallationModifier_Rollback_Call{Call: _e.mock.On("Rollback", ctx, log, c, topDirPath, prevVersionedHome, prevHash)} -} - -func (_c *InstallationModifier_Rollback_Call) Run(run func(ctx context.Context, log *logp.Logger, c client.Client, topDirPath string, prevVersionedHome string, prevHash string)) *InstallationModifier_Rollback_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(*logp.Logger), args[2].(client.Client), args[3].(string), args[4].(string), args[5].(string)) - }) - return _c -} - -func (_c *InstallationModifier_Rollback_Call) Return(_a0 error) *InstallationModifier_Rollback_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *InstallationModifier_Rollback_Call) RunAndReturn(run func(context.Context, *logp.Logger, client.Client, string, string, string) error) *InstallationModifier_Rollback_Call { - _c.Call.Return(run) - return _c -} - -// NewInstallationModifier creates a new instance of InstallationModifier. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. 
-func NewInstallationModifier(t interface { - mock.TestingT - Cleanup(func()) -}) *InstallationModifier { - mock := &InstallationModifier{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} From 1f936ce040b78a1c5ca70253403691dcc34f2d75 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 27 Aug 2025 14:59:47 +0200 Subject: [PATCH 33/38] Update upgrade details metadata Equals() with new fields --- internal/pkg/agent/application/upgrade/details/details.go | 5 ++++- .../pkg/agent/application/upgrade/marker_watcher_test.go | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/pkg/agent/application/upgrade/details/details.go b/internal/pkg/agent/application/upgrade/details/details.go index 8e246c26f3c..66c8f69d047 100644 --- a/internal/pkg/agent/application/upgrade/details/details.go +++ b/internal/pkg/agent/application/upgrade/details/details.go @@ -6,6 +6,7 @@ package details import ( "math" + "slices" "sync" "time" @@ -244,7 +245,9 @@ func (m Metadata) Equals(otherM Metadata) bool { m.DownloadPercent == otherM.DownloadPercent && m.DownloadRate == otherM.DownloadRate && equalTimePointers(m.RetryUntil, otherM.RetryUntil) && - m.RetryErrorMsg == otherM.RetryErrorMsg + m.RetryErrorMsg == otherM.RetryErrorMsg && + m.Reason == otherM.Reason && + slices.Equal(m.RollbacksAvailable, otherM.RollbacksAvailable) } func equalTimePointers(t, otherT *time.Time) bool { diff --git a/internal/pkg/agent/application/upgrade/marker_watcher_test.go b/internal/pkg/agent/application/upgrade/marker_watcher_test.go index 45db5bc4a6b..ec25d198035 100644 --- a/internal/pkg/agent/application/upgrade/marker_watcher_test.go +++ b/internal/pkg/agent/application/upgrade/marker_watcher_test.go @@ -126,6 +126,9 @@ details: expectedDetails: &details.Details{ TargetVersion: "8.9.2", State: details.StateRollback, + Metadata: details.Metadata{ + Reason: details.ReasonWatchFailed, + }, }, }, "same_version_with_details_some_state": { From 
d04351aab4aef7b66a73ab490c6239ce6f76b062 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Thu, 28 Aug 2025 15:31:54 +0200 Subject: [PATCH 34/38] Remove parent death signal for watcher on linux --- .../application/upgrade/rollback_linux.go | 2 +- internal/pkg/agent/cmd/watch.go | 14 ++++++----- internal/pkg/agent/cmd/watch_signals_linux.go | 23 +++++++++++++++++++ .../pkg/agent/cmd/watch_signals_notlinux.go | 13 +++++++++++ 4 files changed, 45 insertions(+), 7 deletions(-) create mode 100644 internal/pkg/agent/cmd/watch_signals_linux.go create mode 100644 internal/pkg/agent/cmd/watch_signals_notlinux.go diff --git a/internal/pkg/agent/application/upgrade/rollback_linux.go b/internal/pkg/agent/application/upgrade/rollback_linux.go index 602de94ceb7..fb587dab842 100644 --- a/internal/pkg/agent/application/upgrade/rollback_linux.go +++ b/internal/pkg/agent/application/upgrade/rollback_linux.go @@ -32,7 +32,7 @@ func InvokeCmdWithArgs(executable string, args ...string) *exec.Cmd { var sysproc = &syscall.SysProcAttr{ Credential: cred, Setsid: true, - // propagate sigint instead of sigkill so we can ignore it + // disable parent death signal for the watcher process Pdeathsig: syscall.Signal(0x0), } cmd.SysProcAttr = sysproc diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 4a08f4cd8a5..02583b1c724 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -8,7 +8,6 @@ import ( "context" "fmt" "os" - "os/signal" "runtime" "time" @@ -35,6 +34,8 @@ import ( const ( watcherName = "elastic-agent-watcher" watcherLockFile = "watcher.lock" + + errorSettingParentSignalsExitCode = 6 ) var ErrWatchCancelled = errors.New("watch cancelled") @@ -45,11 +46,6 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command Short: "Watch the Elastic Agent for failures and initiate rollback", Long: `This command watches Elastic Agent for failures and initiates rollback if necessary.`, Run: func(c 
*cobra.Command, _ []string) { - - // Initially ignore all signals - ignoredSignalsChannel := make(chan os.Signal, 1) - signal.Notify(ignoredSignalsChannel) - cfg := getConfig(streams) log, err := configuredLogger(cfg, watcherName) if err != nil { @@ -60,6 +56,12 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command // Make sure to flush any buffered logs before we're done. defer log.Sync() //nolint:errcheck // flushing buffered logs is best effort. + err = setupParentProcessSignals() + if err != nil { + fmt.Fprintf(streams.Err, "Error setting parent process signals: %v\n", err) + os.Exit(errorSettingParentSignalsExitCode) + } + takedown, _ := c.Flags().GetBool("takedown") if takedown { err = upgrade.TakedownWatcher(context.Background(), log, utils.GetWatcherPIDs) diff --git a/internal/pkg/agent/cmd/watch_signals_linux.go b/internal/pkg/agent/cmd/watch_signals_linux.go new file mode 100644 index 00000000000..97d5a329d61 --- /dev/null +++ b/internal/pkg/agent/cmd/watch_signals_linux.go @@ -0,0 +1,23 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build linux + +package cmd + +import ( + "fmt" + + "golang.org/x/sys/unix" +) + +func setupParentProcessSignals() error { + // Perform prctl(PR_SET_PDEATHSIG, 0) to clear the parent death signal + err := unix.Prctl(unix.PR_SET_PDEATHSIG, 0, 0, 0, 0) + if err != nil { + return fmt.Errorf("clearing parent death signal: %w", err) + } + + return nil +} diff --git a/internal/pkg/agent/cmd/watch_signals_notlinux.go b/internal/pkg/agent/cmd/watch_signals_notlinux.go new file mode 100644 index 00000000000..ab565f95d39 --- /dev/null +++ b/internal/pkg/agent/cmd/watch_signals_notlinux.go @@ -0,0 +1,13 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +//go:build !linux + +package cmd + +func setupParentProcessSignals() error { + // nothing to do here + + return nil +} From acb0c65fbb6b98d25151df8560b4dd0c05495468 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Thu, 28 Aug 2025 18:18:48 +0200 Subject: [PATCH 35/38] Refactor: move TakedownWatcher function to watch subcommand --- .../application/upgrade/rollback_windows.go | 22 +- .../pkg/agent/application/upgrade/watcher.go | 4 - .../application/upgrade/watcher_notwindows.go | 49 ---- .../agent/application/upgrade/watcher_test.go | 234 ----------------- .../application/upgrade/watcher_windows.go | 114 --------- internal/pkg/agent/cmd/watch.go | 6 +- internal/pkg/agent/cmd/watch_notwindows.go | 62 +++++ internal/pkg/agent/cmd/watch_test.go | 235 ++++++++++++++++++ internal/pkg/agent/cmd/watch_windows.go | 136 ++++++++++ 9 files changed, 459 insertions(+), 403 deletions(-) create mode 100644 internal/pkg/agent/cmd/watch_notwindows.go create mode 100644 internal/pkg/agent/cmd/watch_windows.go diff --git a/internal/pkg/agent/application/upgrade/rollback_windows.go b/internal/pkg/agent/application/upgrade/rollback_windows.go index adc816766fa..3ddfbac4690 100644 --- a/internal/pkg/agent/application/upgrade/rollback_windows.go +++ b/internal/pkg/agent/application/upgrade/rollback_windows.go @@ -13,6 +13,7 @@ import ( "os/exec" "syscall" "time" + "unsafe" "golang.org/x/sys/windows" @@ -68,7 +69,7 @@ func StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, opts ...WatcherIn if err := cmd.Start(); err != nil { return nil, fmt.Errorf("failed to start Upgrade Watcher: %w", err) } - list, consoleErr := GetConsoleProcessList() + list, consoleErr := getConsoleProcessList() if consoleErr != nil { log.Errorf("failed to get console process list: %v", consoleErr) } else { @@ -93,3 +94,22 @@ func 
StartWatcherCmd(log *logger.Logger, createCmd cmdFactory, opts ...WatcherIn return cmd, nil } + +// getConsoleProcessList retrieves the list of process IDs attached to the current console +func getConsoleProcessList() ([]uint32, error) { + // Allocate a buffer for PIDs + const maxProcs = 64 + pids := make([]uint32, maxProcs) + + r1, _, err := procGetConsoleProcessList.Call( + uintptr(unsafe.Pointer(&pids[0])), + uintptr(maxProcs), + ) + + count := uint32(r1) + if count == 0 { + return nil, err + } + + return pids[:count], nil +} diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 26f787fc3e4..b90cab647f9 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -286,10 +286,6 @@ func (a AgentWatcherHelper) TakeOverWatcher(ctx context.Context, log *logger.Log return takeOverWatcher(ctx, log, new(commandWatcherGrappler), topDir, 30*time.Second, 500*time.Millisecond, 100*time.Millisecond) } -// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. 
-// This will allow for easier testing of takeOverWatcher using fake binaries -type watcherPIDsFetcher func() ([]int, error) - // watcherGrappler is an abstraction over the way elastic-agent main process should take down (stop, gracefully if possible) a watcher process type watcherGrappler interface { TakeDownWatcher(ctx context.Context, log *logger.Logger) error diff --git a/internal/pkg/agent/application/upgrade/watcher_notwindows.go b/internal/pkg/agent/application/upgrade/watcher_notwindows.go index 4337855eef3..8c5e8726108 100644 --- a/internal/pkg/agent/application/upgrade/watcher_notwindows.go +++ b/internal/pkg/agent/application/upgrade/watcher_notwindows.go @@ -8,14 +8,10 @@ package upgrade import ( "context" - "errors" - "fmt" "os" "os/exec" - "syscall" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" - "github.com/elastic/elastic-agent/pkg/core/logger" ) func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { @@ -29,48 +25,3 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { ) return cmd } - -func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { - pids, err := pidFetchFunc() - if err != nil { - return fmt.Errorf("error listing watcher processes: %w", err) - } - - ownPID := os.Getpid() - var accumulatedSignalingErrors error - for _, pid := range pids { - - if ctx.Err() != nil { - return ctx.Err() - } - - if pid == ownPID { - continue - } - - log.Debugf("attempting to terminate watcher process with PID: %d", pid) - - process, err := os.FindProcess(pid) - if err != nil { - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error finding watcher process with PID: %d: %w", pid, err)) - continue - } - - err = process.Signal(syscall.SIGTERM) - if err != nil { - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error killing watcher process with PID: %d: %w", pid, err)) - continue - } - - } - return 
accumulatedSignalingErrors -} - -func isProcessLive(process *os.Process) (bool, error) { - signalErr := process.Signal(syscall.Signal(0)) - if signalErr != nil { - return false, nil //nolint:nilerr // if we receive an error it means that the process is not running, so the check completed without errors - } else { - return true, nil - } -} diff --git a/internal/pkg/agent/application/upgrade/watcher_test.go b/internal/pkg/agent/application/upgrade/watcher_test.go index 6e8369b4b8c..12a7c744c61 100644 --- a/internal/pkg/agent/application/upgrade/watcher_test.go +++ b/internal/pkg/agent/application/upgrade/watcher_test.go @@ -9,10 +9,7 @@ import ( "fmt" "net" "os" - "os/exec" "path/filepath" - "runtime" - "strings" "sync" "testing" "time" @@ -29,7 +26,6 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/control/v2/cproto" - "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" agtversion "github.com/elastic/elastic-agent/pkg/version" ) @@ -994,233 +990,3 @@ func TestTakeOverWatcher(t *testing.T) { } } - -func Test_takedownWatcher(t *testing.T) { - - const applockerFileName = "mocklocker.lock" - - testExecutablePath := filepath.Join("..", "filelock", "testlocker", "testlocker") - if runtime.GOOS == "windows" { - testExecutablePath += ".exe" - } - testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) - require.NoError(t, err, "error calculating absolute test executable part") - - require.FileExists(t, testExecutableAbsolutePath, - "testlocker binary not found.\n"+ - "Check that:\n"+ - "- test binaries have been built with mage build:testbinaries\n"+ - "- the path of the executable is correct") - - returnCmdPIDsFetcher := func(cmds ...*exec.Cmd) watcherPIDsFetcher { - return func() ([]int, error) { - pids := make([]int, 0, len(cmds)) - for _, c := range cmds { - if 
c.Process != nil { - pids = append(pids, c.Process.Pid) - } - } - - return pids, nil - } - } - - // create a struct with a *exec.Cmd and a channel that will be closed when Wait() returns for the exec.Cmd - // this should keep the data race detector happy. - type testProcess struct { - cmd *exec.Cmd - waitChan chan struct{} - } - - type setupFunc func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) - type assertFunc func(t *testing.T, workdir string, cmds []testProcess) - - tests := []struct { - name string - setup setupFunc - wantErr assert.ErrorAssertionFunc - assertPostTakedown assertFunc - }{ - { - name: "no contention for watcher applocker", - setup: func(_ *testing.T, _ *logger.Logger, _ string) (watcherPIDsFetcher, []testProcess) { - // nothing to do here, always return and empty list of pids - return func() ([]int, error) { - return nil, nil - }, nil - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, _ []testProcess) { - // we should be able to lock, no problem - locker := filelock.NewAppLocker(workdir, applockerFileName) - lockError := locker.TryLock() - t.Cleanup(func() { - _ = locker.Unlock() - }) - - assert.NoError(t, lockError) - - }, - }, - { - name: "contention with test binary listening to signals: test binary is terminated gracefully", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { - cmd, testChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, false) - require.NoError(t, err, "error starting testlocker binary") - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") - }, 10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker 
should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, waitChan: testChan}} - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []testProcess) { - - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") - - require.Eventually(t, func() bool { - running, checkErr := isProcessRunning(t, testlockerProcess.cmd) - if checkErr != nil { - t.Logf("error checking for testlocker process running: %s", checkErr.Error()) - return false - } - return !running - }, 30*time.Second, 100*time.Millisecond, "test locker process should have exited") - - <-testlockerProcess.waitChan - - assert.True(t, testlockerProcess.cmd.ProcessState.Exited(), "test locker process should have terminated") - assert.Equal(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should have a successful exit status") - - assert.FileExists(t, filepath.Join(workdir, applockerFileName)) - testApplocker := filelock.NewAppLocker(workdir, applockerFileName) - testApplockerError := testApplocker.TryLock() - t.Cleanup(func() { - _ = testApplocker.Unlock() - }) - assert.NoError(t, testApplockerError, "error locking applocker") - }, - }, - { - name: "contention with test binary not listening to signals: test binary is not terminated", - setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { - cmd, waitChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, true) - require.NoError(t, err, "error starting testlocker binary") - - // wait for test binary to acquire lock - require.EventuallyWithT(t, func(collect *assert.CollectT) { - assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") - }, 
10*time.Second, 100*time.Millisecond) - require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") - - t.Logf("started testlocker process with PID %d", cmd.Process.Pid) - - return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, waitChan: waitChan}} - }, - wantErr: assert.NoError, - assertPostTakedown: func(t *testing.T, workdir string, cmds []testProcess) { - - assert.Len(t, cmds, 1) - testlockerProcess := cmds[0] - require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") - - // check that the process is still running for a time - assert.Never(t, func() bool { - running, checkErr := isProcessRunning(t, testlockerProcess.cmd) - if checkErr != nil { - t.Logf("error checking for testlocker process running: %s", checkErr.Error()) - return false - } - return !running - }, 1*time.Second, 100*time.Millisecond, "test locker process should still be running for some time") - - // Kill the process explicitly - err = testlockerProcess.cmd.Process.Kill() - assert.NoError(t, err, "error killing testlocker process") - - <-testlockerProcess.waitChan - - if assert.NotNil(t, testlockerProcess.cmd.ProcessState, "test locker process should have been terminated") { - assert.NotEqual(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should not return a successful exit code") - } - }, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - workDir := t.TempDir() - log, obsLogs := loggertest.New(t.Name()) - t.Cleanup(func() { - // however it ends, try to print out the logs of TakedownWatcher - loggertest.PrintObservedLogs(obsLogs.All(), t.Log) - }) - pidFetcher, processInfos := tc.setup(t, log, workDir) - tc.wantErr(t, TakedownWatcher(t.Context(), log.Named("TakedownWatcher"), pidFetcher)) - if tc.assertPostTakedown != nil { - tc.assertPostTakedown(t, workDir, processInfos) - } - }) - } -} - -func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName string, 
testExecutablePath string, workdir string, ignoreSignals bool) (*exec.Cmd, chan struct{}) { - - watchTerminated := make(chan struct{}) - - args := []string{"-lockfile", filepath.Join(workdir, applockerFileName)} - if ignoreSignals { - args = append(args, "-ignoresignals") - } - - // use the same invoke as the one used to launch a watcher - watcherCmd, err := StartWatcherCmd(log, func() *exec.Cmd { - cmd := InvokeCmdWithArgs(testExecutablePath, args...) - - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - return cmd - }, - WithWatcherPostWaitHook(func() { - close(watchTerminated) - }), - ) - - require.NoError(t, err, "error starting testlocker binary") - return watcherCmd, watchTerminated -} - -func isProcessRunning(t *testing.T, cmd *exec.Cmd) (bool, error) { - if cmd.Process == nil { - return false, nil - } - t.Logf("checking if pid %d is still running", cmd.Process.Pid) - // search for the pid on the running processes - process, err := os.FindProcess(cmd.Process.Pid) - if err != nil { - t.Logf("error string: %q", err.Error()) - if runtime.GOOS == "windows" && strings.Contains(err.Error(), "The parameter is incorrect") { - // in windows, noone can hear you scream - // invalid parameter means that the process object cannot be found - t.Logf("pid %d is not running because on windows we got an incorrect parameter error", cmd.Process.Pid) - return false, nil - } - - t.Logf("error finding process: %T %v", err, err) - return false, err - } - - if process == nil { - t.Logf("pid %d is not running because os.GetProcess returned a nil process", cmd.Process.Pid) - return false, nil - } - - return isProcessLive(cmd.Process) -} diff --git a/internal/pkg/agent/application/upgrade/watcher_windows.go b/internal/pkg/agent/application/upgrade/watcher_windows.go index 4d87503449c..9f973826681 100644 --- a/internal/pkg/agent/application/upgrade/watcher_windows.go +++ b/internal/pkg/agent/application/upgrade/watcher_windows.go @@ -8,24 +8,19 @@ package upgrade import ( "context" - 
"errors" - "fmt" "os" "os/exec" "syscall" - "unsafe" "golang.org/x/sys/windows" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" - "github.com/elastic/elastic-agent/pkg/core/logger" ) var ( kernel32API = windows.NewLazySystemDLL("kernel32.dll") freeConsoleProc = kernel32API.NewProc("FreeConsole") - attachConsoleProc = kernel32API.NewProc("AttachConsole") procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList") allocConsoleProc = kernel32API.NewProc("AllocConsole") ) @@ -45,112 +40,3 @@ func createTakeDownWatcherCommand(ctx context.Context) *exec.Cmd { } return cmd } - -func TakedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { - pids, err := pidFetchFunc() - if err != nil { - return fmt.Errorf("error listing watcher processes: %w", err) - } - - ownPID := os.Getpid() - - var accumulatedSignalingErrors error - for _, pid := range pids { - - if ctx.Err() != nil { - return ctx.Err() - } - - if pid == ownPID { - continue - } - - log.Debugf("attempting to terminate watcher process with PID: %d", pid) - accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, signalPID(log, pid)) - } - - return accumulatedSignalingErrors -} - -// GetConsoleProcessList retrieves the list of process IDs attached to the current console -func GetConsoleProcessList() ([]uint32, error) { - // Allocate a buffer for PIDs - const maxProcs = 64 - pids := make([]uint32, maxProcs) - - r1, _, err := procGetConsoleProcessList.Call( - uintptr(unsafe.Pointer(&pids[0])), - uintptr(maxProcs), - ) - - count := uint32(r1) - if count == 0 { - return nil, err - } - - return pids[:count], nil -} - -// signalPID takes care of signaling a given PID. 
It also leverages defer() for freeing console and other housekeeping -func signalPID(log *logger.Logger, pid int) error { - r1, _, consoleErr := freeConsoleProc.Call() - if r1 == 0 { - log.Warnf("error preemptively detaching from console: %s", consoleErr) - } - - r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) - if r1 == 0 { - return fmt.Errorf("error attaching console to watcher process with PID %d: %w", pid, consoleErr) - } - log.Infof("successfully attached console with PID: %d", pid) - - defer func() { - r1, _, consoleErr = freeConsoleProc.Call() - if r1 == 0 { - log.Errorf("error detaching from console: %s", consoleErr) - } else { - log.Infof("successfully detached from console of PID: %d", pid) - } - }() - - if list, consoleProcessListErr := GetConsoleProcessList(); consoleProcessListErr != nil { - log.Errorf("error listing console processes: %s", consoleProcessListErr) - } else { - log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) - } - - // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that - // the parent process of the watcher has already terminated, we have to hug it tightly and take it down with us - // by specifying processGroupID=0 - //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) - killProcErr := windows.GenerateConsoleCtrlEvent(windows.CTRL_BREAK_EVENT, uint32(pid)) - - if killProcErr != nil { - return fmt.Errorf("error signaling process with PID: %d: %w", pid, killProcErr) - } - - return nil -} - -func isProcessLive(process *os.Process) (bool, error) { - //exitCodeStillActive according to https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getexitcodeprocess - const 
exitCodeStillActive = 259 - // Open the process with PROCESS_QUERY_LIMITED_INFORMATION access - //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) - handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(process.Pid)) - if err != nil { - return false, fmt.Errorf("OpenProcess failed: %w", err) - } - - defer func(handle windows.Handle) { - _ = windows.CloseHandle(handle) - }(handle) - - var exitCode uint32 - err = windows.GetExitCodeProcess(handle, &exitCode) - if err != nil { - return false, fmt.Errorf("getting process exit code: %w", err) - } - - return exitCode == exitCodeStillActive, nil -} diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 02583b1c724..16b2082588f 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -38,6 +38,10 @@ const ( errorSettingParentSignalsExitCode = 6 ) +// watcherPIDsFetcher defines the type of function responsible for fetching watcher PIDs. 
+// This will allow for easier testing of takeOverWatcher using fake binaries +type watcherPIDsFetcher func() ([]int, error) + var ErrWatchCancelled = errors.New("watch cancelled") func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command { @@ -64,7 +68,7 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command takedown, _ := c.Flags().GetBool("takedown") if takedown { - err = upgrade.TakedownWatcher(context.Background(), log, utils.GetWatcherPIDs) + err = takedownWatcher(context.Background(), log, utils.GetWatcherPIDs) if err != nil { log.Errorf("error taking down watcher: %v", err) os.Exit(5) diff --git a/internal/pkg/agent/cmd/watch_notwindows.go b/internal/pkg/agent/cmd/watch_notwindows.go new file mode 100644 index 00000000000..ece82dc020c --- /dev/null +++ b/internal/pkg/agent/cmd/watch_notwindows.go @@ -0,0 +1,62 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ +//go:build !windows + +package cmd + +import ( + "context" + "errors" + "fmt" + "os" + "syscall" + + "github.com/elastic/elastic-agent/pkg/core/logger" +) + +func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error { + pids, err := pidFetchFunc() + if err != nil { + return fmt.Errorf("error listing watcher processes: %w", err) + } + + ownPID := os.Getpid() + var accumulatedSignalingErrors error + for _, pid := range pids { + + if ctx.Err() != nil { + return ctx.Err() + } + + if pid == ownPID { + continue + } + + log.Debugf("attempting to terminate watcher process with PID: %d", pid) + + process, err := os.FindProcess(pid) + if err != nil { + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error finding watcher process with PID: %d: %w", pid, err)) + continue + } + + err = process.Signal(syscall.SIGTERM) + if err != nil { + accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, fmt.Errorf("error killing watcher process with PID: %d: %w", pid, err)) + continue + } + + } + return accumulatedSignalingErrors +} + +func isProcessLive(process *os.Process) (bool, error) { + signalErr := process.Signal(syscall.Signal(0)) + if signalErr != nil { + return false, nil //nolint:nilerr // if we receive an error it means that the process is not running, so the check completed without errors + } else { + return true, nil + } +} diff --git a/internal/pkg/agent/cmd/watch_test.go b/internal/pkg/agent/cmd/watch_test.go index 3727a7a211b..91b2a1899c8 100644 --- a/internal/pkg/agent/cmd/watch_test.go +++ b/internal/pkg/agent/cmd/watch_test.go @@ -7,7 +7,10 @@ package cmd import ( "fmt" "os" + "os/exec" + "path/filepath" "runtime" + "strings" "testing" "time" @@ -17,12 +20,14 @@ import ( "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" 
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details" "github.com/elastic/elastic-agent/internal/pkg/agent/configuration" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" "github.com/elastic/elastic-agent/internal/pkg/release" + "github.com/elastic/elastic-agent/pkg/core/logger" "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade" @@ -364,3 +369,233 @@ func Test_watchCmd(t *testing.T) { }) } } + +func Test_takedownWatcher(t *testing.T) { + + const applockerFileName = "mocklocker.lock" + + testExecutablePath := filepath.Join("..", "application", "filelock", "testlocker", "testlocker") + if runtime.GOOS == "windows" { + testExecutablePath += ".exe" + } + testExecutableAbsolutePath, err := filepath.Abs(testExecutablePath) + require.NoError(t, err, "error calculating absolute test executable part") + + require.FileExists(t, testExecutableAbsolutePath, + "testlocker binary not found.\n"+ + "Check that:\n"+ + "- test binaries have been built with mage build:testbinaries\n"+ + "- the path of the executable is correct") + + returnCmdPIDsFetcher := func(cmds ...*exec.Cmd) watcherPIDsFetcher { + return func() ([]int, error) { + pids := make([]int, 0, len(cmds)) + for _, c := range cmds { + if c.Process != nil { + pids = append(pids, c.Process.Pid) + } + } + + return pids, nil + } + } + + // create a struct with a *exec.Cmd and a channel that will be closed when Wait() returns for the exec.Cmd + // this should keep the data race detector happy. 
+ type testProcess struct { + cmd *exec.Cmd + waitChan chan struct{} + } + + type setupFunc func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) + type assertFunc func(t *testing.T, workdir string, cmds []testProcess) + + tests := []struct { + name string + setup setupFunc + wantErr assert.ErrorAssertionFunc + assertPostTakedown assertFunc + }{ + { + name: "no contention for watcher applocker", + setup: func(_ *testing.T, _ *logger.Logger, _ string) (watcherPIDsFetcher, []testProcess) { + // nothing to do here, always return and empty list of pids + return func() ([]int, error) { + return nil, nil + }, nil + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, _ []testProcess) { + // we should be able to lock, no problem + locker := filelock.NewAppLocker(workdir, applockerFileName) + lockError := locker.TryLock() + t.Cleanup(func() { + _ = locker.Unlock() + }) + + assert.NoError(t, lockError) + + }, + }, + { + name: "contention with test binary listening to signals: test binary is terminated gracefully", + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { + cmd, testChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, false) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, waitChan: testChan}} + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir 
string, cmds []testProcess) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have a not nil cmd") + + require.Eventually(t, func() bool { + running, checkErr := isProcessRunning(t, testlockerProcess.cmd) + if checkErr != nil { + t.Logf("error checking for testlocker process running: %s", checkErr.Error()) + return false + } + return !running + }, 30*time.Second, 100*time.Millisecond, "test locker process should have exited") + + <-testlockerProcess.waitChan + + assert.True(t, testlockerProcess.cmd.ProcessState.Exited(), "test locker process should have terminated") + assert.Equal(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should have a successful exit status") + + assert.FileExists(t, filepath.Join(workdir, applockerFileName)) + testApplocker := filelock.NewAppLocker(workdir, applockerFileName) + testApplockerError := testApplocker.TryLock() + t.Cleanup(func() { + _ = testApplocker.Unlock() + }) + assert.NoError(t, testApplockerError, "error locking applocker") + }, + }, + { + name: "contention with test binary not listening to signals: test binary is not terminated", + setup: func(t *testing.T, log *logger.Logger, workdir string) (watcherPIDsFetcher, []testProcess) { + cmd, waitChan := createTestlockerCommand(t, log.Named("testlocker"), applockerFileName, testExecutableAbsolutePath, workdir, true) + require.NoError(t, err, "error starting testlocker binary") + + // wait for test binary to acquire lock + require.EventuallyWithT(t, func(collect *assert.CollectT) { + assert.FileExists(collect, filepath.Join(workdir, applockerFileName), "watcher applocker should have been created by the test binary") + }, 10*time.Second, 100*time.Millisecond) + require.NotNil(t, cmd.Process, "process details for testlocker should not be nil") + + t.Logf("started testlocker process with PID %d", cmd.Process.Pid) + + return returnCmdPIDsFetcher(cmd), []testProcess{{cmd: cmd, 
waitChan: waitChan}} + }, + wantErr: assert.NoError, + assertPostTakedown: func(t *testing.T, workdir string, cmds []testProcess) { + + assert.Len(t, cmds, 1) + testlockerProcess := cmds[0] + require.NotNil(t, testlockerProcess, "test locker process info should have exec.Cmd set") + + // check that the process is still running for a time + assert.Never(t, func() bool { + running, checkErr := isProcessRunning(t, testlockerProcess.cmd) + if checkErr != nil { + t.Logf("error checking for testlocker process running: %s", checkErr.Error()) + return false + } + return !running + }, 1*time.Second, 100*time.Millisecond, "test locker process should still be running for some time") + + // Kill the process explicitly + err = testlockerProcess.cmd.Process.Kill() + assert.NoError(t, err, "error killing testlocker process") + + <-testlockerProcess.waitChan + + if assert.NotNil(t, testlockerProcess.cmd.ProcessState, "test locker process should have been terminated") { + assert.NotEqual(t, 0, testlockerProcess.cmd.ProcessState.ExitCode(), "test locker process should not return a successful exit code") + } + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + workDir := t.TempDir() + log, obsLogs := loggertest.New(t.Name()) + t.Cleanup(func() { + // however it ends, try to print out the logs of takedownWatcher + loggertest.PrintObservedLogs(obsLogs.All(), t.Log) + }) + pidFetcher, processInfos := tc.setup(t, log, workDir) + tc.wantErr(t, takedownWatcher(t.Context(), log.Named("takedownWatcher"), pidFetcher)) + if tc.assertPostTakedown != nil { + tc.assertPostTakedown(t, workDir, processInfos) + } + }) + } +} + +func createTestlockerCommand(t *testing.T, log *logger.Logger, applockerFileName string, testExecutablePath string, workdir string, ignoreSignals bool) (*exec.Cmd, chan struct{}) { + + watchTerminated := make(chan struct{}) + + args := []string{"-lockfile", filepath.Join(workdir, applockerFileName)} + if ignoreSignals { + args = append(args, 
"-ignoresignals") + } + + // use the same invoke as the one used to launch a watcher + watcherCmd, err := upgrade.StartWatcherCmd(log, func() *exec.Cmd { + cmd := upgrade.InvokeCmdWithArgs(testExecutablePath, args...) + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd + }, + upgrade.WithWatcherPostWaitHook(func() { + close(watchTerminated) + }), + ) + + require.NoError(t, err, "error starting testlocker binary") + return watcherCmd, watchTerminated +} + +func isProcessRunning(t *testing.T, cmd *exec.Cmd) (bool, error) { + if cmd.Process == nil { + return false, nil + } + t.Logf("checking if pid %d is still running", cmd.Process.Pid) + // search for the pid on the running processes + process, err := os.FindProcess(cmd.Process.Pid) + if err != nil { + t.Logf("error string: %q", err.Error()) + if runtime.GOOS == "windows" && strings.Contains(err.Error(), "The parameter is incorrect") { + // in windows, noone can hear you scream + // invalid parameter means that the process object cannot be found + t.Logf("pid %d is not running because on windows we got an incorrect parameter error", cmd.Process.Pid) + return false, nil + } + + t.Logf("error finding process: %T %v", err, err) + return false, err + } + + if process == nil { + t.Logf("pid %d is not running because os.GetProcess returned a nil process", cmd.Process.Pid) + return false, nil + } + + return isProcessLive(cmd.Process) +} diff --git a/internal/pkg/agent/cmd/watch_windows.go b/internal/pkg/agent/cmd/watch_windows.go new file mode 100644 index 00000000000..90ef342ff16 --- /dev/null +++ b/internal/pkg/agent/cmd/watch_windows.go @@ -0,0 +1,136 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+
+//go:build windows
+
+package cmd
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+
+	"github.com/elastic/elastic-agent/pkg/core/logger"
+)
+
+// kernel32 procedures used below to detach from/attach to consoles and to list
+// the processes attached to the current console.
+var (
+	kernel32API = windows.NewLazySystemDLL("kernel32.dll")
+
+	freeConsoleProc           = kernel32API.NewProc("FreeConsole")
+	attachConsoleProc         = kernel32API.NewProc("AttachConsole")
+	procGetConsoleProcessList = kernel32API.NewProc("GetConsoleProcessList")
+)
+
+// takedownWatcher signals every watcher process returned by pidFetchFunc, skipping
+// the PID of the current process.
+//
+// It returns a wrapped error when the PID list cannot be fetched and ctx.Err() as soon
+// as ctx is done. Otherwise it returns the errors reported by signalPID joined together
+// (nil when every signal succeeded).
+func takedownWatcher(ctx context.Context, log *logger.Logger, pidFetchFunc watcherPIDsFetcher) error {
+	pids, err := pidFetchFunc()
+	if err != nil {
+		return fmt.Errorf("error listing watcher processes: %w", err)
+	}
+
+	ownPID := os.Getpid()
+
+	var accumulatedSignalingErrors error
+	for _, pid := range pids {
+
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+
+		if pid == ownPID {
+			continue
+		}
+
+		log.Debugf("attempting to terminate watcher process with PID: %d", pid)
+		accumulatedSignalingErrors = errors.Join(accumulatedSignalingErrors, signalPID(log, pid))
+	}
+
+	return accumulatedSignalingErrors
+}
+
+// GetConsoleProcessList retrieves the list of process IDs attached to the current console.
+//
+// The call is first attempted with a 64-element buffer. When that buffer is too small,
+// the Windows API stores no PIDs and returns the number of elements it needs; in that
+// case the buffer is reallocated to the reported size and the call is repeated, instead
+// of slicing the buffer past its length (which would panic).
+//
+// It returns a nil slice and the error produced by the procedure call when the API
+// reports zero processes.
+func GetConsoleProcessList() ([]uint32, error) {
+	// Initial buffer size for PIDs; grown on demand below.
+	const initialProcs = 64
+	pids := make([]uint32, initialProcs)
+
+	for {
+		r1, _, err := procGetConsoleProcessList.Call(
+			uintptr(unsafe.Pointer(&pids[0])),
+			uintptr(len(pids)),
+		)
+
+		count := uint32(r1)
+		if count == 0 {
+			return nil, err
+		}
+
+		if int(count) <= len(pids) {
+			return pids[:count], nil
+		}
+
+		// The buffer was too small: nothing has been stored and count is the
+		// required number of elements, so retry with a buffer of that size.
+		pids = make([]uint32, count)
+	}
+}
+
+// signalPID takes care of signaling a given PID.
It also leverages defer() for freeing console and other housekeeping +func signalPID(log *logger.Logger, pid int) error { + r1, _, consoleErr := freeConsoleProc.Call() + if r1 == 0 { + log.Warnf("error preemptively detaching from console: %s", consoleErr) + } + + r1, _, consoleErr = attachConsoleProc.Call(uintptr(pid)) + if r1 == 0 { + return fmt.Errorf("error attaching console to watcher process with PID %d: %w", pid, consoleErr) + } + log.Infof("successfully attached console with PID: %d", pid) + + defer func() { + r1, _, consoleErr = freeConsoleProc.Call() + if r1 == 0 { + log.Errorf("error detaching from console: %s", consoleErr) + } else { + log.Infof("successfully detached from console of PID: %d", pid) + } + }() + + if list, consoleProcessListErr := GetConsoleProcessList(); consoleProcessListErr != nil { + log.Errorf("error listing console processes: %s", consoleProcessListErr) + } else { + log.Infof("Own PID: %d, Watcher pid %d, Process list on console: %v", os.Getpid(), pid, list) + } + + // Normally we would want to send the Ctrl+Break event only to the watcher process but due to the fact that + // the parent process of the watcher has already terminated, we have to hug it tightly and take it down with us + // by specifying processGroupID=0 + //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) + killProcErr := windows.GenerateConsoleCtrlEvent(windows.CTRL_BREAK_EVENT, uint32(pid)) + + if killProcErr != nil { + return fmt.Errorf("error signaling process with PID: %d: %w", pid, killProcErr) + } + + return nil +} + +func isProcessLive(process *os.Process) (bool, error) { + //exitCodeStillActive according to https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getexitcodeprocess + const 
exitCodeStillActive = 259 + // Open the process with PROCESS_QUERY_LIMITED_INFORMATION access + //nolint:gosec // int -> uint32 no overflow is possible since windows PID is a DWORD (uint32) (see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocessid and https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types) + handle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(process.Pid)) + if err != nil { + return false, fmt.Errorf("OpenProcess failed: %w", err) + } + + defer func(handle windows.Handle) { + _ = windows.CloseHandle(handle) + }(handle) + + var exitCode uint32 + err = windows.GetExitCodeProcess(handle, &exitCode) + if err != nil { + return false, fmt.Errorf("getting process exit code: %w", err) + } + + return exitCode == exitCodeStillActive, nil +} From 5d6d7f47ed86ab75edc962d0453ae1e3057f93a6 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 29 Aug 2025 11:49:51 +0200 Subject: [PATCH 36/38] Move RollbacksAvailable struct out of upgrade details --- .../application/upgrade/details/details.go | 17 +-- .../agent/application/upgrade/step_mark.go | 103 ++++++++++-------- .../application/upgrade/step_mark_test.go | 17 ++- .../pkg/agent/application/upgrade/upgrade.go | 6 +- .../agent/application/upgrade/upgrade_test.go | 10 +- internal/pkg/agent/cmd/watch.go | 4 - 6 files changed, 75 insertions(+), 82 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/details/details.go b/internal/pkg/agent/application/upgrade/details/details.go index 66c8f69d047..b3fcf99069b 100644 --- a/internal/pkg/agent/application/upgrade/details/details.go +++ b/internal/pkg/agent/application/upgrade/details/details.go @@ -6,7 +6,6 @@ package details import ( "math" - "slices" "sync" "time" @@ -60,19 +59,6 @@ type Metadata struct { // Reason is a string that may give out more information about transitioning to the current state. 
It has been // introduced initially to distinguish between manual and automatic rollbacks Reason string `json:"reason,omitempty" yaml:"reason,omitempty"` - - RollbacksAvailable []RollbackAvailable `json:"rollbacks_available,omitempty" yaml:"rollbacks_available,omitempty"` -} - -//rollback_available: -//- version: 8.16.0-SNAPSHOT, -//home: data/elastic-agent-8.16.0-SNAPSHOT-b65953 -//valid_until: "2024-11-21T14:42:21Z" - -type RollbackAvailable struct { - Version string `json:"version" yaml:"version"` - Home string `json:"home" yaml:"home"` - ValidUntil time.Time `json:"valid_until" yaml:"valid_until"` } func NewDetails(targetVersion string, initialState State, actionID string) *Details { @@ -246,8 +232,7 @@ func (m Metadata) Equals(otherM Metadata) bool { m.DownloadRate == otherM.DownloadRate && equalTimePointers(m.RetryUntil, otherM.RetryUntil) && m.RetryErrorMsg == otherM.RetryErrorMsg && - m.Reason == otherM.Reason && - slices.Equal(m.RollbacksAvailable, otherM.RollbacksAvailable) + m.Reason == otherM.Reason } func equalTimePointers(t, otherT *time.Time) bool { diff --git a/internal/pkg/agent/application/upgrade/step_mark.go b/internal/pkg/agent/application/upgrade/step_mark.go index c376b334712..b462e0b90c6 100644 --- a/internal/pkg/agent/application/upgrade/step_mark.go +++ b/internal/pkg/agent/application/upgrade/step_mark.go @@ -31,6 +31,13 @@ const ( OUTCOME_ROLLBACK ) +// RollbackAvailable identifies an elastic-agent install available for rollback +type RollbackAvailable struct { + Version string `json:"version" yaml:"version"` + Home string `json:"home" yaml:"home"` + ValidUntil time.Time `json:"valid_until" yaml:"valid_until"` +} + // UpdateMarker is a marker holding necessary information about ongoing upgrade. 
type UpdateMarker struct { // Version represents the version the agent is upgraded to @@ -57,6 +64,8 @@ type UpdateMarker struct { Details *details.Details `json:"details,omitempty" yaml:"details,omitempty"` DesiredOutcome UpgradeOutcome `json:"desired_outcome" yaml:"desired_outcome"` + + RollbacksAvailable []RollbackAvailable `json:"rollbacks_available,omitempty" yaml:"rollbacks_available,omitempty"` } // GetActionID returns the Fleet Action ID associated with the @@ -103,32 +112,34 @@ func convertToActionUpgrade(a *MarkerActionUpgrade) *fleetapi.ActionUpgrade { } type updateMarkerSerializer struct { - Version string `yaml:"version"` - Hash string `yaml:"hash"` - VersionedHome string `yaml:"versioned_home"` - UpdatedOn time.Time `yaml:"updated_on"` - PrevVersion string `yaml:"prev_version"` - PrevHash string `yaml:"prev_hash"` - PrevVersionedHome string `yaml:"prev_versioned_home"` - Acked bool `yaml:"acked"` - Action *MarkerActionUpgrade `yaml:"action"` - Details *details.Details `yaml:"details"` - DesiredOutcome UpgradeOutcome `yaml:"desired_outcome"` + Version string `yaml:"version"` + Hash string `yaml:"hash"` + VersionedHome string `yaml:"versioned_home"` + UpdatedOn time.Time `yaml:"updated_on"` + PrevVersion string `yaml:"prev_version"` + PrevHash string `yaml:"prev_hash"` + PrevVersionedHome string `yaml:"prev_versioned_home"` + Acked bool `yaml:"acked"` + Action *MarkerActionUpgrade `yaml:"action"` + Details *details.Details `yaml:"details"` + DesiredOutcome UpgradeOutcome `yaml:"desired_outcome"` + RollbacksAvailable []RollbackAvailable `yaml:"rollbacks_available,omitempty"` } func newMarkerSerializer(m *UpdateMarker) *updateMarkerSerializer { return &updateMarkerSerializer{ - Version: m.Version, - Hash: m.Hash, - VersionedHome: m.VersionedHome, - UpdatedOn: m.UpdatedOn, - PrevVersion: m.PrevVersion, - PrevHash: m.PrevHash, - PrevVersionedHome: m.PrevVersionedHome, - Acked: m.Acked, - Action: convertToMarkerAction(m.Action), - Details: m.Details, - 
DesiredOutcome: m.DesiredOutcome, + Version: m.Version, + Hash: m.Hash, + VersionedHome: m.VersionedHome, + UpdatedOn: m.UpdatedOn, + PrevVersion: m.PrevVersion, + PrevHash: m.PrevHash, + PrevVersionedHome: m.PrevVersionedHome, + Acked: m.Acked, + Action: convertToMarkerAction(m.Action), + Details: m.Details, + DesiredOutcome: m.DesiredOutcome, + RollbacksAvailable: m.RollbacksAvailable, } } @@ -220,7 +231,7 @@ func markUpgrade(log *logger.Logger, dataDirPath string, updatedOn time.Time, ag // if we have a not empty rollback window, write the prev version in the rollbacks_available field // we also need to check the destination version because the manual rollback and delayed cleanup will be // handled by that version of agent, so it needs to be recent enough - upgradeDetails.Metadata.RollbacksAvailable = []details.RollbackAvailable{ + marker.RollbacksAvailable = []RollbackAvailable{ { Version: previousAgent.version, Home: previousAgent.versionedHome, @@ -291,17 +302,18 @@ func loadMarker(markerFile string) (*UpdateMarker, error) { } return &UpdateMarker{ - Version: marker.Version, - Hash: marker.Hash, - VersionedHome: marker.VersionedHome, - UpdatedOn: marker.UpdatedOn, - PrevVersion: marker.PrevVersion, - PrevHash: marker.PrevHash, - PrevVersionedHome: marker.PrevVersionedHome, - Acked: marker.Acked, - Action: convertToActionUpgrade(marker.Action), - Details: marker.Details, - DesiredOutcome: marker.DesiredOutcome, + Version: marker.Version, + Hash: marker.Hash, + VersionedHome: marker.VersionedHome, + UpdatedOn: marker.UpdatedOn, + PrevVersion: marker.PrevVersion, + PrevHash: marker.PrevHash, + PrevVersionedHome: marker.PrevVersionedHome, + Acked: marker.Acked, + Action: convertToActionUpgrade(marker.Action), + Details: marker.Details, + DesiredOutcome: marker.DesiredOutcome, + RollbacksAvailable: marker.RollbacksAvailable, }, nil } @@ -314,17 +326,18 @@ func SaveMarker(dataDirPath string, marker *UpdateMarker, shouldFsync bool) erro func saveMarkerToPath(marker 
*UpdateMarker, markerFile string, shouldFsync bool) error { makerSerializer := &updateMarkerSerializer{ - Version: marker.Version, - Hash: marker.Hash, - VersionedHome: marker.VersionedHome, - UpdatedOn: marker.UpdatedOn, - PrevVersion: marker.PrevVersion, - PrevHash: marker.PrevHash, - PrevVersionedHome: marker.PrevVersionedHome, - Acked: marker.Acked, - Action: convertToMarkerAction(marker.Action), - Details: marker.Details, - DesiredOutcome: marker.DesiredOutcome, + Version: marker.Version, + Hash: marker.Hash, + VersionedHome: marker.VersionedHome, + UpdatedOn: marker.UpdatedOn, + PrevVersion: marker.PrevVersion, + PrevHash: marker.PrevHash, + PrevVersionedHome: marker.PrevVersionedHome, + Acked: marker.Acked, + Action: convertToMarkerAction(marker.Action), + Details: marker.Details, + DesiredOutcome: marker.DesiredOutcome, + RollbacksAvailable: marker.RollbacksAvailable, } markerBytes, err := yaml.Marshal(makerSerializer) if err != nil { diff --git a/internal/pkg/agent/application/upgrade/step_mark_test.go b/internal/pkg/agent/application/upgrade/step_mark_test.go index abe3bac6c1a..ba8736ce915 100644 --- a/internal/pkg/agent/application/upgrade/step_mark_test.go +++ b/internal/pkg/agent/application/upgrade/step_mark_test.go @@ -456,17 +456,16 @@ func TestMarkUpgrade(t *testing.T) { TargetVersion: "9.2.0-SNAPSHOT", State: "UPG_REPLACING", ActionID: "", - Metadata: details.Metadata{ - RollbacksAvailable: []details.RollbackAvailable{ - { - Version: "1.2.3-SNAPSHOT", - Home: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), - ValidUntil: updatedOnNow.Add(7 * 24 * time.Hour), - }, - }, - }, + Metadata: details.Metadata{}, }, DesiredOutcome: OUTCOME_UPGRADE, + RollbacksAvailable: []RollbackAvailable{ + { + Version: "1.2.3-SNAPSHOT", + Home: filepath.Join("data", "elastic-agent-1.2.3-SNAPSHOT-prvagt"), + ValidUntil: updatedOnNow.Add(7 * 24 * time.Hour), + }, + }, } assert.Equal(t, expectedMarker, actualMarker) }, diff --git 
a/internal/pkg/agent/application/upgrade/upgrade.go b/internal/pkg/agent/application/upgrade/upgrade.go index 00c91440457..5a00d2abf61 100644 --- a/internal/pkg/agent/application/upgrade/upgrade.go +++ b/internal/pkg/agent/application/upgrade/upgrade.go @@ -451,11 +451,11 @@ func (u *Upgrader) rollbackToPreviousVersion(ctx context.Context, topDir string, return ErrNilUpdateMarker } - if updateMarker.Details == nil || len(updateMarker.Details.Metadata.RollbacksAvailable) == 0 { + if len(updateMarker.RollbacksAvailable) == 0 { return ErrNoRollbacksAvailable } - var selectedRollback *details.RollbackAvailable - for _, rollback := range updateMarker.Details.Metadata.RollbacksAvailable { + var selectedRollback *RollbackAvailable + for _, rollback := range updateMarker.RollbacksAvailable { if rollback.Version == version && now.Before(rollback.ValidUntil) { selectedRollback = &rollback break diff --git a/internal/pkg/agent/application/upgrade/upgrade_test.go b/internal/pkg/agent/application/upgrade/upgrade_test.go index 9d6ec22f1c1..4030c266d2c 100644 --- a/internal/pkg/agent/application/upgrade/upgrade_test.go +++ b/internal/pkg/agent/application/upgrade/upgrade_test.go @@ -1070,11 +1070,11 @@ func TestManualRollback(t *testing.T) { state: UPG_WATCHING metadata: retry_until: null - rollbacks_available: - - version: 1.2.3 - home: data/elastic-agent-1.2.3-oldver - valid_until: 2025-07-18T10:11:12.131415Z desired_outcome: UPGRADE + rollbacks_available: + - version: 1.2.3 + home: data/elastic-agent-1.2.3-oldver + valid_until: 2025-07-18T10:11:12.131415Z ` parsed123Version, err := agtversion.ParseVersion("1.2.3") @@ -1276,7 +1276,7 @@ func TestManualRollback(t *testing.T) { assert.Equal(t, OUTCOME_ROLLBACK, marker.DesiredOutcome) require.NotNil(t, marker.Details) - assert.NotEmpty(t, marker.Details.Metadata.RollbacksAvailable) + assert.NotEmpty(t, marker.RollbacksAvailable) }, }, } diff --git a/internal/pkg/agent/cmd/watch.go b/internal/pkg/agent/cmd/watch.go index 
16b2082588f..5ed49672262 100644 --- a/internal/pkg/agent/cmd/watch.go +++ b/internal/pkg/agent/cmd/watch.go @@ -290,11 +290,7 @@ func getConfig(streams *cli.IOStreams) *configuration.Configuration { } func initUpgradeDetails(marker *upgrade.UpdateMarker, saveMarker func(*upgrade.UpdateMarker, bool) error, log *logp.Logger) *details.Details { - // FIXME this should edit details not rewrite them upgradeDetails := details.NewDetails(version.GetAgentPackageVersion(), details.StateWatching, marker.GetActionID()) - if marker.Details != nil { - upgradeDetails.Metadata.RollbacksAvailable = marker.Details.Metadata.RollbacksAvailable - } upgradeDetails.RegisterObserver(func(details *details.Details) { marker.Details = details if err := saveMarker(marker, true); err != nil { From 35a0aec70376d190105f42c2000feb7d2190d22b Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 29 Aug 2025 14:03:58 +0200 Subject: [PATCH 37/38] extract manifest PathMapper to its own package --- .../agent/application/upgrade/step_unpack.go | 26 +++++-------------- .../integration/ess/upgrade_rollback_test.go | 1 - 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/step_unpack.go b/internal/pkg/agent/application/upgrade/step_unpack.go index 6d165722e6c..9989cae434e 100644 --- a/internal/pkg/agent/application/upgrade/step_unpack.go +++ b/internal/pkg/agent/application/upgrade/step_unpack.go @@ -24,6 +24,7 @@ import ( v1 "github.com/elastic/elastic-agent/pkg/api/v1" "github.com/elastic/elastic-agent/pkg/component" "github.com/elastic/elastic-agent/pkg/core/logger" + manifestutils "github.com/elastic/elastic-agent/pkg/utils/manifest" ) // UnpackResult contains the location and hash of the unpacked agent files @@ -88,7 +89,7 @@ func unzip(log *logger.Logger, archivePath, dataDir string, flavor string) (Unpa fileNamePrefix := strings.TrimSuffix(filepath.Base(archivePath), ".zip") + "/" // omitting `elastic-agent-{version}-{os}-{arch}/` in filename - pm := 
pathMapper{} + var pm *manifestutils.PathMapper var versionedHome string metadata, err := getPackageMetadataFromZipReader(r, fileNamePrefix) @@ -99,10 +100,11 @@ func unzip(log *logger.Logger, archivePath, dataDir string, flavor string) (Unpa hash = metadata.hash[:hashLen] var registry map[string][]string if metadata.manifest != nil { - pm.mappings = metadata.manifest.Package.PathMappings + pm = manifestutils.NewPathMapper(metadata.manifest.Package.PathMappings) versionedHome = filepath.FromSlash(pm.Map(metadata.manifest.Package.VersionedHome)) registry = metadata.manifest.Package.Flavors } else { + pm = manifestutils.NewPathMapper(nil) // if at this point we didn't load the manifest, set the versioned to the backup value versionedHome = createVersionedHomeFromHash(hash) } @@ -319,7 +321,7 @@ func untar(log *logger.Logger, archivePath, dataDir string, flavor string) (Unpa var hash string // Look up manifest in the archive and prepare path mappings, if any - pm := pathMapper{} + var pm *manifestutils.PathMapper metadata, err := getPackageMetadataFromTar(archivePath) if err != nil { @@ -331,10 +333,11 @@ func untar(log *logger.Logger, archivePath, dataDir string, flavor string) (Unpa if metadata.manifest != nil { // set the path mappings - pm.mappings = metadata.manifest.Package.PathMappings + pm = manifestutils.NewPathMapper(metadata.manifest.Package.PathMappings) versionedHome = filepath.FromSlash(pm.Map(metadata.manifest.Package.VersionedHome)) registry = metadata.manifest.Package.Flavors } else { + pm = manifestutils.NewPathMapper(nil) // set default value of versioned home if it wasn't set by reading the manifest versionedHome = createVersionedHomeFromHash(metadata.hash) } @@ -610,21 +613,6 @@ func validFileName(p string) bool { return true } -type pathMapper struct { - mappings []map[string]string -} - -func (pm pathMapper) Map(packagePath string) string { - for _, mapping := range pm.mappings { - for pkgPath, mappedPath := range mapping { - if 
strings.HasPrefix(packagePath, pkgPath) { - return path.Join(mappedPath, packagePath[len(pkgPath):]) - } - } - } - return packagePath -} - type tarCloser struct { tarFile *os.File gzipReader *gzip.Reader diff --git a/testing/integration/ess/upgrade_rollback_test.go b/testing/integration/ess/upgrade_rollback_test.go index fc2d64664f4..7519740e122 100644 --- a/testing/integration/ess/upgrade_rollback_test.go +++ b/testing/integration/ess/upgrade_rollback_test.go @@ -229,7 +229,6 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) { atesting.WithFetcher(atesting.ArtifactFetcher()), ) require.NoError(t, err) - return fromFixture, toFixture }, }, From 13eec08044708409fbeea1546585a4862ff74a8a Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Fri, 29 Aug 2025 14:58:12 +0200 Subject: [PATCH 38/38] add install descriptor during initial install --- internal/pkg/agent/cmd/run.go | 2 +- internal/pkg/agent/install/install.go | 43 ++++++++++++++++------ internal/pkg/agent/install/install_test.go | 16 +++++++- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/internal/pkg/agent/cmd/run.go b/internal/pkg/agent/cmd/run.go index 281d5a8870b..e32e1ae3423 100644 --- a/internal/pkg/agent/cmd/run.go +++ b/internal/pkg/agent/cmd/run.go @@ -718,7 +718,7 @@ func ensureInstallMarkerPresent() error { if err != nil { return fmt.Errorf("failed to get current file owner: %w", err) } - if err := install.CreateInstallMarker(paths.Top(), ownership); err != nil { + if err := install.CreateInstallMarker(paths.Top(), ownership, paths.Home(), version.GetAgentPackageVersion()); err != nil { return fmt.Errorf("unable to create installation marker file during upgrade: %w", err) } diff --git a/internal/pkg/agent/install/install.go b/internal/pkg/agent/install/install.go index 79f62f88833..70015865c4e 100644 --- a/internal/pkg/agent/install/install.go +++ b/internal/pkg/agent/install/install.go @@ -17,6 +17,7 @@ import ( "github.com/kardianos/service" "github.com/otiai10/copy" 
"github.com/schollz/progressbar/v3" + "gopkg.in/yaml.v3" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" @@ -25,6 +26,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/cli" v1 "github.com/elastic/elastic-agent/pkg/api/v1" "github.com/elastic/elastic-agent/pkg/utils" + manifestutils "github.com/elastic/elastic-agent/pkg/utils/manifest" ) const ( @@ -61,17 +63,20 @@ func Install(cfgFile, topPath string, unprivileged bool, log *logp.Logger, pt *p } } - err = setupInstallPath(topPath, ownership) - if err != nil { - return utils.FileOwner{}, fmt.Errorf("error setting up install path: %w", err) - } - manifest, err := readPackageManifest(dir) if err != nil { return utils.FileOwner{}, fmt.Errorf("reading package manifest: %w", err) } pathMappings := manifest.Package.PathMappings + pathMapper := manifestutils.NewPathMapper(pathMappings) + + targetVersionedHome := filepath.FromSlash(pathMapper.Map(manifest.Package.VersionedHome)) + + err = setupInstallPath(topPath, ownership, targetVersionedHome, manifest.Package.Version) + if err != nil { + return utils.FileOwner{}, fmt.Errorf("error setting up install path: %w", err) + } pt.Describe("Copying install files") copyConcurrency := calculateCopyConcurrency(streams) @@ -184,7 +189,7 @@ func Install(cfgFile, topPath string, unprivileged bool, log *logp.Logger, pt *p } // setup the basic topPath, and the .installed file -func setupInstallPath(topPath string, ownership utils.FileOwner) error { +func setupInstallPath(topPath string, ownership utils.FileOwner, versionedHome string, version string) error { // ensure parent directory exists err := os.MkdirAll(filepath.Dir(topPath), 0755) if err != nil { @@ -198,7 +203,7 @@ func setupInstallPath(topPath string, ownership utils.FileOwner) error { } // create the install marker - if err := CreateInstallMarker(topPath, ownership); err != nil { + if err := CreateInstallMarker(topPath, ownership, versionedHome, 
version); err != nil { return fmt.Errorf("failed to create install marker: %w", err) } return nil @@ -516,16 +521,32 @@ func hasAllSSDs(block ghw.BlockInfo) bool { // CreateInstallMarker creates a `.installed` file at the given install path, // and then calls fixInstallMarkerPermissions to set the ownership provided by `ownership` -func CreateInstallMarker(topPath string, ownership utils.FileOwner) error { +func CreateInstallMarker(topPath string, ownership utils.FileOwner, home string, version string) error { markerFilePath := filepath.Join(topPath, paths.MarkerFileName) - handle, err := os.Create(markerFilePath) + err := createInstallMarkerFile(markerFilePath, version, home) if err != nil { - return err + return fmt.Errorf("creating install marker: %w", err) } - _ = handle.Close() return fixInstallMarkerPermissions(markerFilePath, ownership) } +func createInstallMarkerFile(markerFilePath string, version string, home string) error { + handle, err := os.Create(markerFilePath) + if err != nil { + return fmt.Errorf("creating destination file %q : %w", markerFilePath, err) + } + defer func() { + _ = handle.Close() + }() + installDescriptor := v1.NewInstallDescriptor() + installDescriptor.AgentInstalls = []v1.AgentInstallDesc{{Version: version, VersionedHome: home}} + err = yaml.NewEncoder(handle).Encode(installDescriptor) + if err != nil { + return fmt.Errorf("writing install descriptor: %w", err) + } + return nil +} + func UnprivilegedUser(username, password string) (string, string) { if username != "" { return username, password diff --git a/internal/pkg/agent/install/install_test.go b/internal/pkg/agent/install/install_test.go index f2716e493f6..98aea5cce10 100644 --- a/internal/pkg/agent/install/install_test.go +++ b/internal/pkg/agent/install/install_test.go @@ -224,7 +224,19 @@ func TestSetupInstallPath(t *testing.T) { tmpdir := t.TempDir() ownership, err := utils.CurrentFileOwner() require.NoError(t, err) - err = setupInstallPath(tmpdir, ownership) + err = 
setupInstallPath(tmpdir, ownership, "data/elastic-agent-1.2.3-SNAPSHOT", "1.2.3-SNAPSHOT") require.NoError(t, err) - require.FileExists(t, filepath.Join(tmpdir, paths.MarkerFileName)) + markerFilePath := filepath.Join(tmpdir, paths.MarkerFileName) + require.FileExists(t, markerFilePath) + + const expectedInstallDescriptor = ` + version: co.elastic.agent/v1 + kind: InstallDescriptor + agentInstalls: + - version: 1.2.3-SNAPSHOT + versioned-home: data/elastic-agent-1.2.3-SNAPSHOT + ` + actualInstallDescriptorBytes, err := os.ReadFile(markerFilePath) + require.NoError(t, err, "error reading actual install descriptor") + assert.YAMLEq(t, expectedInstallDescriptor, string(actualInstallDescriptorBytes), "expected and actual install descriptor do not match") }