-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgalog_cloudlogging.go
More file actions
355 lines (310 loc) · 12.2 KB
/
galog_cloudlogging.go
File metadata and controls
355 lines (310 loc) · 12.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package galog
import (
"context"
"errors"
"fmt"
"time"
"cloud.google.com/go/logging"
logpb "cloud.google.com/go/logging/apiv2/loggingpb"
"google.golang.org/api/option"
)
// CloudLoggingInitMode is the cloud logging backend initialization mode. It
// tells NewCloudBackend whether the cloud logging client must be created right
// away or later on through InitClient.
type CloudLoggingInitMode int
const (
	// CloudLoggingInitModeLazy is the lazy initialization mode. In this mode the
	// backend object is created but the cloud logging client and logger are not
	// initialized until InitClient is called.
	CloudLoggingInitModeLazy CloudLoggingInitMode = iota
	// CloudLoggingInitModeActive is the active initialization mode. In this mode
	// the backend object is created and the cloud logging client and logger are
	// initialized immediately.
	CloudLoggingInitModeActive
	// DefaultCloudLoggingPingTimeout is the default timeout for pinging cloud
	// logging. InitClient applies it when CloudOptions.PingTimeout is zero.
	DefaultCloudLoggingPingTimeout = time.Second
	// DefaultClientErrorInterval is the default interval for logging the client
	// errors. InitClient applies it when CloudOptions.ClientErrorInterval is
	// zero.
	DefaultClientErrorInterval = time.Minute * 20
	// DefaultClientErrorLoggingDisabled is the default value for disabling the
	// client error logging. Being false, the client error logging is enabled by
	// default.
	DefaultClientErrorLoggingDisabled = false
	// defaultCloudLoggingQueueSize is the default queue size of the cloud logging
	// backend implementation. In general writing to cloud logging should not
	// require buffering as cloud logging is async and logging library takes care
	// of flushing.
	defaultCloudLoggingQueueSize = 100
)
var (
	// errCloudLoggingNotInitialized is the error returned by Log and Shutdown
	// when the cloud logging backend is not yet initialized, i.e. InitClient
	// has not yet set up the logger.
	errCloudLoggingNotInitialized = errors.New("cloud logging logger is not yet fully initialized")
	// errCloudLoggingAlreadyInitialized is the error returned when InitClient is
	// called and the cloud logging backend already has a client.
	errCloudLoggingAlreadyInitialized = errors.New("cloud logging logger is already initialized")
)
// CloudBackend is a Backend implementation for cloud logging. The client,
// logger and opts fields stay nil until InitClient succeeds.
type CloudBackend struct {
	// backendID is the cloud logging backend implementation's ID, returned by
	// ID.
	backendID string
	// client is the cloud logging client pointer, set by InitClient.
	client *logging.Client
	// logger is the cloud logging logger pointer, set by InitClient. Log and
	// Shutdown treat a nil logger as "not yet initialized".
	logger *logging.Logger
	// config is a pointer to the generic Config interface implementation,
	// returned by Config.
	config *backendConfig
	// opts is the cloud logging options passed to InitClient.
	opts *CloudOptions
	// periodicLogger is the periodic logger used to capture the client errors
	// without logging each one of them.
	periodicLogger *periodicLogger
	// disableClientErrorLogging is whether to disable the client error logging.
	// It's copied from CloudOptions.DisableClientErrorLogging by InitClient.
	disableClientErrorLogging bool
}
// CloudOptions defines the cloud logging behavior and setup options. The
// options are consumed by InitClient, which also keeps a reference to them.
type CloudOptions struct {
	// Ident is the logger's ident, or the logger's name. It's the ID passed to
	// the cloud logging client when creating the logger.
	Ident string
	// ProgramName is the program name, it's used on the logging payload.
	ProgramName string
	// ProgramVersion is the program version, it's used on the logging payload.
	ProgramVersion string
	// Project is the gcp project name, it's passed to the cloud logging client
	// on creation.
	Project string
	// Instance is the running instance name. When not empty it's added to every
	// entry as the "instance_name" label.
	Instance string
	// UserAgent is the logging user agent option. It's only applied when not
	// empty.
	UserAgent string
	// FlushCadence is how frequently we should push the log to the server. It's
	// handed to the logger as its delay threshold.
	FlushCadence time.Duration
	// ClientErrorInterval is how frequently we should log the client errors. This
	// defaults to [DefaultClientErrorInterval] when left at zero.
	ClientErrorInterval time.Duration
	// DisableClientErrorLogging is whether to disable the client error logging.
	// While it's false any errors from the cloud logging client will be logged
	// periodically. Period is controlled by [ClientErrorInterval] option.
	// Periodical logging is used to prevent spamming the error logs in case of a
	// persistent error. The zero value (false) keeps the client error logging
	// enabled, matching [DefaultClientErrorLoggingDisabled]. This periodic
	// logger would attempt to log at [WARN] level to every other configured
	// backend (file, serial, etc).
	DisableClientErrorLogging bool
	// PingTimeout is the timeout for pinging Cloud Logging. This defaults to
	// [DefaultCloudLoggingPingTimeout] when left at zero.
	//
	// This is required currently because the cloud logging flush operation hangs
	// indefinitely when the server is unreachable due to having no external IP
	// address or private Google access enabled. The timeout is used to attempt
	// pinging the Cloud Logging server, and if it's not reachable, we skip the
	// flush operation.
	PingTimeout time.Duration
	// WithoutAuthentication is whether to skip authentication for cloud logging
	// operations. When true the client is created without authentication.
	WithoutAuthentication bool
	// ExtraLabels are extra labels to be added to the cloud logging entry. The
	// map is copied by InitClient.
	ExtraLabels map[string]string
	// Endpoint is the cloud logging endpoint to use. If empty, the default
	// endpoint will be used.
	Endpoint string
	// UniverseDomain is the universe domain to use for cloud logging. If empty,
	// the default universe domain will be used.
	UniverseDomain string
}
// CloudEntryPayload contains the data to be sent to cloud logging as the
// entry payload. It's translated from the log subsystem's Entry structure.
type CloudEntryPayload struct {
	// Message is the formatted message.
	Message string `json:"message"`
	// LocalTimestamp is the entry's When field formatted as an RFC 3339 style
	// timestamp (date, time with fractional seconds and zone offset).
	LocalTimestamp string `json:"localTimestamp"`
	// ProgName is the program name - or the binary name. It's omitted from the
	// JSON output when empty.
	ProgName string `json:"progName,omitempty"`
	// ProgVersion is the program version. It's omitted from the JSON output when
	// empty.
	ProgVersion string `json:"progVersion,omitempty"`
}
// NewCloudBackend returns a Backend implementation that will log out to google
// cloud logging.
//
// Initialization Mode:
//
// If mode is CloudLoggingInitModeLazy the backend object will be allocated and
// only the basic elements will be initialized, all log entries will be queued
// until InitClient is called. In this mode opts is not used.
//
// If mode is CloudLoggingInitModeActive the cloud logging client is initialized
// right away with opts, and an error wrapping the InitClient failure is
// returned if that initialization fails.
//
// Why lazy initialization is important/needed?
//
// The Cloud Logging depends on instance name that's mainly a data fed by - or
// accessed from - metadata server and depending on the application and
// environment the metadata server might not be available at the time of the
// application start - being only available later on. In such cases the early
// initializing and registering the backend will result into entries being
// queued and written/sent to the cloud logging later on - that way no logging
// entries are lost.
func NewCloudBackend(ctx context.Context, mode CloudLoggingInitMode, opts *CloudOptions) (*CloudBackend, error) {
	res := &CloudBackend{
		backendID: "log-backend,cloudlogging",
		// Size the queue with the cloud logging specific default rather than
		// the file backend's one.
		config: newBackendConfig(defaultCloudLoggingQueueSize),
	}

	// The payload's message carries only the entry's message; the remaining
	// details are sent as structured fields of the cloud logging entry.
	// NOTE(review): presumably these two formats cover every level - confirm
	// against the backend config's SetFormat semantics.
	res.config.SetFormat(ErrorLevel, `{{.Message}}`)
	res.config.SetFormat(DebugLevel, `{{.Message}}`)

	if mode == CloudLoggingInitModeActive {
		if err := res.InitClient(ctx, opts); err != nil {
			// Wrap with %w so callers can still inspect the underlying error.
			return nil, fmt.Errorf("failed to initialize cloud logging client: %w", err)
		}
	}

	return res, nil
}
// InitClient initializes the cloud logging client and logger based on opts and
// stores them - together with opts - in the backend.
//
// It returns errCloudLoggingAlreadyInitialized if the backend already has a
// client (i.e. it was created in "active" mode or InitClient was already
// called), and an error if opts is nil or the cloud logging client can't be
// created.
//
// Note that opts is modified: a zero PingTimeout is replaced with
// DefaultCloudLoggingPingTimeout.
func (cb *CloudBackend) InitClient(ctx context.Context, opts *CloudOptions) error {
	if cb.client != nil {
		return errCloudLoggingAlreadyInitialized
	}

	// Every step below dereferences opts, fail cleanly instead of panicking.
	if opts == nil {
		return errors.New("cloud logging options must not be nil")
	}

	var clientOptions []option.ClientOption

	if opts.UserAgent != "" {
		clientOptions = append(clientOptions, option.WithUserAgent(opts.UserAgent))
	}

	if opts.WithoutAuthentication {
		clientOptions = append(clientOptions, option.WithoutAuthentication())
	}

	if opts.Endpoint != "" {
		clientOptions = append(clientOptions, option.WithEndpoint(opts.Endpoint))
	}

	if opts.UniverseDomain != "" {
		clientOptions = append(clientOptions, option.WithUniverseDomain(opts.UniverseDomain))
	}

	// Set the default ping timeout if not provided.
	if opts.PingTimeout == 0 {
		opts.PingTimeout = DefaultCloudLoggingPingTimeout
	}

	cb.disableClientErrorLogging = opts.DisableClientErrorLogging

	// Set the default client error logging interval if not provided.
	errInterval := opts.ClientErrorInterval
	if errInterval == 0 {
		errInterval = DefaultClientErrorInterval
	}
	cb.periodicLogger = &periodicLogger{
		interval: errInterval,
	}

	client, err := logging.NewClient(ctx, opts.Project, clientOptions...)
	if err != nil {
		// Wrap with %w so callers can still inspect the underlying error.
		return fmt.Errorf("failed to initialize cloud logging client: %w", err)
	}

	// Route the client's errors through the periodic logger so a persistent
	// failure doesn't flood the logs.
	client.OnError = func(err error) {
		if opts.DisableClientErrorLogging {
			return
		}
		cb.periodicLogger.log(err)
	}

	// Copy the labels so the caller's map is never shared with the logger, the
	// extra slot is for the instance name.
	labels := make(map[string]string, len(opts.ExtraLabels)+1)
	for k, v := range opts.ExtraLabels {
		labels[k] = v
	}
	if opts.Instance != "" {
		labels["instance_name"] = opts.Instance
	}

	loggerOptions := []logging.LoggerOption{
		logging.CommonLabels(labels),
		logging.DelayThreshold(opts.FlushCadence),
	}

	cb.client = client
	cb.logger = client.Logger(opts.Ident, loggerOptions...)
	cb.opts = opts
	return nil
}
// Log sends the log entry to cloud logging.
//
// The entry's level is translated to a cloud logging severity, its file, line
// and function are sent as the source location and the formatted message is
// sent - wrapped in a CloudEntryPayload - as the payload.
//
// It returns errCloudLoggingNotInitialized if the backend's logger is not yet
// initialized, and an error if the entry's message can't be formatted. The
// entry must not be nil.
func (cb *CloudBackend) Log(entry *LogEntry) error {
	// If the logger is nil it means the backend is lazy initialized, we return
	// an error to indicate that the backend is not yet initialized - the entries
	// will be queued.
	if cb.logger == nil {
		return errCloudLoggingNotInitialized
	}

	// A switch avoids allocating a lookup map on every call. Levels not listed
	// here keep the zero value of logging.Severity.
	var severity logging.Severity
	switch entry.Level {
	case FatalLevel:
		severity = logging.Critical
	case ErrorLevel:
		severity = logging.Error
	case WarningLevel:
		severity = logging.Warning
	case InfoLevel:
		severity = logging.Info
	case DebugLevel:
		severity = logging.Debug
	}

	sourceLocation := &logpb.LogEntrySourceLocation{
		File:     entry.File,
		Line:     int64(entry.Line),
		Function: entry.Function,
	}

	format := cb.config.Format(entry.Level)
	message, err := entry.Format(format)
	if err != nil {
		// Wrap with %w so callers can still inspect the underlying error.
		return fmt.Errorf("failed to format log message: %w", err)
	}

	payload := &CloudEntryPayload{
		Message:        message,
		LocalTimestamp: entry.When.Format("2006-01-02T15:04:05.0000Z07:00"),
		ProgName:       cb.opts.ProgramName,
		ProgVersion:    cb.opts.ProgramVersion,
	}

	cb.logger.Log(logging.Entry{
		Severity:       severity,
		SourceLocation: sourceLocation,
		Payload:        payload,
	})

	return nil
}
// ID returns the cloud logging backend implementation's ID, the value assigned
// to the backend when it was created.
func (cb *CloudBackend) ID() string {
	return cb.backendID
}
// Config returns the configuration of the cloud logging backend. The returned
// value is the backend's own configuration object, not a copy.
func (cb *CloudBackend) Config() Config {
	return cb.config
}
// Shutdown forces the cloud logging backend to flush its content and closes the
// logging client. This operation is skipped if Cloud Logging is unreachable.
//
// It returns errCloudLoggingNotInitialized if the backend's logger is not yet
// initialized, an error wrapping the ping failure if Cloud Logging can't be
// reached within the configured ping timeout - in which case the client is left
// open - and an error wrapping the close failure otherwise.
func (cb *CloudBackend) Shutdown(ctx context.Context) error {
	if cb.logger == nil {
		return errCloudLoggingNotInitialized
	}

	pingCtx, cancel := context.WithTimeout(ctx, cb.opts.PingTimeout)
	defer cancel()

	// Ensure we can reach Cloud Logging before attempting to flush, the flush
	// is skipped otherwise.
	if err := cb.client.Ping(pingCtx); err != nil {
		return fmt.Errorf("failed to reach cloud logging, skipping flush: %w", err)
	}

	// Closing the client will flush the logs.
	if err := cb.client.Close(); err != nil {
		return fmt.Errorf("failed to close cloud logging client: %w", err)
	}

	return nil
}
// periodicLogger is a helper struct used to log the client errors periodically.
// It's there to prevent spamming the logs with client errors. If there is a
// persistent error, the client will call the error handler too frequently.
type periodicLogger struct {
	// interval is the minimum time between two logged client errors.
	interval time.Duration
	// lastLog is the last time the client error was logged.
	lastLog time.Time
	// firstRunPassed is whether an error was already logged at least once. While
	// it's false the next error is logged regardless of interval.
	firstRunPassed bool
}
// log emits err as a warning unless a previous error was already logged less
// than interval ago. The very first error is always logged. It reports whether
// the error was logged, which is only relied on by tests.
func (pl *periodicLogger) log(err error) bool {
	// Still inside the quiet period of the previous log, drop this one.
	if pl.firstRunPassed && time.Since(pl.lastLog) < pl.interval {
		return false
	}

	pl.firstRunPassed = true
	pl.lastLog = time.Now()
	Warnf("Cloud Logging Client Error: %v", err)
	return true
}