Skip to content

Commit 7659fa8

Browse files
Clean up logging and tags (#156)
1 parent efe2cd2 commit 7659fa8

File tree

2 files changed

+142
-66
lines changed

2 files changed

+142
-66
lines changed

src/go/cmd/strelka-frontend/main.go

Lines changed: 70 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,7 @@ var (
6262
loggerOnce sync.Once
6363
)
6464

65-
func DebugLog(msg string, fields ...zap.Field) {
66-
// The value here doesn't matter, we'll just check for its existence
67-
if _, enableDebugLogging := os.LookupEnv("ENABLE_VERBOSE_LOGGING"); !enableDebugLogging {
68-
return
69-
}
70-
65+
func getLogger() *zap.Logger {
7166
loggerOnce.Do(func() {
7267
encoderConfig := zap.NewProductionEncoderConfig()
7368
encoderConfig.MessageKey = "message"
@@ -88,8 +83,29 @@ func DebugLog(msg string, fields ...zap.Field) {
8883
loggerInstance = logger
8984
})
9085

86+
return loggerInstance
87+
}
88+
89+
func Infof(format string, v ...any) {
90+
logger := getLogger()
91+
if logger != nil {
92+
logger.Sugar().Infof(format, v...)
93+
} else {
94+
log.Printf(format, v...)
95+
}
96+
}
97+
98+
func Trace(msg string, fields ...zap.Field) {
99+
// The value here doesn't matter, we'll just check for its existence
100+
if _, enableTraceLogging := os.LookupEnv("ENABLE_TRACE_LOGGING"); !enableTraceLogging {
101+
return
102+
}
103+
104+
traceMsg := fmt.Sprintf("[TRACE] %s", msg)
91105
if loggerInstance != nil {
92-
loggerInstance.Info(msg, fields...)
106+
loggerInstance.Info(traceMsg, fields...)
107+
} else {
108+
log.Println(traceMsg)
93109
}
94110
}
95111

@@ -217,7 +233,7 @@ func (s *server) ScanFile(stream strelka.Frontend_ScanFileServer) error {
217233
}
218234
}
219235

220-
startQueueTask := time.Now()
236+
startQueueScanFileTaskTime := time.Now()
221237
if err := s.coordinator.cli.ZAdd(
222238
stream.Context(),
223239
"tasks",
@@ -226,19 +242,19 @@ func (s *server) ScanFile(stream strelka.Frontend_ScanFileServer) error {
226242
Member: id,
227243
},
228244
).Err(); err != nil {
229-
DebugLog("[VERBOSE] Failed to queue task",
245+
Trace("ScanFile task failed to queue",
230246
zap.String("request_id", req.Id),
231247
zap.String("strelka_id", id),
232-
zap.Duration("took", time.Since(startQueueTask)),
248+
zap.Int64("queue_time_ms", time.Since(startQueueScanFileTaskTime).Milliseconds()),
233249
zap.Error(err),
234250
)
235251
return fmt.Errorf("sending task: %w", err)
236252
} else {
237-
DebugLog("[VERBOSE] Queued task",
253+
Trace("ScanFile task queued",
238254
zap.String("request_id", req.Id),
239255
zap.String("strelka_id", id),
240-
zap.Time("deadline", deadline),
241-
zap.Duration("took", time.Since(startQueueTask)),
256+
zap.Int64("deadline", deadline.Unix()),
257+
zap.Int64("queue_time_ms", time.Since(startQueueScanFileTaskTime).Milliseconds()),
242258
)
243259
}
244260

@@ -249,21 +265,36 @@ func (s *server) ScanFile(stream strelka.Frontend_ScanFileServer) error {
249265
(*tx).Del(stream.Context(), sha)
250266
}
251267

252-
startAwaitResponse := time.Now()
268+
startResponseLoopTime := time.Now()
269+
lastResponseTime := time.Now()
270+
redisErrorCount := 0
253271
for {
254272
if err := stream.Context().Err(); err != nil {
255-
DebugLog("[VERBOSE] Stream error",
273+
Trace("stream closed",
256274
zap.String("request_id", req.Id),
257275
zap.String("strelka_id", id),
258-
zap.Duration("waited", time.Since(startAwaitResponse)),
276+
zap.Int64("since_last_response_ms", time.Since(lastResponseTime).Milliseconds()),
277+
zap.Int64("since_entering_loop_ms", time.Since(startResponseLoopTime).Milliseconds()),
278+
zap.Int("redis_error_count", redisErrorCount),
259279
zap.Error(err),
260280
)
261281
return fmt.Errorf("context closed: %w", err)
262282
}
263283

284+
startPopTime := time.Now()
264285
res, err := s.coordinator.cli.BLPop(stream.Context(), 5*time.Second, keye).Result()
265286
if err != nil {
266287
if err != redis.Nil {
288+
redisErrorCount += 1
289+
Trace("error performing response pop",
290+
zap.String("request_id", req.Id),
291+
zap.String("strelka_id", id),
292+
zap.Int64("since_last_response_ms", time.Since(lastResponseTime).Milliseconds()),
293+
zap.Int64("since_entering_loop_ms", time.Since(startResponseLoopTime).Milliseconds()),
294+
zap.Int64("response_pop_took_ms", time.Since(startPopTime).Milliseconds()),
295+
zap.Int("redis_error_count", redisErrorCount),
296+
zap.Error(err),
297+
)
267298
// Delay to prevent fast looping over errors
268299
time.Sleep(250 * time.Millisecond)
269300
}
@@ -276,19 +307,23 @@ func (s *server) ScanFile(stream strelka.Frontend_ScanFileServer) error {
276307

277308
lpop := res[1]
278309
if lpop == "FIN" {
279-
DebugLog("[VERBOSE] FIN response received",
310+
Trace("FIN response received",
280311
zap.String("request_id", req.Id),
281312
zap.String("strelka_id", id),
282-
zap.Duration("waited", time.Since(startAwaitResponse)),
313+
zap.Int64("since_last_response_ms", time.Since(lastResponseTime).Milliseconds()),
314+
zap.Int64("since_entering_loop_ms", time.Since(startResponseLoopTime).Milliseconds()),
315+
zap.Int("redis_error_count", redisErrorCount),
283316
)
284317
break
285318
}
286319

287-
DebugLog("[VERBOSE] Response received",
320+
Trace("response received",
288321
zap.String("request_id", req.Id),
289322
zap.String("strelka_id", id),
290-
zap.Duration("waited", time.Since(startAwaitResponse)),
323+
zap.Int64("since_last_response_ms", time.Since(lastResponseTime).Milliseconds()),
324+
zap.Int64("since_entering_loop_ms", time.Since(startResponseLoopTime).Milliseconds()),
291325
)
326+
lastResponseTime = time.Now()
292327

293328
if tx != nil {
294329
(*tx).RPush(stream.Context(), sha, lpop)
@@ -318,20 +353,20 @@ func (s *server) ScanFile(stream strelka.Frontend_ScanFileServer) error {
318353

319354
s.responses <- resp
320355

321-
startSendResponse := time.Now()
356+
startSendResponseTime := time.Now()
322357
if err := stream.Send(resp); err != nil {
323-
DebugLog("[VERBOSE] Error sending response",
358+
Trace("error sending response to caller",
324359
zap.String("request_id", req.Id),
325360
zap.String("strelka_id", id),
326-
zap.Duration("took", time.Since(startSendResponse)),
361+
zap.Int64("send_response_took_ms", time.Since(startSendResponseTime).Milliseconds()),
327362
zap.Error(err),
328363
)
329364
return fmt.Errorf("send stream: %w", err)
330365
} else {
331-
DebugLog("[VERBOSE] Sent response",
366+
Trace("response sent to caller",
332367
zap.String("request_id", req.Id),
333368
zap.String("strelka_id", id),
334-
zap.Duration("took", time.Since(startSendResponse)),
369+
zap.Int64("send_response_took_ms", time.Since(startSendResponseTime).Milliseconds()),
335370
)
336371
}
337372
}
@@ -416,7 +451,7 @@ func (s *server) CompileYara(stream strelka.Frontend_CompileYaraServer) error {
416451
if err != nil {
417452
if err != redis.Nil {
418453
// Delay to prevent fast looping over errors
419-
log.Printf("err: %v\n", err)
454+
Infof("err: %v\n", err)
420455
time.Sleep(250 * time.Millisecond)
421456
}
422457
continue
@@ -536,7 +571,7 @@ func (s *server) SyncYara(stream strelka.Frontend_SyncYaraServer) error {
536571
if err != nil {
537572
if err != redis.Nil {
538573
// Delay to prevent fast looping over errors
539-
log.Printf("err: %v\n", err)
574+
Infof("err: %v\n", err)
540575
time.Sleep(250 * time.Millisecond)
541576
}
542577
continue
@@ -684,7 +719,7 @@ func (s *server) SyncYaraV2(stream strelka.Frontend_SyncYaraV2Server) error {
684719
if err != nil {
685720
if err != redis.Nil {
686721
// Delay to prevent fast looping over errors
687-
log.Printf("err: %v\n", err)
722+
Infof("err: %v\n", err)
688723
time.Sleep(250 * time.Millisecond)
689724
}
690725
continue
@@ -870,17 +905,17 @@ func main() {
870905
go func() {
871906
rpc.LogResponses(responses, conf.Response.Log)
872907
}()
873-
log.Printf("responses will be logged to %v", conf.Response.Log)
908+
Infof("responses will be logged to %v", conf.Response.Log)
874909
} else if conf.Response.Report != 0 {
875910
go func() {
876911
rpc.ReportResponses(responses, conf.Response.Report)
877912
}()
878-
log.Printf("responses will be reported every %v", conf.Response.Report)
913+
Infof("responses will be reported every %v", conf.Response.Report)
879914
} else {
880915
go func() {
881916
rpc.DiscardResponses(responses)
882917
}()
883-
log.Println("responses will be discarded")
918+
Infof("responses will be discarded")
884919
}
885920

886921
cd := redis.NewClient(&redis.Options{
@@ -923,16 +958,16 @@ func main() {
923958
}
924959

925960
go func() {
926-
log.Printf("Waiting for shutdown\n")
961+
Infof("Waiting for shutdown\n")
927962
<-shutdownWorkersSig
928963
st := time.Now()
929-
log.Printf("Received shutdown signal, attempting graceful shutdown\n")
964+
Infof("Received shutdown signal, attempting graceful shutdown\n")
930965
s.GracefulStop()
931-
log.Printf("Graceful shutdown completed in %v\n", time.Since(st))
966+
Infof("Graceful shutdown completed in %v\n", time.Since(st))
932967
}()
933968

934969
strelka.RegisterFrontendServer(s, opts)
935970
grpc_health_v1.RegisterHealthServer(s, opts)
936971
err = s.Serve(listen)
937-
log.Printf("Shutting down. Serve err: %v\n", err)
972+
Infof("Shutting down. Serve err: %v\n", err)
938973
}

0 commit comments

Comments
 (0)