@@ -442,6 +442,29 @@ type entryDecoderV1 struct {
442
442
truncatedLastEntry bool
443
443
}
444
444
445
+ // entryDecoderV1ZipUpload is a specialized variant of entryDecoderV1 that uses
446
+ // bufio.Reader instead of bufio.Scanner. This implementation is designed for use
447
+ // in CLI tools (like debug zip upload) where:
448
+ // 1. The 64KB token size limitation of bufio.Scanner needs to be removed
449
+ // 2. Memory constraints are less critical for short-lived CLI processes
450
+ // 3. Complete data capture is essential for debug tools - no entries should be truncated
451
+ type entryDecoderV1ZipUpload struct {
452
+ reader * bufio.Reader
453
+ sensitiveEditor redactEditor
454
+ }
455
+
456
+ // Decode decodes the next log entry into the provided protobuf message.
457
+ func (d * entryDecoderV1ZipUpload ) Decode (entry * logpb.Entry ) error {
458
+ buf , err := d .reader .ReadBytes ('\n' )
459
+ if err == io .EOF && len (buf ) == 0 {
460
+ return io .EOF
461
+ } else if err != nil && ! errors .Is (err , bufio .ErrBufferFull ) && errors .Is (err , io .EOF ) {
462
+ return err
463
+ }
464
+
465
+ return parseEntryV1 (buf , entry , d .sensitiveEditor )
466
+ }
467
+
445
468
func decodeTimestamp (fragment []byte ) (unixNano int64 , err error ) {
446
469
timeFormat := MessageTimeFormat
447
470
if len (fragment ) > 7 && (fragment [len (fragment )- 7 ] == '+' || fragment [len (fragment )- 7 ] == '-' ) {
@@ -464,112 +487,127 @@ func (d *entryDecoderV1) Decode(entry *logpb.Entry) error {
464
487
}
465
488
return io .EOF
466
489
}
467
- b := d .scanner .Bytes ()
468
- m := entryREV1 .FindSubmatch (b )
469
- if m == nil {
470
- continue
471
- }
472
-
473
- // Erase all the fields, to be sure.
474
- * entry = logpb.Entry {}
475
490
476
- // Process the severity.
477
- entry .Severity = Severity (strings .IndexByte (severityChar , m [1 ][0 ]) + 1 )
491
+ if err := parseEntryV1 (d .scanner .Bytes (), entry , d .sensitiveEditor ); err != nil {
492
+ if errors .Is (err , io .EOF ) || errors .Is (err , errNoLogEntry ) {
493
+ continue
494
+ }
478
495
479
- // Process the timestamp.
480
- var err error
481
- entry .Time , err = decodeTimestamp (m [2 ])
482
- if err != nil {
483
496
return err
484
497
}
498
+ return nil
499
+ }
500
+ }
485
501
486
- // Process the goroutine ID.
487
- if len (m [3 ]) > 0 {
488
- goroutine , err := strconv .Atoi (string (m [3 ]))
489
- if err != nil {
490
- return err
491
- }
492
- entry .Goroutine = int64 (goroutine )
493
- }
502
// errNoLogEntry is the sentinel returned by parseEntryV1 when the supplied
// buffer contains no match for the crdb-v1 entry header pattern (entryREV1).
var errNoLogEntry = errors.New("no log entry found in buffer")
494
503
495
- // Process the channel/file/line details.
496
- entry .File = string (m [4 ])
497
- if idx := strings .IndexByte (entry .File , '@' ); idx != - 1 {
498
- ch , err := strconv .Atoi (entry .File [:idx ])
499
- if err != nil {
500
- return err
501
- }
502
- entry .Channel = Channel (ch )
503
- entry .File = entry .File [idx + 1 :]
504
- }
504
+ // parseEntryV1 parses a log entry from a byte slice into the provided protobuf message.
505
+ // It contains the common parsing logic used by both decoder implementations.
506
+ func parseEntryV1 (buf []byte , entry * logpb.Entry , sensitiveEditor redactEditor ) error {
507
+ m := entryREV1 .FindSubmatch (buf )
508
+ if m == nil {
509
+ return errNoLogEntry
510
+ }
511
+
512
+ // Erase all the fields, to be sure.
513
+ * entry = logpb.Entry {}
514
+
515
+ // Process the severity.
516
+ entry .Severity = Severity (strings .IndexByte (severityChar , m [1 ][0 ]) + 1 )
517
+
518
+ // Process the timestamp.
519
+ var err error
520
+ entry .Time , err = decodeTimestamp (m [2 ])
521
+ if err != nil {
522
+ return err
523
+ }
505
524
506
- line , err := strconv .Atoi (string (m [5 ]))
525
+ // Process the goroutine ID.
526
+ if len (m [3 ]) > 0 {
527
+ goroutine , err := strconv .Atoi (string (m [3 ]))
507
528
if err != nil {
508
529
return err
509
530
}
510
- entry .Line = int64 (line )
511
-
512
- // Process the context tags.
513
- redactable := len (m [6 ]) != 0
514
- // Look for a tenant ID tag. Default to system otherwise.
515
- entry .TenantID = serverident .SystemTenantID
516
- tagsToProcess := m [7 ]
517
- entry .TenantID , entry .TenantName , tagsToProcess = maybeReadTenantDetails (tagsToProcess )
518
-
519
- // Process any remaining tags.
520
- if len (tagsToProcess ) != 0 {
521
- r := redactablePackage {
522
- msg : tagsToProcess ,
523
- redactable : redactable ,
524
- }
525
- r = d .sensitiveEditor (r )
526
- entry .Tags = string (r .msg )
527
- }
531
+ entry .Goroutine = int64 (goroutine )
532
+ }
528
533
529
- // If there's an entry counter at the start of the message, process it.
530
- msg := b [len (m [0 ]):]
531
- i := 0
532
- for ; i < len (msg ) && msg [i ] >= '0' && msg [i ] <= '9' ; i ++ {
533
- entry .Counter = entry .Counter * 10 + uint64 (msg [i ]- '0' )
534
- }
535
- if i > 0 && i < len (msg ) && msg [i ] == ' ' {
536
- // Only accept the entry counter if followed by a space. In all
537
- // other cases, the number was part of the message string.
538
- msg = msg [i + 1 :]
539
- } else {
540
- // This was not truly an entry counter. Ignore the work done previously.
541
- entry .Counter = 0
534
+ // Process the channel/file/line details.
535
+ entry .File = string (m [4 ])
536
+ if idx := strings .IndexByte (entry .File , '@' ); idx != - 1 {
537
+ ch , err := strconv .Atoi (entry .File [:idx ])
538
+ if err != nil {
539
+ return err
542
540
}
541
+ entry .Channel = Channel (ch )
542
+ entry .File = entry .File [idx + 1 :]
543
+ }
544
+
545
+ line , err := strconv .Atoi (string (m [5 ]))
546
+ if err != nil {
547
+ return err
548
+ }
549
+ entry .Line = int64 (line )
550
+
551
+ // Process the context tags.
552
+ redactable := len (m [6 ]) != 0
553
+ // Look for a tenant ID tag. Default to system otherwise.
554
+ entry .TenantID = serverident .SystemTenantID
555
+ tagsToProcess := m [7 ]
556
+ entry .TenantID , entry .TenantName , tagsToProcess = maybeReadTenantDetails (tagsToProcess )
543
557
544
- // Process the remainder of the log message.
558
+ // Process any remaining tags.
559
+ if len (tagsToProcess ) != 0 {
545
560
r := redactablePackage {
546
- msg : trimFinalNewLines ( msg ) ,
561
+ msg : tagsToProcess ,
547
562
redactable : redactable ,
548
563
}
549
- r = d .sensitiveEditor (r )
550
- entry .Message = string (r .msg )
551
- entry .Redactable = r .redactable
552
-
553
- if strings .HasPrefix (entry .Message , structuredEntryPrefix + "{" ) /* crdb-v1 prefix */ {
554
- // Note: we do not recognize the v2 marker here (" ={") because
555
- // v2 entries can be split across multiple lines.
556
- entry .StructuredStart = uint32 (len (structuredEntryPrefix ))
557
-
558
- if nl := strings .IndexByte (entry .Message , '\n' ); nl != - 1 {
559
- entry .StructuredEnd = uint32 (nl )
560
- entry .StackTraceStart = uint32 (nl + 1 )
561
- } else {
562
- entry .StructuredEnd = uint32 (len (entry .Message ))
563
- }
564
- }
565
- // Note: we only know how to populate entry.StackTraceStart upon
566
- // parse if the entry was structured (see above). If it is not
567
- // structured, we cannot distinguish where the message ends and
568
- // where the stack trace starts. This is another reason why the
569
- // crdb-v1 format is lossy.
564
+ r = sensitiveEditor (r )
565
+ entry .Tags = string (r .msg )
566
+ }
570
567
571
- return nil
568
+ // If there's an entry counter at the start of the message, process it.
569
+ msg := buf [len (m [0 ]):]
570
+ i := 0
571
+ for ; i < len (msg ) && msg [i ] >= '0' && msg [i ] <= '9' ; i ++ {
572
+ entry .Counter = entry .Counter * 10 + uint64 (msg [i ]- '0' )
573
+ }
574
+ if i > 0 && i < len (msg ) && msg [i ] == ' ' {
575
+ // Only accept the entry counter if followed by a space. In all
576
+ // other cases, the number was part of the message string.
577
+ msg = msg [i + 1 :]
578
+ } else {
579
+ // This was not truly an entry counter. Ignore the work done previously.
580
+ entry .Counter = 0
572
581
}
582
+
583
+ // Process the remainder of the log message.
584
+ r := redactablePackage {
585
+ msg : trimFinalNewLines (msg ),
586
+ redactable : redactable ,
587
+ }
588
+ r = sensitiveEditor (r )
589
+ entry .Message = string (r .msg )
590
+ entry .Redactable = r .redactable
591
+
592
+ if strings .HasPrefix (entry .Message , structuredEntryPrefix + "{" ) /* crdb-v1 prefix */ {
593
+ // Note: we do not recognize the v2 marker here (" ={") because
594
+ // v2 entries can be split across multiple lines.
595
+ entry .StructuredStart = uint32 (len (structuredEntryPrefix ))
596
+
597
+ if nl := strings .IndexByte (entry .Message , '\n' ); nl != - 1 {
598
+ entry .StructuredEnd = uint32 (nl )
599
+ entry .StackTraceStart = uint32 (nl + 1 )
600
+ } else {
601
+ entry .StructuredEnd = uint32 (len (entry .Message ))
602
+ }
603
+ }
604
+ // Note: we only know how to populate entry.StackTraceStart upon
605
+ // parse if the entry was structured (see above). If it is not
606
+ // structured, we cannot distinguish where the message ends and
607
+ // where the stack trace starts. This is another reason why the
608
+ // crdb-v1 format is lossy.
609
+
610
+ return nil
573
611
}
574
612
575
613
// maybeReadTenantDetails reads the tenant ID and name. If neither the
0 commit comments