Skip to content

Commit 64d6a47

Browse files
craig[bot] and arjunmahishi committed
Merge #148183
148183: pkg/util/log: introduce crdb-v1-zip-upload format decoder r=arjunmahishi a=arjunmahishi 1. **pkg/cli (debug-zip): add dry-run mode for debug zip uploads** Introduce a `--dry-run` flag to the debug zip upload command. This allows us to simulate the upload process without performing actual uploads. The dry-run mode is used only for debugging/testing. This flag is hidden from the help text Epic: None Release note: None --- 2. **pkg/util/log: introduce crdb-v1-zip-upload format decoder** Introduces a new log format decoder (`crdb-v1-zip-upload`) specifically for debug zip uploads that removes the 64KB token size limitation of `bufio.Scanner`. The new decoder uses `bufio.Reader` to handle arbitrarily large log entries without truncation. Truncation of logs can lead to partial log entries, which might be acceptable in some cases. However, logs in JSON format may break their validity if they are only partially available. We had also encountered an issue with JSON marshalling due to this. This change addresses that issue. `crdb-v1-zip-upload` is only used internally by debug zip upload operations and not used in general log processing. Jira: CRDB-51108, CRDB-51109 Release note: None Co-authored-by: Arjun Mahishi <[email protected]>
2 parents a036214 + bce7483 commit 64d6a47

File tree

6 files changed

+220
-93
lines changed

6 files changed

+220
-93
lines changed

pkg/cli/debug.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1566,14 +1566,17 @@ func init() {
15661566
"Name of the cluster to associate with the debug zip artifacts. This can be used to identify data in the upstream observability tool.")
15671567
f.Var(&debugZipUploadOpts.from, "from", "oldest timestamp to include (inclusive)")
15681568
f.Var(&debugZipUploadOpts.to, "to", "newest timestamp to include (inclusive)")
1569-
f.StringVar(&debugZipUploadOpts.logFormat, "log-format", "crdb-v1",
1569+
f.StringVar(&debugZipUploadOpts.logFormat, "log-format", "crdb-v1-zip-upload",
15701570
"log format of the input files")
15711571
// the log-format flag is deprecated. It will
15721572
// eventually be removed completely. Keeping it hidden for now in case we ever
15731573
// need to specify the log format
15741574
f.Lookup("log-format").Hidden = true
15751575
f.StringVar(&debugZipUploadOpts.gcpProjectID, "gcp-project-id",
15761576
defaultGCPProjectID, "GCP project ID to use to send debug.zip logs to GCS")
1577+
// --dry-run is a hidden flag that is only meant to be used for testing and diagnostics
1578+
f.BoolVar(&debugZipUploadOpts.dryRun, "dry-run", false, "run in dry-run mode without making any actual uploads")
1579+
f.Lookup("dry-run").Hidden = true
15771580

15781581
f = debugDecodeKeyCmd.Flags()
15791582
f.Var(&decodeKeyOptions.encoding, "encoding", "key argument encoding")

pkg/cli/zip_upload.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ var debugZipUploadOpts = struct {
124124
from, to timestampValue
125125
logFormat string
126126
maxConcurrentUploads int
127+
dryRun bool
127128
}{
128129
maxConcurrentUploads: system.NumCPU() * 4,
129130
}
@@ -217,6 +218,10 @@ func runDebugZipUpload(cmd *cobra.Command, args []string) error {
217218
artifactsToUpload = debugZipUploadOpts.include
218219
}
219220

221+
if debugZipUploadOpts.dryRun {
222+
fmt.Println("DRY RUN MODE: No actual uploads will be performed")
223+
}
224+
220225
// run the upload functions for each artifact type. This can run sequentially.
221226
// All the concurrency is contained within the upload functions.
222227
for _, artType := range artifactsToUpload {
@@ -238,6 +243,10 @@ func validateZipUploadReadiness() error {
238243
artifactsToUpload = zipArtifactTypes
239244
)
240245

246+
if debugZipUploadOpts.dryRun {
247+
return nil
248+
}
249+
241250
if len(debugZipUploadOpts.include) > 0 {
242251
artifactsToUpload = debugZipUploadOpts.include
243252
}
@@ -474,7 +483,7 @@ func processLogFile(
474483
debugZipUploadOpts.tags..., // user provided tags
475484
), getUploadType(currentTimestamp))
476485
if err != nil {
477-
fmt.Println(err)
486+
fmt.Println("logEntryToJSON:", err)
478487
continue
479488
}
480489

@@ -896,6 +905,11 @@ func startWriterPool(
896905
// writing to GCS. The concurrency has to be handled by the caller.
897906
// This function implements the logUploadFunc signature.
898907
var gcsLogUpload = func(ctx context.Context, sig logUploadSig) (int, error) {
908+
data := bytes.Join(sig.logLines, []byte("\n"))
909+
if debugZipUploadOpts.dryRun {
910+
return len(data), nil
911+
}
912+
899913
gcsClient, closeGCSClient, err := newGCSClient(ctx)
900914
if err != nil {
901915
return 0, err
@@ -910,7 +924,6 @@ var gcsLogUpload = func(ctx context.Context, sig logUploadSig) (int, error) {
910924
retryOpts := base.DefaultRetryOptions()
911925
retryOpts.MaxRetries = zipUploadRetries
912926

913-
data := bytes.Join(sig.logLines, []byte("\n"))
914927
for retry := retry.Start(retryOpts); retry.Next(); {
915928
objectWriter := gcsClient.Bucket(ddArchiveBucketName).Object(filename).NewWriter(ctx)
916929
w := gzip.NewWriter(objectWriter)
@@ -1137,6 +1150,10 @@ func makeDDTag(key, value string) string {
11371150
// There is also some error handling logic in this function. This is a variable so that
11381151
// we can mock this function in the tests.
11391152
var doUploadReq = func(req *http.Request) ([]byte, error) {
1153+
if debugZipUploadOpts.dryRun {
1154+
return []byte("{}"), nil
1155+
}
1156+
11401157
resp, err := http.DefaultClient.Do(req)
11411158
if err != nil {
11421159
return nil, err

pkg/util/log/format_crdb_v1.go

Lines changed: 126 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,29 @@ type entryDecoderV1 struct {
442442
truncatedLastEntry bool
443443
}
444444

445+
// entryDecoderV1ZipUpload is a specialized variant of entryDecoderV1 that uses
446+
// bufio.Reader instead of bufio.Scanner. This implementation is designed for use
447+
// in CLI tools (like debug zip upload) where:
448+
// 1. The 64KB token size limitation of bufio.Scanner needs to be removed
449+
// 2. Memory constraints are less critical for short-lived CLI processes
450+
// 3. Complete data capture is essential for debug tools - no entries should be truncated
451+
type entryDecoderV1ZipUpload struct {
452+
reader *bufio.Reader
453+
sensitiveEditor redactEditor
454+
}
455+
456+
// Decode decodes the next log entry into the provided protobuf message.
457+
func (d *entryDecoderV1ZipUpload) Decode(entry *logpb.Entry) error {
458+
buf, err := d.reader.ReadBytes('\n')
459+
if err == io.EOF && len(buf) == 0 {
460+
return io.EOF
461+
} else if err != nil && !errors.Is(err, bufio.ErrBufferFull) && errors.Is(err, io.EOF) {
462+
return err
463+
}
464+
465+
return parseEntryV1(buf, entry, d.sensitiveEditor)
466+
}
467+
445468
func decodeTimestamp(fragment []byte) (unixNano int64, err error) {
446469
timeFormat := MessageTimeFormat
447470
if len(fragment) > 7 && (fragment[len(fragment)-7] == '+' || fragment[len(fragment)-7] == '-') {
@@ -464,112 +487,127 @@ func (d *entryDecoderV1) Decode(entry *logpb.Entry) error {
464487
}
465488
return io.EOF
466489
}
467-
b := d.scanner.Bytes()
468-
m := entryREV1.FindSubmatch(b)
469-
if m == nil {
470-
continue
471-
}
472-
473-
// Erase all the fields, to be sure.
474-
*entry = logpb.Entry{}
475490

476-
// Process the severity.
477-
entry.Severity = Severity(strings.IndexByte(severityChar, m[1][0]) + 1)
491+
if err := parseEntryV1(d.scanner.Bytes(), entry, d.sensitiveEditor); err != nil {
492+
if errors.Is(err, io.EOF) || errors.Is(err, errNoLogEntry) {
493+
continue
494+
}
478495

479-
// Process the timestamp.
480-
var err error
481-
entry.Time, err = decodeTimestamp(m[2])
482-
if err != nil {
483496
return err
484497
}
498+
return nil
499+
}
500+
}
485501

486-
// Process the goroutine ID.
487-
if len(m[3]) > 0 {
488-
goroutine, err := strconv.Atoi(string(m[3]))
489-
if err != nil {
490-
return err
491-
}
492-
entry.Goroutine = int64(goroutine)
493-
}
502+
var errNoLogEntry = errors.New("no log entry found in buffer")
494503

495-
// Process the channel/file/line details.
496-
entry.File = string(m[4])
497-
if idx := strings.IndexByte(entry.File, '@'); idx != -1 {
498-
ch, err := strconv.Atoi(entry.File[:idx])
499-
if err != nil {
500-
return err
501-
}
502-
entry.Channel = Channel(ch)
503-
entry.File = entry.File[idx+1:]
504-
}
504+
// parseEntryV1 parses a log entry from a byte slice into the provided protobuf message.
505+
// It contains the common parsing logic used by both decoder implementations.
506+
func parseEntryV1(buf []byte, entry *logpb.Entry, sensitiveEditor redactEditor) error {
507+
m := entryREV1.FindSubmatch(buf)
508+
if m == nil {
509+
return errNoLogEntry
510+
}
511+
512+
// Erase all the fields, to be sure.
513+
*entry = logpb.Entry{}
514+
515+
// Process the severity.
516+
entry.Severity = Severity(strings.IndexByte(severityChar, m[1][0]) + 1)
517+
518+
// Process the timestamp.
519+
var err error
520+
entry.Time, err = decodeTimestamp(m[2])
521+
if err != nil {
522+
return err
523+
}
505524

506-
line, err := strconv.Atoi(string(m[5]))
525+
// Process the goroutine ID.
526+
if len(m[3]) > 0 {
527+
goroutine, err := strconv.Atoi(string(m[3]))
507528
if err != nil {
508529
return err
509530
}
510-
entry.Line = int64(line)
511-
512-
// Process the context tags.
513-
redactable := len(m[6]) != 0
514-
// Look for a tenant ID tag. Default to system otherwise.
515-
entry.TenantID = serverident.SystemTenantID
516-
tagsToProcess := m[7]
517-
entry.TenantID, entry.TenantName, tagsToProcess = maybeReadTenantDetails(tagsToProcess)
518-
519-
// Process any remaining tags.
520-
if len(tagsToProcess) != 0 {
521-
r := redactablePackage{
522-
msg: tagsToProcess,
523-
redactable: redactable,
524-
}
525-
r = d.sensitiveEditor(r)
526-
entry.Tags = string(r.msg)
527-
}
531+
entry.Goroutine = int64(goroutine)
532+
}
528533

529-
// If there's an entry counter at the start of the message, process it.
530-
msg := b[len(m[0]):]
531-
i := 0
532-
for ; i < len(msg) && msg[i] >= '0' && msg[i] <= '9'; i++ {
533-
entry.Counter = entry.Counter*10 + uint64(msg[i]-'0')
534-
}
535-
if i > 0 && i < len(msg) && msg[i] == ' ' {
536-
// Only accept the entry counter if followed by a space. In all
537-
// other cases, the number was part of the message string.
538-
msg = msg[i+1:]
539-
} else {
540-
// This was not truly an entry counter. Ignore the work done previously.
541-
entry.Counter = 0
534+
// Process the channel/file/line details.
535+
entry.File = string(m[4])
536+
if idx := strings.IndexByte(entry.File, '@'); idx != -1 {
537+
ch, err := strconv.Atoi(entry.File[:idx])
538+
if err != nil {
539+
return err
542540
}
541+
entry.Channel = Channel(ch)
542+
entry.File = entry.File[idx+1:]
543+
}
544+
545+
line, err := strconv.Atoi(string(m[5]))
546+
if err != nil {
547+
return err
548+
}
549+
entry.Line = int64(line)
550+
551+
// Process the context tags.
552+
redactable := len(m[6]) != 0
553+
// Look for a tenant ID tag. Default to system otherwise.
554+
entry.TenantID = serverident.SystemTenantID
555+
tagsToProcess := m[7]
556+
entry.TenantID, entry.TenantName, tagsToProcess = maybeReadTenantDetails(tagsToProcess)
543557

544-
// Process the remainder of the log message.
558+
// Process any remaining tags.
559+
if len(tagsToProcess) != 0 {
545560
r := redactablePackage{
546-
msg: trimFinalNewLines(msg),
561+
msg: tagsToProcess,
547562
redactable: redactable,
548563
}
549-
r = d.sensitiveEditor(r)
550-
entry.Message = string(r.msg)
551-
entry.Redactable = r.redactable
552-
553-
if strings.HasPrefix(entry.Message, structuredEntryPrefix+"{") /* crdb-v1 prefix */ {
554-
// Note: we do not recognize the v2 marker here (" ={") because
555-
// v2 entries can be split across multiple lines.
556-
entry.StructuredStart = uint32(len(structuredEntryPrefix))
557-
558-
if nl := strings.IndexByte(entry.Message, '\n'); nl != -1 {
559-
entry.StructuredEnd = uint32(nl)
560-
entry.StackTraceStart = uint32(nl + 1)
561-
} else {
562-
entry.StructuredEnd = uint32(len(entry.Message))
563-
}
564-
}
565-
// Note: we only know how to populate entry.StackTraceStart upon
566-
// parse if the entry was structured (see above). If it is not
567-
// structured, we cannot distinguish where the message ends and
568-
// where the stack trace starts. This is another reason why the
569-
// crdb-v1 format is lossy.
564+
r = sensitiveEditor(r)
565+
entry.Tags = string(r.msg)
566+
}
570567

571-
return nil
568+
// If there's an entry counter at the start of the message, process it.
569+
msg := buf[len(m[0]):]
570+
i := 0
571+
for ; i < len(msg) && msg[i] >= '0' && msg[i] <= '9'; i++ {
572+
entry.Counter = entry.Counter*10 + uint64(msg[i]-'0')
573+
}
574+
if i > 0 && i < len(msg) && msg[i] == ' ' {
575+
// Only accept the entry counter if followed by a space. In all
576+
// other cases, the number was part of the message string.
577+
msg = msg[i+1:]
578+
} else {
579+
// This was not truly an entry counter. Ignore the work done previously.
580+
entry.Counter = 0
572581
}
582+
583+
// Process the remainder of the log message.
584+
r := redactablePackage{
585+
msg: trimFinalNewLines(msg),
586+
redactable: redactable,
587+
}
588+
r = sensitiveEditor(r)
589+
entry.Message = string(r.msg)
590+
entry.Redactable = r.redactable
591+
592+
if strings.HasPrefix(entry.Message, structuredEntryPrefix+"{") /* crdb-v1 prefix */ {
593+
// Note: we do not recognize the v2 marker here (" ={") because
594+
// v2 entries can be split across multiple lines.
595+
entry.StructuredStart = uint32(len(structuredEntryPrefix))
596+
597+
if nl := strings.IndexByte(entry.Message, '\n'); nl != -1 {
598+
entry.StructuredEnd = uint32(nl)
599+
entry.StackTraceStart = uint32(nl + 1)
600+
} else {
601+
entry.StructuredEnd = uint32(len(entry.Message))
602+
}
603+
}
604+
// Note: we only know how to populate entry.StackTraceStart upon
605+
// parse if the entry was structured (see above). If it is not
606+
// structured, we cannot distinguish where the message ends and
607+
// where the stack trace starts. This is another reason why the
608+
// crdb-v1 format is lossy.
609+
610+
return nil
573611
}
574612

575613
// maybeReadTenantDetails reads the tenant ID and name. If neither the

0 commit comments

Comments (0)