Hardcoded context timeouts in ListMessages cause failures

**Description:**

When Apache Kafka read/write throughput is slow, consuming messages based on a specific timestamp can exceed the default timeout (35s), leading to failures.

<img width="1533" height="670" alt="Image" src="https://github.com/user-attachments/assets/0807352e-cd31-4250-9df0-b8c9a40277f7" />

**Expected Behavior**

Make the timeout configurable instead of hardcoded.

https://github.com/redpanda-data/console/blob/6ee88a14c358a0810244c4a924cbd9b3dc110263/backend/pkg/api/connect/service/console/service.go#L51-L125

**Environment**

Console version: v3.5.2 / v3.6.0 / v3.7.2


	func (api *Service) ListMessages(
	ctx context.Context,
	req *connect.Request[v1alpha.ListMessagesRequest],
	stream *connect.ServerStream[v1alpha.ListMessagesResponse],
	) error {
	lmq := httptypes.ListMessagesRequest{
	TopicName: req.Msg.GetTopic(),
	StartOffset: req.Msg.GetStartOffset(),
	StartTimestamp: req.Msg.GetStartTimestamp(),
	PartitionID: req.Msg.GetPartitionId(),
	MaxResults: int(req.Msg.GetMaxResults()),
	FilterInterpreterCode: req.Msg.GetFilterInterpreterCode(),
	Enterprise: req.Msg.GetEnterprise(),
	PageToken: req.Msg.GetPageToken(),
	}

	interpreterCode, err := lmq.DecodeInterpreterCode()
	if err != nil {
	return apierrors.NewConnectError(
	connect.CodeInvalidArgument,
	fmt.Errorf("failed decoding provided interpreter code: %w", err),
	apierrors.NewErrorInfo(commonv1alpha1.Reason_REASON_INVALID_INPUT.String()),
	)
	}

	// test compile
	code := fmt.Sprintf(`var isMessageOk = function() {%s}`, interpreterCode)
	_, err = goja.Compile("", code, true)
	if err != nil {
	return apierrors.NewConnectError(
	connect.CodeInvalidArgument,
	fmt.Errorf("failed to compile provided interpreter code: %w", err),
	apierrors.NewErrorInfo(commonv1alpha1.Reason_REASON_INVALID_INPUT.String()),
	)
	}

	// Request messages from kafka and return them once we got all the messages or the context is done
	listReq := console.ListMessageRequest{
	TopicName: lmq.TopicName,
	PartitionID: lmq.PartitionID,
	StartOffset: lmq.StartOffset,
	StartTimestamp: lmq.StartTimestamp,
	MessageCount: lmq.MaxResults,
	FilterInterpreterCode: interpreterCode,
	Troubleshoot: req.Msg.GetTroubleshoot(),
	IncludeRawPayload: req.Msg.GetIncludeOriginalRawPayload(),
	IgnoreMaxSizeLimit: req.Msg.GetIgnoreMaxSizeLimit(),
	KeyDeserializer: fromProtoEncoding(req.Msg.GetKeyDeserializer()),
	ValueDeserializer: fromProtoEncoding(req.Msg.GetValueDeserializer()),
	PageToken: lmq.PageToken,
	PageSize: int(req.Msg.GetPageSize()),
	}

	timeout := 35 * time.Second
	if req.Msg.GetFilterInterpreterCode() != "" \|\| req.Msg.GetStartOffset() == console.StartOffsetNewest {
	// Push-down filters and StartOffset = Newest may be long-running streams.
	// There's already a client-side provided timeout which we usually trust.
	// But additionally we want to ensure it never takes much longer than that.
	timeout = 31 * time.Minute
	}

	ctx, cancel := context.WithTimeoutCause(ctx, timeout, errors.New("list fetch timeout"))
	defer cancel()

	progress := &streamProgressReporter{
	logger: api.logger,
	request: &listReq,
	stream: stream,
	messagesConsumed: atomic.Int64{},
	bytesConsumed: atomic.Int64{},
	}
	progress.Start(ctx)

	return api.consoleSvc.ListMessages(ctx, listReq, progress)
	}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Hardcoded context timeouts in ListMessages cause failures #2432

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Hardcoded context timeouts in ListMessages cause failures #2432

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions