Skip to content

Cache Workers get Stuck on Failed Manual Refresh Calls #1551

@jasontheiler

Description

@jasontheiler

Bug Description

Hi there,

I've found out that cache workers appear to get stuck when they process a manual jwk.Cache.Refresh call that fails because the server responds with some error (e.g. 500 Internal Server Error). If you repeat the jwk.Cache.Refresh call as many times as the cache has workers (by default 5, so 5 times), then the automatic refresh will cease to work and any future manual refresh calls will also block indefinitely.

$ go version
go version go1.25.6 X:nodwarf5 linux/amd64

To Reproduce

I've created a minimal reproduction that you can just run. It outputs JSON logs of the automatic refresh attempts and manual refresh calls until all the workers are stuck.

Source: https://github.com/jasontheiler/jwx-v3-manual-refresh-bug

go.mod
module jwx-v3-manual-refresh-bug

go 1.25.3

require (
	github.com/lestrrat-go/httprc/v3 v3.0.3
	github.com/lestrrat-go/jwx/v3 v3.0.13
)

require (
	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
	github.com/goccy/go-json v0.10.3 // indirect
	github.com/lestrrat-go/blackmagic v1.0.4 // indirect
	github.com/lestrrat-go/httpcc v1.0.1 // indirect
	github.com/lestrrat-go/option/v2 v2.0.0 // indirect
	github.com/segmentio/asm v1.2.1 // indirect
	golang.org/x/sys v0.39.0 // indirect
)
main.go
package main

import (
	"context"
	"log/slog"
	"net/http"
	"net/http/httptest"
	"os"
	"time"

	"github.com/lestrrat-go/httprc/v3"
	"github.com/lestrrat-go/jwx/v3/jwk"
)

const (
	// Any number of workers is okay. The more workers, the longer it will take
	// to get all of them stuck.
	numWorkers = 3
	// Delay between manual refresh attempts to allow the automatic refresh to
	// make attempts, too.
	delayBetweenManualRefreshes = 5 * time.Second
)

func main() {
	ctx := context.Background()
	logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		logger.DebugContext(ctx, "received JWK set request")
		w.WriteHeader(http.StatusInternalServerError)
	}))
	defer server.Close()

	errorSink := &jwkCacheErrorSink{
		logger: logger,
	}

	// Create a new JWK cache with the specified number of workers.
	cache, err := jwk.NewCache(ctx, httprc.NewClient(httprc.WithErrorSink(errorSink), httprc.WithWorkers(numWorkers)))
	if err != nil {
		panic(err)
	}

	// Don't wait for the automatic cache refresh to warm the cache and do it
	// manually instead further below.
	err = cache.Register(ctx, server.URL, jwk.WithWaitReady(false), jwk.WithConstantInterval(time.Second))
	if err != nil {
		panic(err)
	}

	// Do as many manual cache refresh attempts as there are workers.
	for range numWorkers {
		time.Sleep(delayBetweenManualRefreshes)

		if _, err := cache.Refresh(ctx, server.URL); err != nil {
			logger.ErrorContext(ctx, "failed to refresh JWK cache manually")
		}
	}

	// After the manual cache refresh attempts all cache workers are stuck and
	// no further cache refreshes (automatic or manual) can be done.

	_, err = cache.Refresh(ctx, server.URL)
	// This will never run, since all cache workers are stuck.
	panic(err)
}

type jwkCacheErrorSink struct {
	logger *slog.Logger
}

func (es *jwkCacheErrorSink) Put(ctx context.Context, err error) {
	es.logger.WarnContext(ctx, "failed to refresh JWK cache automatically", "error", err)
}
Output
$ go run ./main.go
{ "time": "2026-02-05T14:42:31.479631012+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:31.479735048+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:32.480176686+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:32.480329082+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:33.480751385+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:33.480988109+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:34.481411555+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:34.481601282+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:35.482246455+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:35.482448084+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:36.480431841+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:36.480523613+01:00", "level": "ERROR", "msg": "failed to refresh JWK cache manually" }
{ "time": "2026-02-05T14:42:38.483941131+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:38.484107603+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:39.48531158+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:39.485406058+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:40.486493427+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:40.486561415+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:41.481145832+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:41.481278391+01:00", "level": "ERROR", "msg": "failed to refresh JWK cache manually" }
{ "time": "2026-02-05T14:42:43.48785339+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:43.488120581+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:44.489282154+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:44.489475687+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:45.489842879+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:45.489985898+01:00", "level": "WARN", "msg": "failed to refresh JWK cache automatically", "error": "httprc.Resource.Sync: unexpected status code (status code=500, url="http://127.0.0.1:41699")" }
{ "time": "2026-02-05T14:42:46.482261434+01:00", "level": "DEBUG", "msg": "received JWK set request" }
{ "time": "2026-02-05T14:42:46.482406767+01:00", "level": "ERROR", "msg": "failed to refresh JWK cache manually" }

# Should continue to show DEBUG and WARN logs, but it's completely stuck.

Expected Behavior

I'd expect the cache workers to continue the automatic refresh and any future manual refresh calls, regardless of how many workers there are and how many failed attempts to refresh the cache there were.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions