diff --git a/.github/workflows/aiwatchdog.yml b/.github/workflows/aiwatchdog.yml index b2580d0a72f..c7956e63b48 100644 --- a/.github/workflows/aiwatchdog.yml +++ b/.github/workflows/aiwatchdog.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 - name: AI Watchdog - uses: bep/ai-watchdog@995475dd81767f8035dc2c943e9230918d06caca # temporary fork (probably) + uses: bep/ai-watchdog@e85b3674edb3f174950e7622b87350ad54b2013e # temporary fork (probably) with: openai-api-key: ${{ secrets.OPENAI_API_KEY }} pr-label: AI Suspect diff --git a/cache/filecache/filecache.go b/cache/filecache/filecache.go index c64f23914ae..1d5ba765082 100644 --- a/cache/filecache/filecache.go +++ b/cache/filecache/filecache.go @@ -1,4 +1,4 @@ -// Copyright 2024 The Hugo Authors. All rights reserved. +// Copyright 2026 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,17 +46,9 @@ const ( type Cache struct { Fs afero.Fs - // Max age for items in this cache. Negative duration means forever, - // 0 is effectively turning this cache off. - maxAge time.Duration + cfg FileCacheConfig - // When set, we just remove this entire root directory on expiration. - pruneAllRootDir string - - nlocker *lockTracker - - isResourceDir bool - baseDir string + entryLocker *lockTracker initOnce sync.Once isInited bool @@ -93,12 +85,15 @@ type ItemInfo struct { } // NewCache creates a new file cache with the given filesystem and max age. -func NewCache(fs afero.Fs, maxAge time.Duration, pruneAllRootDir string) *Cache { +func NewCache(fs afero.Fs, cfg FileCacheConfig) *Cache { + if err := cfg.init(); err != nil { + panic(fmt.Sprintf("invalid cache config: %s", err)) + } + return &Cache{ - Fs: fs, - nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})}, - maxAge: maxAge, - pruneAllRootDir: pruneAllRootDir, + Fs: fs, + entryLocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})}, + cfg: cfg, } } @@ -117,6 +112,7 @@ func (c *Cache) init() error { if c == nil { panic("cache is nil") } + c.initOnce.Do(func() { c.isInited = true // Create the base dir if it does not exist. @@ -136,19 +132,19 @@ func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) { } id = cleanID(id) - c.nlocker.Lock(id) + c.entryLocker.Lock(id) info := ItemInfo{Name: id} f, err := helpers.OpenFileForWriting(c.Fs, id) if err != nil { - c.nlocker.Unlock(id) + c.entryLocker.Unlock(id) return info, nil, err } return info, &lockedFile{ File: f, - unlock: func() { c.nlocker.Unlock(id) }, + unlock: func() { c.entryLocker.Unlock(id) }, }, nil } @@ -166,8 +162,8 @@ func (c *Cache) ReadOrCreate(id string, id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) info = ItemInfo{Name: id} @@ -196,9 +192,9 @@ func (c *Cache) ReadOrCreate(id string, // NamedLock locks the given id. The lock is released when the returned function is called. func (c *Cache) NamedLock(id string) func() { id = cleanID(id) - c.nlocker.Lock(id) + c.entryLocker.Lock(id) return func() { - c.nlocker.Unlock(id) + c.entryLocker.Unlock(id) } } @@ -211,8 +207,8 @@ func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (It } id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) info := ItemInfo{Name: id} @@ -230,7 +226,7 @@ func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (It return info, nil, err } - if c.maxAge == 0 { + if c.cfg.MaxAge == 0 { // No caching. return info, hugio.ToReadCloser(r), nil } @@ -241,6 +237,40 @@ func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (It c.writeReader(id, io.TeeReader(r, &buff)) } +// AbsFilenameFromID returns the filename for the given id in the cache. +// This will be an absolute path. +func (c *Cache) AbsFilenameFromID(id string) string { + return filepath.Join(c.cfg.DirCompiled, cleanID(id)) +} + +// GetOrCreateInfo tries to get the item info with the given id from cache. If not found or expired, create will +// be invoked with the id. The create function is expected to create the cache item with the given id. The returned ItemInfo will have the id as Name. +// This method is protected by a named lock using the given id as identifier. +func (c *Cache) GetOrCreateInfo(id string, create func(id string) error) (ItemInfo, error) { + if err := c.init(); err != nil { + return ItemInfo{}, err + } + + id = cleanID(id) + + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) + + info := ItemInfo{Name: id} + + if !c.removeIfNeeded(id) { + // The file exists and is not expired, so we consider it a cache hit. + return info, nil + } + + if err := create(id); err != nil { + c.remove(id) + return info, err + } + + return info, nil +} + func (c *Cache) writeReader(id string, r io.Reader) error { dir := filepath.Dir(id) if dir != "" { @@ -264,8 +294,8 @@ func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (Item } id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) info := ItemInfo{Name: id} @@ -285,7 +315,7 @@ func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (Item return info, nil, err } - if c.maxAge == 0 { + if c.cfg.MaxAge == 0 { return info, b, nil } @@ -303,10 +333,10 @@ func (c *Cache) SetBytes(id string, data []byte) error { } id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) - if c.maxAge == 0 { + if c.cfg.MaxAge == 0 { // No caching. return nil } @@ -327,8 +357,8 @@ func (c *Cache) GetItemBytes(id string) (ItemInfo, []byte, error) { } id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) info := ItemInfo{Name: id} @@ -348,8 +378,8 @@ func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) { } id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) info := ItemInfo{Name: id} @@ -358,10 +388,23 @@ func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) { return info, r, nil } +// removeIfNeeded checks if the file with the given id should be re-created. +func (c *Cache) removeIfNeeded(id string) bool { + if c.cfg.MaxAge == 0 { + // No caching, remove. + c.remove(id) + return true + } + if removed, err := c.removeIfExpired(id); err != nil || removed { + return true + } + return false +} + // getOrRemove gets the file with the given id. If it's expired, it will // be removed. func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { - if c.maxAge == 0 { + if c.cfg.MaxAge == 0 { // No caching. return nil } @@ -379,7 +422,7 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser { } func (c *Cache) getBytesAndRemoveIfExpired(id string) ([]byte, bool) { - if c.maxAge == 0 { + if c.cfg.MaxAge == 0 { // No caching. return nil, false } @@ -404,7 +447,7 @@ func (c *Cache) getBytesAndRemoveIfExpired(id string) ([]byte, bool) { } func (c *Cache) removeIfExpired(id string) (bool, error) { - if c.maxAge <= 0 { + if c.cfg.MaxAge <= 0 { return false, nil } @@ -414,29 +457,37 @@ func (c *Cache) removeIfExpired(id string) (bool, error) { } if c.isExpired(fi.ModTime()) { - c.Fs.Remove(id) + c.remove(id) return true, nil } return false, nil } +func (c *Cache) remove(id string) { + if c.cfg.entryIsDir { + c.Fs.RemoveAll(id) + } else { + c.Fs.Remove(id) + } +} + func (c *Cache) isExpired(modTime time.Time) bool { - if c.maxAge < 0 { + if c.cfg.MaxAge < 0 { return false } // Note the use of time.Since here. // We cannot use Hugo's global Clock for this. - return c.maxAge == 0 || time.Since(modTime) > c.maxAge + return c.cfg.MaxAge == 0 || time.Since(modTime) > c.cfg.MaxAge } // For testing func (c *Cache) GetString(id string) string { id = cleanID(id) - c.nlocker.Lock(id) - defer c.nlocker.Unlock(id) + c.entryLocker.Lock(id) + defer c.entryLocker.Unlock(id) f, err := c.Fs.Open(id) if err != nil { @@ -453,11 +504,11 @@ type Caches map[string]*Cache func (f Caches) SetResourceFs(fs afero.Fs) { for _, c := range f { - if c.isResourceDir { + if c.cfg.IsResourceDir { if c.isInited { panic("cannot set resource fs after init") } - c.Fs = hugofs.NewBasePathFs(fs, c.baseDir) + c.Fs = hugofs.NewBasePathFs(fs, c.cfg.DirCompiled) } } } @@ -470,7 +521,6 @@ func (f Caches) Get(name string) *Cache { // NewCaches creates a new set of file caches from the given // configuration. func NewCaches(dcfg Configs, sourceFs afero.Fs) (Caches, error) { - // dcfg := p.Cfg.GetConfigSection("caches").(Configs) fs := sourceFs m := make(Caches) @@ -479,21 +529,10 @@ func NewCaches(dcfg Configs, sourceFs afero.Fs) (Caches, error) { if v.IsResourceDir { cfs = nil // Set later. TODO(bep) this needs to be cleanded up. } else { - cfs = fs - } - - baseDir := v.DirCompiled - - bfs := hugofs.NewBasePathFs(cfs, baseDir) - - var pruneAllRootDir string - if k == CacheKeyModules { - pruneAllRootDir = "pkg" + cfs = hugofs.NewBasePathFs(fs, v.DirCompiled) } - c := NewCache(bfs, v.MaxAge, pruneAllRootDir) - c.isResourceDir = v.IsResourceDir - c.baseDir = baseDir + c := NewCache(cfs, v) m[k] = c } @@ -524,7 +563,7 @@ func (h *httpCache) Get(id string) (resp []byte, ok bool) { } func (h *httpCache) Set(id string, resp []byte) { - if h.c.maxAge == 0 { + if h.c.cfg.MaxAge == 0 { return } diff --git a/cache/filecache/filecache_config.go b/cache/filecache/filecache_config.go index 1cfc8b1c2db..feaa07d494a 100644 --- a/cache/filecache/filecache_config.go +++ b/cache/filecache/filecache_config.go @@ -35,16 +35,12 @@ const ( cacheDirProject = ":cacheDir/:project" ) -var defaultCacheConfig = FileCacheConfig{ - MaxAge: -1, // Never expire - Dir: cacheDirProject, -} - const ( CacheKeyImages = "images" CacheKeyAssets = "assets" CacheKeyModules = "modules" CacheKeyModuleQueries = "modulequeries" + CacheKeyModuleGitInfo = "modulegitinfo" CacheKeyGetResource = "getresource" CacheKeyMisc = "misc" ) @@ -67,11 +63,22 @@ var defaultCacheConfigs = Configs{ CacheKeyModules: { MaxAge: -1, Dir: ":cacheDir/modules", + fileCacheConfigInternal: fileCacheConfigInternal{ + entryIsDir: true, + isReadOnly: true, // we need to make it writable when pruning. + }, }, CacheKeyModuleQueries: { MaxAge: 24 * time.Hour, Dir: ":cacheDir/modules", }, + CacheKeyModuleGitInfo: { + MaxAge: 24 * time.Hour, + Dir: ":cacheDir/modules", + fileCacheConfigInternal: fileCacheConfigInternal{ + entryIsDir: true, + }, + }, CacheKeyImages: { MaxAge: -1, Dir: resourcesGenDir, @@ -90,6 +97,13 @@ var defaultCacheConfigs = Configs{ }, } +func init() { + for k, v := range defaultCacheConfigs { + v.name = k + defaultCacheConfigs[k] = v + } +} + type FileCacheConfig struct { // Max age of cache entries in this cache. Any items older than this will // be removed and not returned from the cache. @@ -101,12 +115,33 @@ type FileCacheConfig struct { MaxAge time.Duration // The directory where files are stored. - Dir string - DirCompiled string `json:"-"` + Dir string + + fileCacheConfigInternal `json:"-"` +} + +func (cfg *FileCacheConfig) init() error { + if cfg.DirCompiled == "" { + // From unit tests. Just check that it does not contain any placeholders. + if strings.Contains(cfg.Dir, ":") { + return fmt.Errorf("cache dir %q contains unresolved placeholders", cfg.Dir) + } + cfg.DirCompiled = cfg.Dir + } + // Sanity check the config. + if len(cfg.DirCompiled) < 5 { + panic(fmt.Sprintf("invalid cache dir: %q", cfg.DirCompiled)) + } + return nil +} + +type fileCacheConfigInternal struct { + DirCompiled string - // Will resources/_gen will get its own composite filesystem that - // also checks any theme. - IsResourceDir bool `json:"-"` + name string // The name of this cache, e.g. "images", "modules" etc. + entryIsDir bool // when set, the cache entries represents directories directly below the base dir. + isReadOnly bool // when set, the cache is read only and needs to be pruned differently. This is used for the Go modules cache. + IsResourceDir bool // resources/_gen will get its own composite filesystem that also checks any theme. TODO(bep) unexport this. } // MarshalJSON marshals FileCacheConfig to JSON with MaxAge as a human-readable string. @@ -146,6 +181,15 @@ func (f Caches) ModuleQueriesCache() *Cache { return c } +// ModuleGitInfoCache gets the file cache for Hugo Module git info. +func (f Caches) ModuleGitInfoCache() *Cache { + c, ok := f[CacheKeyModuleGitInfo] + if !ok { + panic("module git info cache not set") + } + return c +} + // AssetsCache gets the file cache for assets (processed resources, SCSS etc.). func (f Caches) AssetsCache() *Cache { return f[CacheKeyAssets] @@ -176,7 +220,11 @@ func DecodeConfig(fs afero.Fs, bcfg config.BaseConfig, m map[string]any) (Config if _, ok := v.(hmaps.Params); !ok { continue } - cc := defaultCacheConfig + var ok bool + cc, ok := c[k] + if !ok { + return nil, fmt.Errorf("%q is not a valid cache name", k) + } dc := &mapstructure.DecoderConfig{ Result: &cc, @@ -197,12 +245,8 @@ func DecodeConfig(fs afero.Fs, bcfg config.BaseConfig, m map[string]any) (Config return c, errors.New("must provide cache Dir") } - name := strings.ToLower(k) - if !valid[name] { - return nil, fmt.Errorf("%q is not a valid cache name", name) - } + c[k] = cc - c[name] = cc } for k, v := range c { diff --git a/cache/filecache/filecache_config_test.go b/cache/filecache/filecache_config_test.go index e2ae63bda02..2215f13cdbb 100644 --- a/cache/filecache/filecache_config_test.go +++ b/cache/filecache/filecache_config_test.go @@ -57,7 +57,7 @@ dir = "/path/to/c4" c.Assert(err, qt.IsNil) fs := afero.NewMemMapFs() decoded := testconfig.GetTestConfigs(fs, cfg).Base.Caches - c.Assert(len(decoded), qt.Equals, 6) + c.Assert(len(decoded), qt.Equals, 7) c2 := decoded["misc"] c.Assert(c2.MaxAge.String(), qt.Equals, "10m0s") @@ -101,7 +101,7 @@ dir = "/path/to/c4" c.Assert(err, qt.IsNil) fs := afero.NewMemMapFs() decoded := testconfig.GetTestConfigs(fs, cfg).Base.Caches - c.Assert(len(decoded), qt.Equals, 6) + c.Assert(len(decoded), qt.Equals, 7) for _, v := range decoded { c.Assert(v.MaxAge, qt.Equals, time.Duration(0)) @@ -124,7 +124,7 @@ func TestDecodeConfigDefault(t *testing.T) { fs := afero.NewMemMapFs() decoded := testconfig.GetTestConfigs(fs, cfg).Base.Caches - c.Assert(len(decoded), qt.Equals, 6) + c.Assert(len(decoded), qt.Equals, 7) imgConfig := decoded[filecache.CacheKeyImages] miscConfig := decoded[filecache.CacheKeyMisc] @@ -153,6 +153,10 @@ func TestFileCacheConfigMarshalJSON(t *testing.T) { moduleQueriesConfig := decoded[filecache.CacheKeyModuleQueries] c.Assert(moduleQueriesConfig.MaxAge, qt.Equals, 24*time.Hour) + // Also verify the new moduleGitInfo cache. + moduleGitInfoConfig := decoded[filecache.CacheKeyModuleGitInfo] + c.Assert(moduleGitInfoConfig.MaxAge, qt.Equals, 24*time.Hour) + b, err := json.Marshal(moduleQueriesConfig) c.Assert(err, qt.IsNil) diff --git a/cache/filecache/filecache_pruner.go b/cache/filecache/filecache_pruner.go index 6f224cef4dd..890aee96598 100644 --- a/cache/filecache/filecache_pruner.go +++ b/cache/filecache/filecache_pruner.go @@ -50,8 +50,8 @@ func (c Caches) Prune() (int, error) { // Prune removes expired and unused items from this cache. // If force is set, everything will be removed not considering expiry time. func (c *Cache) Prune(force bool) (int, error) { - if c.pruneAllRootDir != "" { - return c.pruneRootDir(force) + if c.cfg.entryIsDir { + return c.pruneRootDirs(force) } if err := c.init(); err != nil { return 0, err @@ -93,9 +93,9 @@ func (c *Cache) Prune(force bool) (int, error) { shouldRemove := force || c.isExpired(info.ModTime()) - if !shouldRemove && len(c.nlocker.seen) > 0 { + if !shouldRemove && len(c.entryLocker.seen) > 0 { // Remove it if it's not been touched/used in the last build. - _, seen := c.nlocker.seen[name] + _, seen := c.entryLocker.seen[name] shouldRemove = !seen } @@ -117,11 +117,43 @@ func (c *Cache) Prune(force bool) (int, error) { return counter, err } -func (c *Cache) pruneRootDir(force bool) (int, error) { +func (c *Cache) pruneRootDirs(force bool) (int, error) { + dirs, err := afero.ReadDir(c.Fs, "") + if err != nil { + if herrors.IsNotExist(err) { + return 0, nil + } + return 0, err + } + + counter := 0 + + for _, dir := range dirs { + if !dir.IsDir() { + continue + } + + count, err := c.pruneRootDir(dir.Name(), force) + if err != nil { + return counter, err + } + counter += count + } + + return counter, nil +} + +func (c *Cache) pruneRootDir(dirname string, force bool) (int, error) { if err := c.init(); err != nil { return 0, err } - info, err := c.Fs.Stat(c.pruneAllRootDir) + + // Sanity check. + if dirname != "pkg" && len(dirname) < 5 { + panic(fmt.Sprintf("invalid cache dir name: %q", dirname)) + } + + info, err := c.Fs.Stat(dirname) if err != nil { if herrors.IsNotExist(err) { return 0, nil @@ -133,5 +165,9 @@ func (c *Cache) pruneRootDir(force bool) (int, error) { return 0, nil } - return hugofs.MakeReadableAndRemoveAllModulePkgDir(c.Fs, c.pruneAllRootDir) + if c.cfg.isReadOnly { + return hugofs.MakeReadableAndRemoveAllModulePkgDir(c.Fs, dirname) + } + + return 1, c.Fs.RemoveAll(dirname) } diff --git a/cache/filecache/filecache_test.go b/cache/filecache/filecache_test.go index 35f0eee03d0..5beb4054b7d 100644 --- a/cache/filecache/filecache_test.go +++ b/cache/filecache/filecache_test.go @@ -252,7 +252,12 @@ func TestFileCacheReadOrCreateErrorInRead(t *testing.T) { } } - cache := filecache.NewCache(afero.NewMemMapFs(), 100*time.Hour, "") + cfg := filecache.FileCacheConfig{ + MaxAge: 100 * time.Hour, + Dir: "cache/c", + } + + cache := filecache.NewCache(afero.NewMemMapFs(), cfg) const id = "a32" diff --git a/commands/mod.go b/commands/mod.go index 971257a1f8a..0c49565c061 100644 --- a/commands/mod.go +++ b/commands/mod.go @@ -166,7 +166,7 @@ Note that for vendored modules, that is the version listed and not the one from if all { modCache := h.ResourceSpec.FileCaches.ModulesCache() count, err := modCache.Prune(true) - r.Printf("Deleted %d files from module cache.", count) + r.Printf("Deleted %d directories from module cache.", count) return err } diff --git a/docs/content/en/functions/resources/GetRemote.md b/docs/content/en/functions/resources/GetRemote.md index 29768eedff9..2a3270e88e6 100644 --- a/docs/content/en/functions/resources/GetRemote.md +++ b/docs/content/en/functions/resources/GetRemote.md @@ -46,11 +46,12 @@ key method : (`string`) The action to perform on the requested resource, typically one of `GET`, `POST`, or `HEAD`. +timeout +: (`string`) Cancels the request if it does not complete within this duration (e.g. "30s"). + responseHeaders : {{< new-in 0.143.0 />}} -: (`[]string`) The headers to extract from the server's response, accessible through the resource's [`Data.Headers`] method. Header name matching is case-insensitive. - -[`Data.Headers`]: /methods/resource/data/#headers +: (`[]string`) The headers to extract from the server's response, accessible through the resource's [`Data.Headers`] method. Header name matching is case-insensitive.[`Data.Headers`]: /methods/resource/data/#headers ## Options examples @@ -110,6 +111,20 @@ To extract specific headers from the server's response: {{ $resource := resources.GetRemote $url $opts }} ``` +To set a per-request timeout (e.g. when fetching many feeds where a few slow ones should not stall the build): + +```go-html-template +{{ $url := "https://example.org/feed.rss" }} +{{ $opts := dict "timeout" "10s" }} +{{ with try (resources.GetRemote $url $opts) }} + {{ with .Err }} + {{ warnf "Failed to fetch feed: %s" . }} + {{ else with .Value }} + {{ $data = . | transform.Unmarshal }} + {{ end }} +{{ end }} +``` + ## Remote data When retrieving remote data, use the [`transform.Unmarshal`] function to [unmarshal](g) the response. @@ -224,4 +239,5 @@ Note that the entry above is: [`try`]: /functions/go-template/try [configure file caches]: /configuration/caches/ + [error handling]: #error-handling diff --git a/hugofs/fileinfo.go b/hugofs/fileinfo.go index f5cb865e198..d0d008317d9 100644 --- a/hugofs/fileinfo.go +++ b/hugofs/fileinfo.go @@ -44,6 +44,25 @@ func NewFileMeta() *FileMeta { return &FileMeta{} } +// A subset of modules.Module used to avoid circular dependencies. +// Note that it's safe to type assert to the full modules.Module when needed. +type Module interface { + // Returns the path to this module. + // This will either be the module path, e.g. "github.com/gohugoio/myshortcodes", + // or the path below your /theme folder, e.g. "mytheme". + Path() string + + // Directory holding files for this module. + Dir() string + + // Returns whether this is a Go Module. + IsGoMod() bool + + // The module version. + Version() string +} + +// FileMeta holds metadata about a file or directory. type FileMeta struct { PathInfo *paths.Path Name string @@ -51,7 +70,7 @@ type FileMeta struct { BaseDir string SourceRoot string - Module string + Module Module ModuleOrdinal int Component string @@ -130,6 +149,13 @@ func (f *FileMeta) JoinStat(name string) (FileMetaInfo, error) { return f.JoinStatFunc(name) } +func (m *FileMeta) ModulePath() string { + if m.Module == nil { + return "" + } + return m.Module.Path() +} + type FileMetaInfo interface { fs.DirEntry MetaProvider diff --git a/hugofs/rootmapping_fs.go b/hugofs/rootmapping_fs.go index 33791127a80..99c0b4cdc26 100644 --- a/hugofs/rootmapping_fs.go +++ b/hugofs/rootmapping_fs.go @@ -185,7 +185,7 @@ type RootMapping struct { FromBase string // The base directory of the virtual mount. To string // The source directory or file. ToBase string // The base of To. May be empty if an absolute path was provided. - Module string // The module path/ID. + Module Module // The module. ModuleOrdinal int // The module ordinal starting with 0 which is the project. IsProject bool // Whether this is a mount in the main project. Meta *FileMeta // File metadata (lang etc.) diff --git a/hugolib/filesystems/basefs.go b/hugolib/filesystems/basefs.go index 9fa561906d3..30c1b9e4dc1 100644 --- a/hugolib/filesystems/basefs.go +++ b/hugolib/filesystems/basefs.go @@ -777,7 +777,7 @@ func (b *sourceFilesystemsBuilder) createOverlayFs( From: mount.Target, To: filename, ToBase: base, - Module: md.Module.Path(), + Module: md.Module, ModuleOrdinal: md.ordinal, IsProject: md.isMainProject, Meta: &hugofs.FileMeta{ diff --git a/hugolib/gitinfo.go b/hugolib/gitinfo.go index 4786a95094d..07832fbdcf0 100644 --- a/hugolib/gitinfo.go +++ b/hugolib/gitinfo.go @@ -14,13 +14,21 @@ package hugolib import ( + "bytes" + "fmt" "io" + "os" "path/filepath" "strings" + "sync" "github.com/bep/gitmap" + "github.com/gohugoio/hugo/cache/filecache" + "github.com/gohugoio/hugo/common/hashing" "github.com/gohugoio/hugo/common/hexec" + "github.com/gohugoio/hugo/common/loggers" "github.com/gohugoio/hugo/deps" + "github.com/gohugoio/hugo/modules" "github.com/gohugoio/hugo/resources/page" "github.com/gohugoio/hugo/source" ) @@ -28,9 +36,35 @@ import ( type gitInfo struct { contentDir string repo *gitmap.GitRepo + + // Per-module git repos keyed by module dir. + moduleRepoLoaders map[string]func() (*gitmap.GitRepo, error) + + logger loggers.Logger } func (g *gitInfo) forPage(p page.Page) *source.GitInfo { + if p.File() == nil { + return nil + } + + mod := p.File().FileInfo().Meta().Module + if mod != nil && mod.IsGoMod() { + mm := mod.(modules.Module) + if loadRepo, ok := g.moduleRepoLoaders[mm.Dir()]; ok { + repo, err := loadRepo() + if err != nil { + g.logger.Warnf("Failed to load git repo for module %s: %v", mm.Path(), err) + return nil + } + return g.gitInfoFromModuleRepo(p, mm, repo) + } + } + + // Fall back to local git info. + if g.repo == nil { + return nil + } name := strings.TrimPrefix(filepath.ToSlash(p.File().Filename()), g.contentDir) name = strings.TrimPrefix(name, "/") gi, found := g.repo.Files[name] @@ -40,9 +74,145 @@ func (g *gitInfo) forPage(p page.Page) *source.GitInfo { return gi } -func newGitInfo(d *deps.Deps) (*gitInfo, error) { +func (g *gitInfo) gitInfoFromModuleRepo(p page.Page, mod modules.Module, repo *gitmap.GitRepo) *source.GitInfo { + filename := filepath.ToSlash(p.File().Filename()) + modDir := filepath.ToSlash(mod.Dir()) + filePath := strings.TrimPrefix(filename, modDir) + filePath = strings.TrimPrefix(filePath, "/") + + gi, found := repo.Files[filePath] + if !found { + return nil + } + return gi +} + +// isGitModule returns true if the module has a git VCS origin. +func isGitModule(m modules.Module) bool { + origin := m.Origin() + return origin.VCS == "git" && origin.URL != "" +} + +type gitInfoConfig struct { + Deps *deps.Deps + Modules modules.Modules + GitInfoCache *filecache.Cache + Logger loggers.Logger +} + +func newGitInfo(cfg gitInfoConfig) (*gitInfo, error) { + g := &gitInfo{ + moduleRepoLoaders: make(map[string]func() (*gitmap.GitRepo, error)), + logger: cfg.Logger, + } + + var hasGitModules bool + var projectIsGitModule bool + + for _, mod := range cfg.Modules { + if !isGitModule(mod) { + continue + } + hasGitModules = true + if mod.Owner() == nil { + projectIsGitModule = true + } + } + + if hasGitModules { + if err := g.loadModuleRepos(cfg); err != nil { + return nil, err + } + } + + if projectIsGitModule { + return g, nil + } + + // Load local git repo info. + gitRepo, err := mapLocalRepo(cfg) + if err != nil { + if hasGitModules { + cfg.Logger.Warnf("Failed to read local Git log: %v", err) + return g, nil + } + return nil, err + } + + g.contentDir = gitRepo.TopLevelAbsPath + g.repo = gitRepo + + return g, nil +} + +func (g *gitInfo) loadModuleRepos(cfg gitInfoConfig) error { + for _, mod := range cfg.Modules { + if !isGitModule(mod) { + continue + } + + loadGitInfo := sync.OnceValues(func() (*gitmap.GitRepo, error) { + origin := mod.Origin() + + cloneDir, err := ensureClone(cfg, origin) + if err != nil { + return nil, fmt.Errorf("failed to clone %s: %v", origin.URL, err) + } + + repo, err := mapModuleRepo(cfg, cloneDir, origin.Hash) + if err != nil { + return nil, fmt.Errorf("failed to map git repo for module %s: %v", mod.Path(), err) + } + return repo, nil + }) + + g.moduleRepoLoaders[mod.Dir()] = loadGitInfo + } + + return nil +} + +// ensureClone ensures a blobless clone of the module's origin repo exists in the cache. +func ensureClone(cfg gitInfoConfig, origin modules.ModuleOrigin) (string, error) { + key := "repo_" + hashing.XxHashFromStringHexEncoded(origin.URL) + info, err := cfg.GitInfoCache.GetOrCreateInfo(key, func(id string) error { + cloneDir := cfg.GitInfoCache.AbsFilenameFromID(id) + if err := os.MkdirAll(cloneDir, 0o777); err != nil { + return err + } + + var stderr bytes.Buffer + args := []any{ + "clone", + "--filter=blob:none", + "--no-checkout", + origin.URL, + cloneDir, + hexec.WithStdout(io.Discard), + hexec.WithStderr(&stderr), + } + + cfg.Logger.Infof("Cloning gitinfo for repo %s into cache", origin.URL) + + cmd, err := cfg.Deps.ExecHelper.New("git", args...) + if err != nil { + return fmt.Errorf("git clone: %w", err) + } + if err := cmd.Run(); err != nil { + return fmt.Errorf("git clone %s: %w: %s", origin.URL, err, stderr.String()) + } + return nil + }) + if err != nil { + return "", err + } + return cfg.GitInfoCache.AbsFilenameFromID(info.Name), nil +} + +func mapModuleRepo(cfg gitInfoConfig, repoDir, revision string) (*gitmap.GitRepo, error) { opts := gitmap.Options{ - Repository: d.Conf.BaseConfig().WorkingDir, + Repository: repoDir, + Revision: revision, GetGitCommandFunc: func(stdout, stderr io.Writer, args ...string) (gitmap.Runner, error) { var argsv []any for _, arg := range args { @@ -53,14 +223,27 @@ func newGitInfo(d *deps.Deps) (*gitInfo, error) { hexec.WithStdout(stdout), hexec.WithStderr(stderr), ) - return d.ExecHelper.New("git", argsv...) + return cfg.Deps.ExecHelper.New("git", argsv...) }, } + return gitmap.Map(opts) +} - gitRepo, err := gitmap.Map(opts) - if err != nil { - return nil, err +func mapLocalRepo(cfg gitInfoConfig) (*gitmap.GitRepo, error) { + opts := gitmap.Options{ + Repository: cfg.Deps.Conf.BaseConfig().WorkingDir, + GetGitCommandFunc: func(stdout, stderr io.Writer, args ...string) (gitmap.Runner, error) { + var argsv []any + for _, arg := range args { + argsv = append(argsv, arg) + } + argsv = append( + argsv, + hexec.WithStdout(stdout), + hexec.WithStderr(stderr), + ) + return cfg.Deps.ExecHelper.New("git", argsv...) + }, } - - return &gitInfo{contentDir: gitRepo.TopLevelAbsPath, repo: gitRepo}, nil + return gitmap.Map(opts) } diff --git a/hugolib/gitinfo_github_test.go b/hugolib/gitinfo_github_test.go new file mode 100644 index 00000000000..49e2a0f840b --- /dev/null +++ b/hugolib/gitinfo_github_test.go @@ -0,0 +1,65 @@ +// Copyright 2025 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hugolib + +import ( + "testing" +) + +func TestGitInfoFromGitModule(t *testing.T) { + t.Parallel() + + files := ` +-- hugo.toml -- +baseURL = "https://example.com/" +enableGitInfo = true + +[module] +[[module.imports]] +path = "github.com/bep/hugo-testing-git-versions" +version = "v3.0.0" +[[module.imports.mounts]] +source = "content" +target = "content" +[[module.imports]] +path = "github.com/bep/hugo-testing-git-versions" +version = "v3.0.1" +[[module.imports.mounts]] +source = "content" +target = "content/301" +-- layouts/page.html -- +Title: {{ .Title }}| +GitInfo: {{ with .GitInfo }}Hash: {{ .Hash }}|Subject: {{ .Subject }}|AuthorName: {{ .AuthorName }}{{ end }}| +Content: {{ .Content }}| +-- layouts/_default/list.html -- +List: {{ .Title }} +` + + b := Test(t, files, TestOptOsFs()) + + b.AssertFileContent("public/docs/functions/mean/index.html", + "Hash: 9769b63d4a4abdd406e333be5fb8b5d48737d3a9|", + "AuthorName: Bjørn Erik Pedersen|", + ) + + b.AssertFileContent("public/docs/functions/standard-deviation/index.html", + "Hash: 2d92492a7f1ec4968529ee12cf62ed652eb45950|Subject: v3.0.0 edits|", + "AuthorName: Bjørn Erik Pedersen|", + ) + + b.AssertFileContent("public/301/docs/functions/standard-deviation/index.html", + "Hash: 3e0f3930f1ec9a29a7442da5f1bfc0b7e58f167a|Subject: v3.0.1 edit|", + "AuthorName: Bjørn Erik Pedersen|", + ) +} diff --git a/hugolib/hugo_sites.go b/hugolib/hugo_sites.go index c9badaeb4b8..8b2eb46360f 100644 --- a/hugolib/hugo_sites.go +++ b/hugolib/hugo_sites.go @@ -546,7 +546,13 @@ func (h *HugoSites) GetContentPage(filename string) page.Page { func (h *HugoSites) loadGitInfo() error { if h.Configs.Base.EnableGitInfo { - gi, err := newGitInfo(h.Deps) + cfg := gitInfoConfig{ + Deps: h.Deps, + Modules: h.Configs.Modules, + GitInfoCache: h.Configs.FileCaches.ModuleGitInfoCache(), + Logger: h.Log, + } + gi, err := newGitInfo(cfg) if err != nil { h.Log.Errorln("Failed to read Git log:", err) } else { diff --git a/modules/client.go b/modules/client.go index 968d1050014..2c5faf41486 100644 --- a/modules/client.go +++ b/modules/client.go @@ -964,6 +964,15 @@ type goModule struct { Dir string // directory holding files for this module, if any GoMod string // path to go.mod file for this module, if any Error *goModuleError // error loading module + Origin *goModuleOrigin +} + +type goModuleOrigin struct { + VCS string // version control system, e.g. "git" + URL string // repository URL, e.g. "https://github.com/bep/hugo-testing-git-versions" + Hash string // commit hash + TagSum string + Ref string // e.g. "refs/tags/v3.0.1" } type goModuleError struct { diff --git a/modules/module.go b/modules/module.go index 5eb78e98e57..89219cefb87 100644 --- a/modules/module.go +++ b/modules/module.go @@ -79,6 +79,17 @@ type Module interface { // Whether this module's dir is a watch candidate. Watch() bool + + // Origin returns the module's origin info if available (VCS, URL, Hash, etc.). + Origin() ModuleOrigin +} + +// ModuleOrigin contains origin info for a Go module. +type ModuleOrigin struct { + VCS string // version control system, e.g. "git" + URL string // repository URL, e.g. "https://github.com/bep/hugo-testing-git-versions" + Hash string // commit hash + Ref string // e.g. "refs/tags/v3.0.1" } type Modules []Module @@ -219,3 +230,15 @@ func (m *moduleAdapter) Watch() bool { // That leaves modules inside the read-only module cache. return !m.gomod.Indirect } + +func (m *moduleAdapter) Origin() ModuleOrigin { + if !m.IsGoMod() || m.gomod.Origin == nil { + return ModuleOrigin{} + } + return ModuleOrigin{ + VCS: m.gomod.Origin.VCS, + URL: m.gomod.Origin.URL, + Hash: m.gomod.Origin.Hash, + Ref: m.gomod.Origin.Ref, + } +} diff --git a/modules/npm/package_builder.go b/modules/npm/package_builder.go index c9b6c9831d4..1a5062ac82e 100644 --- a/modules/npm/package_builder.go +++ b/modules/npm/package_builder.go @@ -83,7 +83,8 @@ func Pack(sourceFs, assetsWithDuplicatesPreservedFs afero.Fs) error { if err != nil { return fmt.Errorf("npm pack: failed to open package file: %w", err) } - b = newPackageBuilder(meta.Module, f) + + b = newPackageBuilder(meta.ModulePath(), f) f.Close() d, err := assetsWithDuplicatesPreservedFs.Open(files.FolderJSConfig) @@ -115,7 +116,7 @@ func Pack(sourceFs, assetsWithDuplicatesPreservedFs afero.Fs) error { if err != nil { return fmt.Errorf("npm pack: failed to open package file: %w", err) } - b.Add(meta.Module, f) + b.Add(meta.ModulePath(), f) f.Close() } diff --git a/resources/resource_factories/create/create_integration_test.go b/resources/resource_factories/create/create_integration_test.go index acb511d6d0b..b9c46bbcf26 100644 --- a/resources/resource_factories/create/create_integration_test.go +++ b/resources/resource_factories/create/create_integration_test.go @@ -20,6 +20,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" "github.com/gohugoio/hugo/htesting" "github.com/gohugoio/hugo/hugolib" @@ -175,3 +176,47 @@ mediaTypes = ['text/plain'] } }) } + +func TestGetRemotePerRequestTimeout(t *testing.T) { + t.Parallel() + htesting.SkipSlowTestUnlessCI(t) + + // A server that always sleeps longer than the per-request timeout. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(2 * time.Second) + w.Header().Add("Content-Type", "text/plain") + w.Write([]byte("too late")) + })) + t.Cleanup(func() { srv.Close() }) + + files := ` +-- hugo.toml -- +timeout = "30s" +[security] +[security.http] +urls = ['.*'] +mediaTypes = ['text/plain'] +-- layouts/home.html -- +{{ $url := "URL" }} +{{ $opts := dict "timeout" "200ms" }} +{{ with try (resources.GetRemote $url $opts) }} + {{ with .Err }} + Err: {{ . }} + {{ else with .Value }} + Content: {{ .Content }} + {{ end }} +{{ end }} +` + files = strings.ReplaceAll(files, "URL", srv.URL) + + b := hugolib.NewIntegrationTestBuilder( + hugolib.IntegrationTestConfig{ + T: t, + TxtarString: files, + }, + ) + b.Build() + + // The per-request timeout of 200ms should fire well before the global 30s timeout. + b.AssertFileContent("public/index.html", "Err:") +} diff --git a/resources/resource_factories/create/remote.go b/resources/resource_factories/create/remote.go index 899ac8330df..22aaf3ce94a 100644 --- a/resources/resource_factories/create/remote.go +++ b/resources/resource_factories/create/remote.go @@ -27,6 +27,8 @@ import ( "strings" "time" + "github.com/spf13/cast" + "github.com/gohugoio/httpcache" "github.com/gohugoio/hugo/common/hashing" "github.com/gohugoio/hugo/common/hmaps" @@ -177,6 +179,19 @@ func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resou isHeadMethod := method == "HEAD" optionsm = maps.Clone(optionsm) + + // Extract timeout before computing cache keys: it only affects fetch behaviour, + // not the cached content, so it must not influence the cache key. + var perRequestTimeout time.Duration + if v, k, ok := hmaps.LookupEqualFold(optionsm, "timeout"); ok { + d, err := cast.ToDurationE(v) + if err != nil { + return nil, fmt.Errorf("invalid timeout for resource %s: %w", uri, err) + } + perRequestTimeout = d + delete(optionsm, k) + } + userKey, optionsKey := remoteResourceKeys(uri, optionsm) // A common pattern is to use the key in the options map as @@ -197,6 +212,11 @@ func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resou getRes := func() (*http.Response, error) { ctx := context.Background() + if perRequestTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, perRequestTimeout) + defer cancel() + } ctx = c.resourceIDDispatcher.Set(ctx, filecacheKey) req, err := options.NewRequest(uri) diff --git a/testscripts/commands/mod.txt b/testscripts/commands/mod.txt index 68c30cec1d8..8500336cc8a 100644 --- a/testscripts/commands/mod.txt +++ b/testscripts/commands/mod.txt @@ -18,8 +18,7 @@ hugo mod clean ! stderr . stdout 'hugo: removed 1 dirs in module cache for \"github.com/bep/empty-hugo-module\"' hugo mod clean --all -# Currently this is 299 on MacOS and 301 on Linux. -stdout 'Deleted (2|3)\d{2} files from module cache\.' +stdout 'Deleted (2|3)\d{2} directories from module cache\.' cd submod hugo mod init testsubmod cmpenv go.mod $WORK/golden/go.mod.testsubmod