Skip to content
13 changes: 6 additions & 7 deletions modules/actions/workflows.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,16 @@ func ListWorkflows(commit *git.Commit) (git.Entries, error) {
return nil, err
}

entries, err := tree.ListEntriesRecursiveFast()
if err != nil {
return nil, err
}

ret := make(git.Entries, 0, len(entries))
for _, entry := range entries {
ret := make(git.Entries, 0, 5)
if err := tree.IterateEntriesRecursive(func(entry *git.TreeEntry) error {
if strings.HasSuffix(entry.Name(), ".yml") || strings.HasSuffix(entry.Name(), ".yaml") {
ret = append(ret, entry)
}
return nil
}, nil); err != nil {
return nil, err
}

return ret, nil
}

Expand Down
15 changes: 12 additions & 3 deletions modules/git/parse_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
// parseTreeEntries FIXME this function's design is not right, it should make the caller read all data into memory
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
return entries, iterateTreeEntries(data, ptree, func(entry *TreeEntry) error {
entries = append(entries, entry)
return nil
})
}

func iterateTreeEntries(data []byte, ptree *Tree, f func(entry *TreeEntry) error) error {
for pos := 0; pos < len(data); {
posEnd := bytes.IndexByte(data[pos:], '\n')
if posEnd == -1 {
Expand All @@ -33,7 +40,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
line := data[pos:posEnd]
lsTreeLine, err := parseLsTreeLine(line)
if err != nil {
return nil, err
return err
}
entry := &TreeEntry{
ptree: ptree,
Expand All @@ -44,9 +51,11 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
sized: lsTreeLine.Size.Has(),
}
pos = posEnd + 1
entries = append(entries, entry)
if err := f(entry); err != nil {
return err
}
}
return entries, nil
return nil
}

func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {
Expand Down
42 changes: 17 additions & 25 deletions modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

tree := commit.Tree

entries, err := tree.ListEntriesRecursiveWithSize()
if err != nil {
return nil, err
}

checker, deferable := repo.CheckAttributeReader(commitID)
defer deferable()

Expand All @@ -77,18 +72,12 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
firstExcludedLanguage := ""
firstExcludedLanguageSize := int64(0)

for _, f := range entries {
select {
case <-repo.Ctx.Done():
return sizes, repo.Ctx.Err()
default:
}

if err := tree.IterateEntriesRecursive(func(f *TreeEntry) error {
contentBuf.Reset()
content = contentBuf.Bytes()

if f.Size() == 0 {
continue
return nil
}

isVendored := optional.None[bool]()
Expand All @@ -101,22 +90,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if err == nil {
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
if isVendored.ValueOrDefault(false) {
continue
return nil
}

isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
if isGenerated.ValueOrDefault(false) {
continue
return nil
}

isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
if isDocumentation.ValueOrDefault(false) {
continue
return nil
}

isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
if !isDetectable.ValueOrDefault(true) {
continue
return nil
}

hasLanguage := TryReadLanguageAttribute(attrs)
Expand All @@ -131,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

// this language will always be added to the size
sizes[language] += f.Size()
continue
return nil
}
}
}
Expand All @@ -140,19 +129,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
enry.IsDotFile(f.Name()) ||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
enry.IsConfiguration(f.Name()) {
continue
return nil
}

// If content can not be read or file is too big just do detection by filename

if f.Size() <= bigFileSize {
if err := writeID(f.ID.String()); err != nil {
return nil, err
return err
}
_, _, size, err := ReadBatchLine(batchReader)
if err != nil {
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
return nil, err
return err
}

sizeToRead := size
Expand All @@ -164,22 +153,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
if err != nil {
return nil, err
return err
}
content = contentBuf.Bytes()
if err := DiscardFull(batchReader, discard); err != nil {
return nil, err
return err
}
}
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
continue
return nil
}

// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - eg. do the all the detection tests using filename first before reading content.
language := analyze.GetCodeLanguage(f.Name(), content)
if language == "" {
continue
return nil
}

// group languages, such as Pug -> HTML; SCSS -> CSS
Expand All @@ -200,6 +189,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
firstExcludedLanguage = language
firstExcludedLanguageSize += f.Size()
}
return nil
}, TrustedCmdArgs{"--long"}); err != nil { // add --long to get size
return sizes, err
}

// If there are no included languages add the first excluded language
Expand Down
30 changes: 22 additions & 8 deletions modules/git/tree_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,31 @@

// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
var entries []*TreeEntry
if err := t.IterateEntriesRecursive(func(entry *TreeEntry) error {
entries = append(entries, convertedEntry)

Check failure on line 63 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / backend

undefined: convertedEntry

Check failure on line 63 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-sqlite

undefined: convertedEntry

Check failure on line 63 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-unit

undefined: convertedEntry
return nil
}, nil); err != nil {
return nil, err
}
return entries, nil
}

// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
return t.ListEntriesRecursiveWithSize()
}

// IterateEntriesRecursive returns iterate entries of current tree recursively including all subtrees
// extraArgs could be "-l" to get the size, which is slower
func (t *Tree) IterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
if t.gogitTree == nil {
err := t.loadTreeObject()
if err != nil {
return nil, err

Check failure on line 82 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / backend

too many return values

Check failure on line 82 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-sqlite

too many return values

Check failure on line 82 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-unit

too many return values
}
}

var entries []*TreeEntry
seen := map[plumbing.Hash]bool{}
walker := object.NewTreeWalker(t.gogitTree, true, seen)
for {
Expand All @@ -74,7 +91,7 @@
break
}
if err != nil {
return nil, err

Check failure on line 94 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / backend

too many return values

Check failure on line 94 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-sqlite

too many return values

Check failure on line 94 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-unit

too many return values
}
if seen[entry.Hash] {
continue
Expand All @@ -86,13 +103,10 @@
ptree: t,
fullName: fullName,
}
entries = append(entries, convertedEntry)
if err := f(convertedEntry); err != nil {
return nil, err

Check failure on line 107 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / backend

too many return values

Check failure on line 107 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-sqlite

too many return values

Check failure on line 107 in modules/git/tree_gogit.go

View workflow job for this annotation

GitHub Actions / test-unit

too many return values
}
}

return entries, nil
}

// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
return t.ListEntriesRecursiveWithSize()
return nil
}
70 changes: 57 additions & 13 deletions modules/git/tree_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package git

import (
"bufio"
"io"
"strings"
)
Expand Down Expand Up @@ -96,21 +97,17 @@ func (t *Tree) listEntriesRecursive(extraArgs TrustedCmdArgs) (Entries, error) {
return t.entriesRecursive, nil
}

stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
AddArguments(extraArgs...).
AddDynamicArguments(t.ID.String()).
RunStdBytes(&RunOpts{Dir: t.repo.Path})
if runErr != nil {
return nil, runErr
}

var err error
t.entriesRecursive, err = parseTreeEntries(stdout, t)
if err == nil {
t.entriesRecursiveParsed = true
t.entriesRecursive = make([]*TreeEntry, 0)
if err := t.IterateEntriesRecursive(func(entry *TreeEntry) error {
t.entriesRecursive = append(t.entriesRecursive, entry)
return nil
}, extraArgs); err != nil {
t.entriesRecursive = nil
return nil, err
}

return t.entriesRecursive, err
t.entriesRecursiveParsed = true
return t.entriesRecursive, nil
}

// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
Expand All @@ -122,3 +119,50 @@ func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
return t.listEntriesRecursive(TrustedCmdArgs{"--long"})
}

// IterateEntriesRecursive returns iterate entries of current tree recursively including all subtrees
// extraArgs could be "-l" to get the size, which is slower
func (t *Tree) IterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
reader, writer := io.Pipe()
done := make(chan error)

go func(t *Tree, done chan error, writer *io.PipeWriter) {
runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
AddArguments(extraArgs...).
AddDynamicArguments(t.ID.String()).
Run(&RunOpts{
Dir: t.repo.Path,
Stdout: writer,
})

_ = writer.Close()

done <- runErr
}(t, done, writer)

scanner := bufio.NewScanner(reader)
for scanner.Scan() {
if err := scanner.Err(); err != nil {
return err
}

data := scanner.Bytes()
if err := iterateTreeEntries(data, t, func(entry *TreeEntry) error {
if err := f(entry); err != nil {
return err
}

select {
case <-t.repo.Ctx.Done():
return t.repo.Ctx.Err()
case runErr := <-done:
return runErr
default:
return nil
}
}); err != nil {
return err
}
}
return nil
}
Loading