@@ -7,6 +7,7 @@ package languagestats
77
88import (
99 "bytes"
10+ "context"
1011 "io"
1112
1213 "code.gitea.io/gitea/modules/analyze"
@@ -18,8 +19,8 @@ import (
1819 "github.com/go-enry/go-enry/v2"
1920)
2021
21- // GetLanguageStats calculates language stats for git repository at specified commit
22- func GetLanguageStats ( repo * git.Repository , commitID string ) (map [string ]int64 , error ) {
22+ // CalcLanguageStats calculates language stats for the git repository at the specified commit.
23+ func CalcLanguageStats ( ctx context. Context , repo * git.Repository , commitID string ) (map [string ]int64 , error ) {
2324 // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
2425 // so let's create a batch stdin and stdout
2526 batchStdinWriter , batchReader , cancel , err := repo .CatFileBatch (repo .Ctx )
@@ -59,11 +60,6 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
5960
6061 tree := commit .Tree
6162
62- entries , err := tree .ListEntriesRecursiveWithSize ()
63- if err != nil {
64- return nil , err
65- }
66-
6763 checker , err := attribute .NewBatchChecker (repo , commitID , attribute .LinguistAttributes )
6864 if err != nil {
6965 return nil , err
@@ -82,18 +78,12 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
8278 firstExcludedLanguage := ""
8379 firstExcludedLanguageSize := int64 (0 )
8480
85- for _ , f := range entries {
86- select {
87- case <- repo .Ctx .Done ():
88- return sizes , repo .Ctx .Err ()
89- default :
90- }
91-
81+ if err := tree .IterateEntriesRecursive (ctx , func (ctx context.Context , f * git.TreeEntry ) error {
9282 contentBuf .Reset ()
9383 content = contentBuf .Bytes ()
9484
9585 if f .Size () == 0 {
96- continue
86+ return nil
9787 }
9888
9989 isVendored := optional .None [bool ]()
@@ -104,19 +94,19 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
10494 attrLinguistGenerated := optional .None [bool ]()
10595 if err == nil {
10696 if isVendored = attrs .GetVendored (); isVendored .ValueOrDefault (false ) {
107- continue
97+ return nil
10898 }
10999
110100 if attrLinguistGenerated = attrs .GetGenerated (); attrLinguistGenerated .ValueOrDefault (false ) {
111- continue
101+ return nil
112102 }
113103
114104 if isDocumentation = attrs .GetDocumentation (); isDocumentation .ValueOrDefault (false ) {
115- continue
105+ return nil
116106 }
117107
118108 if isDetectable = attrs .GetDetectable (); ! isDetectable .ValueOrDefault (true ) {
119- continue
109+ return nil
120110 }
121111
122112 if hasLanguage := attrs .GetLanguage (); hasLanguage .Value () != "" {
@@ -130,27 +120,27 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
130120
131121 // this language will always be added to the size
132122 sizes [language ] += f .Size ()
133- continue
123+ return nil
134124 }
135125 }
136126
137127 if (! isVendored .Has () && analyze .IsVendor (f .Name ())) ||
138128 enry .IsDotFile (f .Name ()) ||
139129 (! isDocumentation .Has () && enry .IsDocumentation (f .Name ())) ||
140130 enry .IsConfiguration (f .Name ()) {
141- continue
131+ return nil
142132 }
143133
144134 // If content can not be read or file is too big just do detection by filename
145135
146136 if f .Size () <= bigFileSize {
147137 if err := writeID (f .ID .String ()); err != nil {
148- return nil , err
138+ return err
149139 }
150140 _ , _ , size , err := git .ReadBatchLine (batchReader )
151141 if err != nil {
152142 log .Debug ("Error reading blob: %s Err: %v" , f .ID .String (), err )
153- return nil , err
143+ return err
154144 }
155145
156146 sizeToRead := size
@@ -162,11 +152,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
162152
163153 _ , err = contentBuf .ReadFrom (io .LimitReader (batchReader , sizeToRead ))
164154 if err != nil {
165- return nil , err
155+ return err
166156 }
167157 content = contentBuf .Bytes ()
168158 if err := git .DiscardFull (batchReader , discard ); err != nil {
169- return nil , err
159+ return err
170160 }
171161 }
172162
@@ -178,14 +168,14 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
178168 isGenerated = enry .IsGenerated (f .Name (), content )
179169 }
180170 if isGenerated {
181- continue
171+ return nil
182172 }
183173
184174 // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
185175 // - e.g. do all the detection tests using filename first before reading content.
186176 language := analyze .GetCodeLanguage (f .Name (), content )
187177 if language == "" {
188- continue
178+ return nil
189179 }
190180
191181 // group languages, such as Pug -> HTML; SCSS -> CSS
@@ -206,6 +196,9 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
206196 firstExcludedLanguage = language
207197 firstExcludedLanguageSize += f .Size ()
208198 }
199+ return nil
200+ }, git.TrustedCmdArgs {"--long" }); err != nil {
201+ return sizes , err
209202 }
210203
211204 // If there are no included languages add the first excluded language
0 commit comments