@@ -68,7 +68,6 @@ type Source struct {
68
68
scanOptions * git.ScanOptions
69
69
70
70
apiClient * HFClient
71
- log logr.Logger
72
71
conn * sourcespb.Huggingface
73
72
jobPool * errgroup.Group
74
73
resumeInfoMutex sync.Mutex
@@ -112,21 +111,21 @@ type filteredRepoCache struct {
112
111
include , exclude []glob.Glob
113
112
}
114
113
115
- func (s * Source ) newFilteredRepoCache (c cache.Cache [string ], include , exclude []string ) * filteredRepoCache {
114
+ func (s * Source ) newFilteredRepoCache (ctx context. Context , c cache.Cache [string ], include , exclude []string ) * filteredRepoCache {
116
115
includeGlobs := make ([]glob.Glob , 0 , len (include ))
117
116
excludeGlobs := make ([]glob.Glob , 0 , len (exclude ))
118
117
for _ , ig := range include {
119
118
g , err := glob .Compile (ig )
120
119
if err != nil {
121
- s . log .V (1 ).Info ("invalid include glob" , "include_value" , ig , "err" , err )
120
+ ctx . Logger () .V (1 ).Info ("invalid include glob" , "include_value" , ig , "err" , err )
122
121
continue
123
122
}
124
123
includeGlobs = append (includeGlobs , g )
125
124
}
126
125
for _ , eg := range exclude {
127
126
g , err := glob .Compile (eg )
128
127
if err != nil {
129
- s . log .V (1 ).Info ("invalid exclude glob" , "exclude_value" , eg , "err" , err )
128
+ ctx . Logger () .V (1 ).Info ("invalid exclude glob" , "exclude_value" , eg , "err" , err )
130
129
continue
131
130
}
132
131
excludeGlobs = append (excludeGlobs , g )
@@ -169,14 +168,12 @@ func (c *filteredRepoCache) includeRepo(s string) bool {
169
168
}
170
169
171
170
// Init returns an initialized HuggingFace source.
172
- func (s * Source ) Init (aCtx context.Context , name string , jobID sources.JobID , sourceID sources.SourceID , verify bool , connection * anypb.Any , concurrency int ) error {
171
+ func (s * Source ) Init (ctx context.Context , name string , jobID sources.JobID , sourceID sources.SourceID , verify bool , connection * anypb.Any , concurrency int ) error {
173
172
err := git .CmdCheck ()
174
173
if err != nil {
175
174
return err
176
175
}
177
176
178
- s .log = aCtx .Logger ()
179
-
180
177
s .name = name
181
178
s .sourceID = sourceID
182
179
s .jobID = jobID
@@ -208,17 +205,17 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
208
205
return err
209
206
}
210
207
211
- s .filteredModelsCache = s .newFilteredRepoCache (simple .NewCache [string ](),
208
+ s .filteredModelsCache = s .newFilteredRepoCache (ctx , simple .NewCache [string ](),
212
209
append (s .conn .GetModels (), s .conn .GetIncludeModels ()... ),
213
210
s .conn .GetIgnoreModels (),
214
211
)
215
212
216
- s .filteredSpacesCache = s .newFilteredRepoCache (simple .NewCache [string ](),
213
+ s .filteredSpacesCache = s .newFilteredRepoCache (ctx , simple .NewCache [string ](),
217
214
append (s .conn .GetSpaces (), s .conn .GetIncludeSpaces ()... ),
218
215
s .conn .GetIgnoreSpaces (),
219
216
)
220
217
221
- s .filteredDatasetsCache = s .newFilteredRepoCache (simple .NewCache [string ](),
218
+ s .filteredDatasetsCache = s .newFilteredRepoCache (ctx , simple .NewCache [string ](),
222
219
append (s .conn .GetDatasets (), s .conn .GetIncludeDatasets ()... ),
223
220
s .conn .GetIgnoreDatasets (),
224
221
)
@@ -249,8 +246,8 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
249
246
Link : giturl .GenerateLink (repository , commit , file , line ),
250
247
Timestamp : sanitizer .UTF8 (timestamp ),
251
248
Line : line ,
252
- Visibility : s .visibilityOf (aCtx , repository ),
253
- ResourceType : s .getResourceType (aCtx , repository ),
249
+ Visibility : s .visibilityOf (ctx , repository ),
250
+ ResourceType : s .getResourceType (ctx , repository ),
254
251
},
255
252
},
256
253
}
@@ -369,7 +366,7 @@ func (s *Source) enumerate(ctx context.Context) error {
369
366
}
370
367
}
371
368
372
- s . log .Info ("Completed enumeration" , "num_models" , len (s .models ), "num_spaces" , len (s .spaces ), "num_datasets" , len (s .datasets ))
369
+ ctx . Logger () .Info ("Completed enumeration" , "num_models" , len (s .models ), "num_spaces" , len (s .spaces ), "num_datasets" , len (s .datasets ))
373
370
374
371
// We must sort the repos so we can resume later if necessary.
375
372
sort .Strings (s .models )
@@ -507,7 +504,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
507
504
508
505
repos := s .getReposListByType (resourceType )
509
506
510
- s . log .V (2 ).Info ("Found " + resourceType + " to scan" , "count" , len (repos ))
507
+ ctx . Logger () .V (2 ).Info ("Found " + resourceType + " to scan" , "count" , len (repos ))
511
508
512
509
// If there is resume information available, limit this scan to only the repos that still need scanning.
513
510
reposToScan , progressIndexOffset := sources .FilterReposToResume (repos , s .GetProgress ().EncodedResumeInfo )
@@ -539,7 +536,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
539
536
if ! ok {
540
537
// This should never happen.
541
538
err := fmt .Errorf ("no repoInfo for URL: %s" , repoURL )
542
- s . log .Error (err , "failed to scan " + resourceType )
539
+ ctx . Logger () .Error (err , "failed to scan " + resourceType )
543
540
return nil
544
541
}
545
542
repoCtx := context .WithValues (ctx , resourceType , repoURL )
@@ -565,7 +562,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
565
562
566
563
_ = s .jobPool .Wait ()
567
564
if scanErrs .Count () > 0 {
568
- s . log .V (0 ).Info ("failed to scan some repositories" , "error_count" , scanErrs .Count (), "errors" , scanErrs .String ())
565
+ ctx . Logger () .V (0 ).Info ("failed to scan some repositories" , "error_count" , scanErrs .Count (), "errors" , scanErrs .String ())
569
566
}
570
567
s .SetProgressComplete (len (repos ), len (repos ), "Completed HuggingFace " + resourceType + " scan" , "" )
571
568
return nil
0 commit comments