Skip to content

Commit dbfe811

Browse files
Removed stored logger from HuggingFace source (#4328)
1 parent 143f2f5 commit dbfe811

File tree

3 files changed

+17
-18
lines changed

3 files changed

+17
-18
lines changed

pkg/sources/huggingface/client.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -150,11 +150,11 @@ func (c *HFClient) get(ctx context.Context, url string, target interface{}) erro
150150
}
151151

152152
if resp.StatusCode == http.StatusUnauthorized {
153-
return errors.New("invalid API key.")
153+
return errors.New("invalid API key")
154154
}
155155

156156
if resp.StatusCode == http.StatusForbidden {
157-
return errors.New("access to this repo is restricted and you are not in the authorized list. Visit the repository to ask for access.")
157+
return errors.New("access to this repo is restricted and you are not in the authorized list. Visit the repository to ask for access")
158158
}
159159

160160
defer resp.Body.Close()

pkg/sources/huggingface/huggingface.go

Lines changed: 13 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@ type Source struct {
6868
scanOptions *git.ScanOptions
6969

7070
apiClient *HFClient
71-
log logr.Logger
7271
conn *sourcespb.Huggingface
7372
jobPool *errgroup.Group
7473
resumeInfoMutex sync.Mutex
@@ -112,21 +111,21 @@ type filteredRepoCache struct {
112111
include, exclude []glob.Glob
113112
}
114113

115-
func (s *Source) newFilteredRepoCache(c cache.Cache[string], include, exclude []string) *filteredRepoCache {
114+
func (s *Source) newFilteredRepoCache(ctx context.Context, c cache.Cache[string], include, exclude []string) *filteredRepoCache {
116115
includeGlobs := make([]glob.Glob, 0, len(include))
117116
excludeGlobs := make([]glob.Glob, 0, len(exclude))
118117
for _, ig := range include {
119118
g, err := glob.Compile(ig)
120119
if err != nil {
121-
s.log.V(1).Info("invalid include glob", "include_value", ig, "err", err)
120+
ctx.Logger().V(1).Info("invalid include glob", "include_value", ig, "err", err)
122121
continue
123122
}
124123
includeGlobs = append(includeGlobs, g)
125124
}
126125
for _, eg := range exclude {
127126
g, err := glob.Compile(eg)
128127
if err != nil {
129-
s.log.V(1).Info("invalid exclude glob", "exclude_value", eg, "err", err)
128+
ctx.Logger().V(1).Info("invalid exclude glob", "exclude_value", eg, "err", err)
130129
continue
131130
}
132131
excludeGlobs = append(excludeGlobs, g)
@@ -169,14 +168,12 @@ func (c *filteredRepoCache) includeRepo(s string) bool {
169168
}
170169

171170
// Init returns an initialized HuggingFace source.
172-
func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, sourceID sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
171+
func (s *Source) Init(ctx context.Context, name string, jobID sources.JobID, sourceID sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
173172
err := git.CmdCheck()
174173
if err != nil {
175174
return err
176175
}
177176

178-
s.log = aCtx.Logger()
179-
180177
s.name = name
181178
s.sourceID = sourceID
182179
s.jobID = jobID
@@ -208,17 +205,17 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
208205
return err
209206
}
210207

211-
s.filteredModelsCache = s.newFilteredRepoCache(simple.NewCache[string](),
208+
s.filteredModelsCache = s.newFilteredRepoCache(ctx, simple.NewCache[string](),
212209
append(s.conn.GetModels(), s.conn.GetIncludeModels()...),
213210
s.conn.GetIgnoreModels(),
214211
)
215212

216-
s.filteredSpacesCache = s.newFilteredRepoCache(simple.NewCache[string](),
213+
s.filteredSpacesCache = s.newFilteredRepoCache(ctx, simple.NewCache[string](),
217214
append(s.conn.GetSpaces(), s.conn.GetIncludeSpaces()...),
218215
s.conn.GetIgnoreSpaces(),
219216
)
220217

221-
s.filteredDatasetsCache = s.newFilteredRepoCache(simple.NewCache[string](),
218+
s.filteredDatasetsCache = s.newFilteredRepoCache(ctx, simple.NewCache[string](),
222219
append(s.conn.GetDatasets(), s.conn.GetIncludeDatasets()...),
223220
s.conn.GetIgnoreDatasets(),
224221
)
@@ -249,8 +246,8 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
249246
Link: giturl.GenerateLink(repository, commit, file, line),
250247
Timestamp: sanitizer.UTF8(timestamp),
251248
Line: line,
252-
Visibility: s.visibilityOf(aCtx, repository),
253-
ResourceType: s.getResourceType(aCtx, repository),
249+
Visibility: s.visibilityOf(ctx, repository),
250+
ResourceType: s.getResourceType(ctx, repository),
254251
},
255252
},
256253
}
@@ -369,7 +366,7 @@ func (s *Source) enumerate(ctx context.Context) error {
369366
}
370367
}
371368

372-
s.log.Info("Completed enumeration", "num_models", len(s.models), "num_spaces", len(s.spaces), "num_datasets", len(s.datasets))
369+
ctx.Logger().Info("Completed enumeration", "num_models", len(s.models), "num_spaces", len(s.spaces), "num_datasets", len(s.datasets))
373370

374371
// We must sort the repos so we can resume later if necessary.
375372
sort.Strings(s.models)
@@ -507,7 +504,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
507504

508505
repos := s.getReposListByType(resourceType)
509506

510-
s.log.V(2).Info("Found "+resourceType+" to scan", "count", len(repos))
507+
ctx.Logger().V(2).Info("Found "+resourceType+" to scan", "count", len(repos))
511508

512509
// If there is resume information available, limit this scan to only the repos that still need scanning.
513510
reposToScan, progressIndexOffset := sources.FilterReposToResume(repos, s.GetProgress().EncodedResumeInfo)
@@ -539,7 +536,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
539536
if !ok {
540537
// This should never happen.
541538
err := fmt.Errorf("no repoInfo for URL: %s", repoURL)
542-
s.log.Error(err, "failed to scan "+resourceType)
539+
ctx.Logger().Error(err, "failed to scan "+resourceType)
543540
return nil
544541
}
545542
repoCtx := context.WithValues(ctx, resourceType, repoURL)
@@ -565,7 +562,7 @@ func (s *Source) scanRepos(ctx context.Context, chunksChan chan *sources.Chunk,
565562

566563
_ = s.jobPool.Wait()
567564
if scanErrs.Count() > 0 {
568-
s.log.V(0).Info("failed to scan some repositories", "error_count", scanErrs.Count(), "errors", scanErrs.String())
565+
ctx.Logger().V(0).Info("failed to scan some repositories", "error_count", scanErrs.Count(), "errors", scanErrs.String())
569566
}
570567
s.SetProgressComplete(len(repos), len(repos), "Completed HuggingFace "+resourceType+" scan", "")
571568
return nil

pkg/sources/huggingface/repo.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,13 @@ func (s *Source) cloneRepo(
5656

5757
switch s.conn.GetCredential().(type) {
5858
case *sourcespb.Huggingface_Unauthenticated:
59+
ctx.Logger().V(2).Info("cloning repo without authentication", "repo_url", repoURL)
5960
path, repo, err = git.CloneRepoUsingUnauthenticated(ctx, repoURL)
6061
if err != nil {
6162
return "", nil, err
6263
}
6364
case *sourcespb.Huggingface_Token:
65+
ctx.Logger().V(2).Info("cloning repo with token authentication", "repo_url", repoURL)
6466
path, repo, err = git.CloneRepoUsingToken(ctx, s.huggingfaceToken, repoURL, "", true)
6567
if err != nil {
6668
return "", nil, err

0 commit comments

Comments
 (0)