@@ -13,6 +13,7 @@ package ingester
1313import (
1414 "context"
1515 "database/sql"
16+ "sync"
1617 "time"
1718
1819 "github.com/google/uuid"
@@ -33,6 +34,12 @@ type PackageSqlIngesterParams struct {
3334
3435type packageSqlIngester struct {
3536 deps PackageSqlIngesterParams
37+
38+ maintainerCacheMutex sync.Mutex
39+ maintainerIDCache map [string ]uuid.UUID
40+
41+ releaseDependencyCacheMutex sync.Mutex
42+ releaseDependencyIDCache map [string ]uuid.UUID
3643}
3744
3845type PackageSqlIngester interface {
@@ -104,13 +111,24 @@ func (s *packageSqlIngester) mapReleases(ctx context.Context, packageId uuid.UUI
104111 return releaseIds , nil
105112}
106113
114+ func releaseDependencyCacheKey (dep metadata.Dependency ) string {
115+ return dep .Name + dep .Version
116+ }
117+
107118func (s * packageSqlIngester ) mapReleaseDependencies (
108119 ctx context.Context ,
109120 releaseId uuid.UUID ,
110121 ds []metadata.Dependency ,
111122) ([]uuid.UUID , error ) {
112123 var releaseDependencyIds []uuid.UUID
113124 for _ , dep := range ds {
125+ // there are a lot of maintainer updates for a given package, so we try to cache them
126+ cacheKey := releaseDependencyCacheKey (dep )
127+ if cachedReleaseDependencyID , ok := s .maintainerIDCache [cacheKey ]; ok {
128+ releaseDependencyIds = append (releaseDependencyIds , cachedReleaseDependencyID )
129+ continue
130+ }
131+
114132 dependencyPackageId , err := upsertReleaseDependencyPackage (ctx , s .deps .DB , model.Package {
115133 Name : dep .Name ,
116134 PackageManager : mapper .NpmV ,
@@ -140,14 +158,29 @@ func (s *packageSqlIngester) mapReleaseDependencies(
140158 return releaseDependencyIds , nil
141159}
142160
161+ func packageMaintainerCacheKey (packageId uuid.UUID , pm metadata.Maintainer ) string {
162+ return packageId .String () + pm .Email + pm .Name
163+ }
164+
143165func (s * packageSqlIngester ) mapMaintainers (ctx context.Context , packageId uuid.UUID , p []metadata.Maintainer ) ([]uuid.UUID , error ) {
144166 var maintainerIds []uuid.UUID
145167 for _ , pm := range p {
168+ // there are a lot of maintainer updates for a given package, so we try to cache them
169+ cacheKey := packageMaintainerCacheKey (packageId , pm )
170+ if cachedMaintainerID , ok := s .maintainerIDCache [cacheKey ]; ok {
171+ maintainerIds = append (maintainerIds , cachedMaintainerID )
172+ continue
173+ }
174+
146175 insertedId , err := s .mapMaintainer (ctx , pm )
147176 if err != nil {
148177 return maintainerIds , err
149178 }
150179
180+ s .maintainerCacheMutex .Lock ()
181+ s .maintainerIDCache [cacheKey ] = insertedId
182+ s .maintainerCacheMutex .Unlock ()
183+
151184 err = upsertPackageMaintainer (ctx , s .deps .DB , model.PackageMaintainer {
152185 PackageID : packageId ,
153186 MaintainerID : insertedId ,
@@ -185,6 +218,7 @@ func (s *packageSqlIngester) Ingest(ctx context.Context, pkg *metadata.PackageMe
185218
186219func NewPackageSqlIngester (deps PackageSqlIngesterParams ) PackageSqlIngester {
187220 return & packageSqlIngester {
188- deps : deps ,
221+ deps : deps ,
222+ maintainerIDCache : map [string ]uuid.UUID {},
189223 }
190224}
0 commit comments