Skip to content

Commit 43724c4

Browse files
committed
feat: sync registry.json and popularity.json every 15 minutes with github stars and download info
1 parent bd5ac6d commit 43724c4

File tree

3 files changed

+5141
-5045
lines changed

3 files changed

+5141
-5045
lines changed

src/utils/sync.js

Lines changed: 85 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -62,31 +62,40 @@ export async function syncRegistryDBToS3JSON() {
6262
}
6363

6464

65-
const ONE_HOUR = 1000*60*60, HOURS_IN_DAY = 24, ONE_DAY = ONE_HOUR * HOURS_IN_DAY;
65+
const FIFTEEN_MINUTES = 1000*60*15, ONE_HOUR = 1000*60*60, HOURS_IN_DAY = 24, ONE_DAY = ONE_HOUR * HOURS_IN_DAY;
6666
let extensionsStarsCollectedToday = []; // will be reset every day, collect stars from GitHub once daily
6767

68+
/**
 * Looks up the document stored for an extension in EXTENSIONS_DETAILS_TABLE, querying the
 * index by FIELD_EXTENSION_ID.
 *
 * This helper never throws for lookup problems: callers treat a `null` result as
 * "skip this extension and continue with the next one". That matters when many lookups are
 * awaited together, where a single rejected lookup would otherwise abort the whole batch.
 *
 * @param {string} extensionId - value matched against FIELD_EXTENSION_ID in the index query.
 * @returns {Promise<Object|null>} the matching document when exactly one document is found;
 *      `null` when the query fails, rejects, or returns zero or several documents.
 * @private
 */
async function _getExtensionInfoFromDB(extensionId) {
    const queryObj = {[FIELD_EXTENSION_ID]: extensionId};
    let registryPKGJSON;
    try {
        registryPKGJSON = await db.getFromIndex(EXTENSIONS_DETAILS_TABLE, queryObj);
    } catch (err) {
        // a rejected query is handled exactly like an unsuccessful one: log and move on
        console.error("Error getting extensionPKG details from db: " + extensionId, err);
        return null;
    }
    if(!registryPKGJSON || !registryPKGJSON.isSuccess){
        console.error("Error getting extensionPKG details from db: " + extensionId);
        // dont fail, continue with next repo
        return null;
    }
    const documents = registryPKGJSON.documents;
    // only an unambiguous single match is usable; a missing list, no match or duplicates yield null
    if(Array.isArray(documents) && documents.length === 1){
        return documents[0];
    }
    return null;
}
82+
6883
/**
 * Reads the repository details via getRepoDetails and writes the reported
 * `stargazers_count` onto the extension's document (field `gihubStars`) in
 * EXTENSIONS_DETAILS_TABLE.
 *
 * Nothing is written when the repository details or the extension document are unavailable.
 * A failed db update is only logged, so the caller can carry on with the next repository.
 *
 * @param owner - forwarded to getRepoDetails (presumably the GitHub account name).
 * @param repo - forwarded to getRepoDetails (presumably the repository name).
 * @param {string} extensionId - id used to look the extension document up in the db.
 * @returns {Promise<void>}
 * @private
 */
async function _updateStargazerCount(owner, repo, extensionId) {
    const repoDetails = await getRepoDetails(owner, repo, false);
    if(!repoDetails) {
        return;
    }
    const extensionDocument = await _getExtensionInfoFromDB(extensionId);
    if(!extensionDocument){
        // dont fail, continue with next repo
        return;
    }
    extensionDocument.gihubStars = repoDetails.stargazers_count;
    // the update carries a condition on the version that was read above
    const versionCondition = `$.metadata.version='${extensionDocument.metadata.version}'`;
    const updateStatus = await db.update(EXTENSIONS_DETAILS_TABLE, extensionDocument.documentId,
        extensionDocument, versionCondition);
    if(!updateStatus.isSuccess) {
        // dont fail, continue with next repo
        console.error("Error updating stars for extension in db: " + extensionId);
    }
}
@@ -95,7 +104,7 @@ async function _updateStargazerCount(owner, repo, extensionId) {
95104
* Collects GitHub star counts every hour, in batches, because GitHub throttles its API at 2000 requests per hour.
96105
*/
97106
export async function _collectStarsWorker() { // exported for tests only
98-
console.log("Number of extensions whose stars collected today: ", extensionsStarsCollectedToday.length);
107+
console.log("_collectStarsWorker: Number of extensions whose stars collected today: ", extensionsStarsCollectedToday.length);
99108
let registry = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, REGISTRY_FILE));
100109
let extensionIDs = Object.keys(registry);
101110
const numExtensionsToCollect = (extensionIDs.length/HOURS_IN_DAY) * 2; // so that the task completes in half day
@@ -123,10 +132,66 @@ export async function _collectStarsWorker() { // exported for tests only
123132
extensionsStarsCollectedToday.push(extensionID);
124133
collectedStarsForExtensions.push(extensionID);
125134
}
126-
console.log(`collecting stars for ${collectedStarsForExtensions.length} extensions of MAX allowed ${numExtensionsToCollect}`);
135+
console.log(`_collectStarsWorker: collecting stars for ${collectedStarsForExtensions.length} extensions of MAX allowed ${numExtensionsToCollect}`);
127136
return {collectedStarsForExtensions, extensionsStarsCollectedToday};
128137
}
129138

139+
/**
 * Copies the star count (`gihubStars`) and download count (`totalDownloads`) stored in the db
 * documents into the registry and popularity JSON files in S3.
 *
 * Flow:
 *  1. download REGISTRY_FILE and POPULARITY_FILE from EXTENSIONS_BUCKET,
 *  2. fetch the db document of every extension listed in the registry,
 *  3. copy over each count that differs from what either JSON file currently holds,
 *  4. write both files back only if at least one count changed.
 *
 * Extensions are left untouched when their document is unavailable, when the document is marked
 * `syncPending === 'Y'`, or when the document's `metadata.name` has no entry in the registry file.
 * A count of `0` is a valid value and is synced like any other number.
 * Does not increase registry_version.json.
 *
 * @returns {Promise<void>}
 * @private
 */
export async function _syncPopularityEvery15Minutes() { // exported for unit tests
    console.log("_syncPopularityEvery15Minutes: Downloading extension registry for _syncPopularityEvery15Minutes");
    let registry = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, REGISTRY_FILE));
    let popularity = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, POPULARITY_FILE));
    const allDocuments = await Promise.all(
        Object.keys(registry).map((extensionId) => _getExtensionInfoFromDB(extensionId)));
    const syncedFields = ["gihubStars", "totalDownloads"];
    let somethingChanged = false;
    for(let document of allDocuments){
        if(!document || document.syncPending === 'Y') {
            // we dont sync if some extension is in the middle of publishing itself. retain the old values
            // in registry as is in the case.
            continue;
        }
        const extensionName = document.metadata.name;
        const registryEntry = registry[extensionName];
        if(!registryEntry) {
            // we cant find the extension in the registry file. This happens when a new extension is not
            // yet done publishing with publish api. Or if an extension is deleted/blacklisted from registry json.
            continue;
        }
        popularity[extensionName] = popularity[extensionName] || {};
        const popularityEntry = popularity[extensionName];
        for(const field of syncedFields) {
            const value = document[field];
            // 0 is a real count (e.g. a repo that lost its last star); only absent values are skipped
            const hasValue = value === 0 || Boolean(value);
            if(hasValue && (registryEntry[field] !== value || popularityEntry[field] !== value)) {
                somethingChanged = true;
                registryEntry[field] = value;
                popularityEntry[field] = value;
            }
        }
    }
    if(!somethingChanged){
        console.log("_syncPopularityEvery15Minutes: no changes to popularity. Syncing nothing.");
        return;
    }
    // now update all jsons in registry
    console.log("_syncPopularityEvery15Minutes: Writing main registry file: ", REGISTRY_FILE);
    await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(registry));
    // we dont increment registry version in this flow as this is just a popularity update
    console.log("_syncPopularityEvery15Minutes: Writing registry popularity file: ", POPULARITY_FILE);
    await S3.putObject(EXTENSIONS_BUCKET, POPULARITY_FILE, JSON.stringify(popularity));
}
194+
130195
/* c8 ignore start */
131196
// Not covered by automated tests (no time); it was tested manually. If you touch this code, manually test it thoroughly.
132197
let worker;
@@ -135,17 +200,9 @@ export function startCollectStarsWorker() {
135200
return;
136201
}
137202
worker = setInterval(_collectStarsWorker, ONE_HOUR);
203+
setInterval(_syncPopularityEvery15Minutes, FIFTEEN_MINUTES);
138204
setInterval(()=>{
139205
extensionsStarsCollectedToday = [];
140206
}, ONE_DAY);
141207
}
142208
/* c8 ignore end */
143-
/**
144-
* sets download count of extensions.
145-
* publishes registry.json and popularity.json into s3
146-
* does not increase registry_version.json
147-
* @private
148-
*/
149-
function _syncPopularityHourly() {
150-
151-
}

0 commit comments

Comments
 (0)