Skip to content

Commit 3141fe3

Browse files
committed
fix: large extension metadata in db and json appears occasionally, bricking fleet
1 parent 9f00713 commit 3141fe3

File tree

3 files changed

+140
-28
lines changed

3 files changed

+140
-28
lines changed

src/api/changeOwnership.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import {
77
} from "../constants.js";
88
import db from "../db.js";
99
import {HTTP_STATUS_CODES} from "@aicore/libcommonutils";
10-
import {syncRegistryDBToS3JSON} from "../utils/sync.js";
10+
import {syncRegistryDBToS3JSON, trimAllStrings} from "../utils/sync.js";
1111
import {S3} from "../s3.js";
1212
import {createIssue} from "../github.js";
1313

@@ -74,6 +74,7 @@ async function _updateRegistryJSONinDB(registryPKGJSON) {
7474
// we need to update existing extension release only if no one updated the release while this change
7575
// was being published, so the conditional update with version check.
7676
console.log("updating extension", registryPKGJSON.EXTENSION_ID);
77+
registryPKGJSON = trimAllStrings(registryPKGJSON, 2048);
7778
status = await db.update(EXTENSIONS_DETAILS_TABLE, existingRegistryDocumentId,
7879
registryPKGJSON, `$.metadata.version='${registryPKGJSON.metadata.version}'`);
7980
if(!status.isSuccess) {

src/api/publishGithubRelease.js

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import {
1919
} from "../constants.js";
2020
import fs from "fs";
2121
import {S3} from "../s3.js";
22-
import {syncRegistryDBToS3JSON} from "../utils/sync.js";
22+
import {syncRegistryDBToS3JSON, trimAllStrings} from "../utils/sync.js";
2323

2424
const RELEASE_STATUS_PROCESSING = "processing",
2525
RELEASE_STATUS_FAILED = "failed",
@@ -415,6 +415,8 @@ async function _updateRegistryJSONinDB(existingRegistryPKGVersion, existingRegis
415415
let status;
416416
registryPKGJSON.syncPending = 'Y';// coco db doesnt support boolean queries yet
417417
registryPKGJSON.EXTENSION_ID = registryPKGJSON.metadata.name;
418+
// we dont want huge jsons in the registry
419+
registryPKGJSON = trimAllStrings(registryPKGJSON, 2048);
418420
if(existingRegistryDocumentId){
419421
// we need to update existing extension release only if no one updated the release while this release
420422
// was being published, so the conditional update with version check.
@@ -459,22 +461,31 @@ export async function publishGithubRelease(request, reply) {
459461
const extensionZipAsset = _validateGitHubReleaseAssets(newGithubReleaseDetails, issueMessages);
460462
const {extensionZipPath, existingRegistryPKGVersion, existingRegistryDocumentId, registryPKGJSON}=
461463
await _downloadAndValidateExtensionZip(githubReleaseTag, extensionZipAsset, repoDetails, issueMessages);
464+
const registryPKGJSONTrimmed = trimAllStrings(registryPKGJSON, 2048);
465+
const packageSize = registryPKGJSONTrimmed ? JSON.stringify(registryPKGJSONTrimmed).length : 0;
466+
if(packageSize > 10000){ // we dont accept large metadata
467+
issueMessages.push(`package.json metadata size too large.`);
468+
throw {status: HTTP_STATUS_CODES.BAD_REQUEST,
469+
updatePublishErrors: true,
470+
error: `package.json metadata size too large! Should be less than 9KB but was ${
471+
Math.round(packageSize/1024)}KB`};
472+
}
462473
_extensionZipPath = extensionZipPath;
463474
// we should also in the future do a virus scan, but will rely on av in users machine for the time being
464475
// https://developers.virustotal.com/reference/files-scan by Google Cloud is available for non-commercial apps.
465476

466477
await S3.uploadFile(EXTENSIONS_BUCKET,
467-
`extensions/${registryPKGJSON.metadata.name}-${registryPKGJSON.metadata.version}.zip`,
478+
`extensions/${registryPKGJSONTrimmed.metadata.name}-${registryPKGJSONTrimmed.metadata.version}.zip`,
468479
_extensionZipPath);
469480
fs.unlink(_extensionZipPath, console.error); // cleanup downloads. (But we don't check the result)
470481

471482
// publish new package json to registry db
472-
await _updateRegistryJSONinDB(existingRegistryPKGVersion, existingRegistryDocumentId, registryPKGJSON,
483+
await _updateRegistryJSONinDB(existingRegistryPKGVersion, existingRegistryDocumentId, registryPKGJSONTrimmed,
473484
issueMessages);
474485

475486
await syncRegistryDBToS3JSON();
476487

477-
await _UpdateReleaseSuccess(githubReleaseTag, existingReleaseInfo, registryPKGJSON);
488+
await _UpdateReleaseSuccess(githubReleaseTag, existingReleaseInfo, registryPKGJSONTrimmed);
478489

479490
const response = {
480491
message: "done"

src/utils/sync.js

Lines changed: 123 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,35 +10,130 @@ import db from "../db.js";
1010
import {S3} from "../s3.js";
1111
import {getRepoDetails} from "../github.js";
1212

13-
function _trimSize(value) {
13+
/**
 * Trims ALL string values reachable from `input` (including a top-level string)
 * to at most `trimLength` characters.
 *
 * Arrays and objects are walked recursively and modified IN PLACE. Strings are
 * immutable, so a top-level string cannot be modified in place; the trimmed
 * string is returned instead. There is no cycle detection, so `input` must not
 * contain circular references.
 *
 * @param {*} input - string | array | object. Falsy values are returned as is;
 *     other primitives are left untouched.
 * @param {number} trimLength - maximum length allowed for any string value.
 * @returns {*} the trimmed string for a string input, otherwise the same
 *     (now trimmed) `input` reference, so the output type always matches the input type.
 * @throws {Error} if `trimLength` is not a finite non-negative number.
 */
function _trimAllStringsInPlace(input, trimLength) {
    if (typeof trimLength !== "number" || !Number.isFinite(trimLength) || trimLength < 0) {
        throw new Error("trimLength must be a finite non-negative number");
    }
    if (!input) {
        return input;
    }

    // Fast path for top-level string
    if (typeof input === "string") {
        return input.length > trimLength
            ? input.slice(0, trimLength)
            : input;
    }

    function trimInPlace(node) {
        if (node === null || typeof node !== "object") {
            return; // numbers, booleans, undefined etc. have nothing to trim
        }
        // Object.keys yields the indices of an array as well as the keys of an
        // object, so mixed arrays and nested objects share one code path.
        for (const key of Object.keys(node)) {
            const value = node[key];
            if (typeof value !== "string") {
                trimInPlace(value);
            } else if (value.length > trimLength) {
                // only write when something actually changes
                node[key] = value.slice(0, trimLength);
            }
        }
    }

    trimInPlace(input);
    // Return the mutated container so every code path returns a value of the input's type.
    return input;
}
72+
73+
/**
 * Returns a copy of `input` in which every string value is at most
 * `trimLength` characters long. The caller's `input` is never modified.
 *
 * Arrays and objects are deep-cloned through a JSON round trip before being
 * trimmed, so anything JSON cannot represent is handled the way
 * `JSON.stringify` handles it.
 *
 * @param {*} input - string | array | object. Falsy values are returned as is.
 * @param {number} trimLength - maximum length allowed for any string value.
 * @returns {*} trimmed string for a string input, otherwise a trimmed deep clone of `input`.
 * @throws {Error} if `trimLength` is not a finite non-negative number.
 */
export function trimAllStrings(input, trimLength) {
    if (typeof trimLength !== "number" || !Number.isFinite(trimLength) || trimLength < 0) {
        throw new Error("trimLength must be a finite non-negative number");
    }
    if (!input) {
        return input;
    }
    if (typeof input === "string") {
        // A string cannot be trimmed in place. Cloning it and discarding the
        // helper's return value handed back the untrimmed string, so trim it here.
        return input.length > trimLength
            ? input.slice(0, trimLength)
            : input;
    }
    const clone = JSON.parse(JSON.stringify(input));
    _trimAllStringsInPlace(clone, trimLength);
    return clone;
}
84+
85+
const LONG_DESCRIPTION_256 = 256;
86+
const TITLE_64 = 64;
87+
const SHORT_KEY_48 = 48;
88+
const LONG_AUTHOR_128 = 128;
89+
// todo we should further enforce this check by trimming out the stored value in db. right now the user can
90+
// bomb us with large metadata.
91+
/**
 * Builds the size-limited version of a registry entry.
 *
 * Steps:
 *  1. `metadata['package-i18n']` is deleted from the entry that was passed in
 *     (note: this mutates the caller's object).
 *  2. The entry is cloned with every string capped at 1024 characters.
 *  3. Tighter per-field caps are applied on the clone's metadata:
 *     title 64, description 256, author/contributors 128 (author.name 64),
 *     keywords/categories/i18n 48.
 *
 * @param {Object} registryEntry - registry document; must have a `metadata` object.
 * @returns {Object} a trimmed clone of `registryEntry`.
 */
function _trimRegistryEntry(registryEntry) {
    // Caps one metadata field. A field that is itself a string is sliced
    // directly, so its cap does not depend on how trimAllStrings treats a
    // top-level string. Arrays and objects are delegated to trimAllStrings,
    // which caps every string nested inside them.
    const capField = (value, maxLength) => (
        typeof value === "string" ? value.slice(0, maxLength) : trimAllStrings(value, maxLength)
    );

    // we dont want huge metadata in the registry
    if (registryEntry.metadata['package-i18n']) {
        delete registryEntry.metadata['package-i18n'];
    }
    // general cap for every string in the entry; specific fields are tightened below
    registryEntry = trimAllStrings(registryEntry, 1024);
    const metadata = registryEntry.metadata;

    const fieldLimits = [
        ["title", TITLE_64],
        ["description", LONG_DESCRIPTION_256],
        ["author", LONG_AUTHOR_128],
        ["contributors", LONG_AUTHOR_128],
        ["keywords", SHORT_KEY_48],
        ["categories", SHORT_KEY_48],
        ["i18n", SHORT_KEY_48]
    ];
    for (const [field, maxLength] of fieldLimits) {
        if (metadata[field]) {
            metadata[field] = capField(metadata[field], maxLength);
        }
    }

    // author may be a plain string or an object; only the object form has a name
    if (metadata.author && metadata.author.name) {
        metadata.author.name = capField(metadata.author.name, TITLE_64);
    }

    return registryEntry;
}
129+
130+
/**
 * Returns a trimmed copy of the whole registry map (extension id -> registry entry).
 * The registry is deep-cloned through a JSON round trip first, so the registry
 * object passed in is left untouched; every entry of the clone is then replaced
 * by its `_trimRegistryEntry` result.
 *
 * @param {Object} registry - map of extension id to registry entry.
 * @returns {Object} new registry object containing only trimmed entries.
 */
function _trimFullRegistry(registry) {
    const trimmedRegistry = JSON.parse(JSON.stringify(registry));
    Object.keys(trimmedRegistry).forEach((extensionId) => {
        trimmedRegistry[extensionId] = _trimRegistryEntry(trimmedRegistry[extensionId]);
    });
    return trimmedRegistry;
}
43138

44139
export async function syncRegistryDBToS3JSON() {
@@ -60,7 +155,7 @@ export async function syncRegistryDBToS3JSON() {
60155
let newDoc = structuredClone(document);
61156
delete newDoc.documentId;
62157
delete newDoc.syncPending;
63-
_trimSize(newDoc);
158+
newDoc = _trimRegistryEntry(newDoc);
64159
console.log("Updating Registry entry with[existing, new]: ", registry[newDoc.metadata.name], newDoc);
65160
registry[newDoc.metadata.name] = newDoc;
66161
popularity[newDoc.metadata.name]= {
@@ -70,7 +165,7 @@ export async function syncRegistryDBToS3JSON() {
70165
}
71166
// now update all jsons in registry
72167
console.log("Writing main registry file: ", REGISTRY_FILE);
73-
await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(registry));
168+
await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(_trimFullRegistry(registry)));
74169
let registryVersion = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, REGISTRY_VERSION_FILE));
75170
registryVersion.version = registryVersion.version + 1;
76171
console.log("Writing registry version file version: ", registryVersion.version, REGISTRY_VERSION_FILE);
@@ -86,6 +181,8 @@ export async function syncRegistryDBToS3JSON() {
86181
delete document.documentId;
87182
delete document.syncPending;
88183
console.log("Setting syncPending for: ", document.metadata.name, documentID);
184+
// we dont want huge jsons in the registry
185+
document = trimAllStrings(document, 2048);
89186
// conditional update to make sure that no new release happened while we were updating this release
90187
updatePromises.push(db.update(EXTENSIONS_DETAILS_TABLE, documentID, document,
91188
`$.metadata.version='${document.metadata.version}'`));
@@ -122,6 +219,8 @@ async function _updateStargazerCount(owner, repo, extensionId) {
122219
}
123220
const documentId = document.documentId;
124221
document.gihubStars = repoDetails.stargazers_count;
222+
// we dont want huge jsons in the registry
223+
document = trimAllStrings(document, 2048);
125224
let status = await db.update(EXTENSIONS_DETAILS_TABLE, documentId, document,
126225
`$.metadata.version='${document.metadata.version}'`);
127226
if(!status.isSuccess) {
@@ -194,6 +293,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
194293
continue;
195294
}
196295
if(document){
296+
document = _trimRegistryEntry(document);
197297
popularity[document.metadata.name] = popularity[document.metadata.name] || {};
198298
}
199299
if(document && document.gihubStars &&
@@ -218,7 +318,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
218318
}
219319
// now update all jsons in registry
220320
console.log("_syncPopularityEvery15Minutes: Writing main registry file: ", REGISTRY_FILE);
221-
await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(registry));
321+
await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(_trimFullRegistry(registry)));
222322
// we dont increment registry version in this flow as this is just a popularity update
223323
console.log("_syncPopularityEvery15Minutes: Writing registry popularity file: ", POPULARITY_FILE);
224324
await S3.putObject(EXTENSIONS_BUCKET, POPULARITY_FILE, JSON.stringify(popularity));
@@ -246,7 +346,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
246346
//
247347
// // now update all jsons in registry
248348
// console.log("Writing main registry file after extension removal: ", REGISTRY_FILE);
249-
// await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(registry));
349+
// await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(_trimFullRegistry(registry)));
250350
// let registryVersion = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, REGISTRY_VERSION_FILE));
251351
// registryVersion.version = registryVersion.version + 1;
252352
// console.log("Writing registry version after extension removal file version: ", registryVersion.version,

0 commit comments

Comments
 (0)