@@ -10,35 +10,130 @@ import db from "../db.js";
1010import { S3 } from "../s3.js" ;
1111import { getRepoDetails } from "../github.js" ;
1212
13- function _trimSize ( value ) {
/**
 * Validates the `trimLength` argument shared by the trimming functions.
 *
 * @param {*} trimLength - value to validate
 * @throws {Error} if `trimLength` is not a finite, non-negative number
 */
function _assertValidTrimLength(trimLength) {
    if (typeof trimLength !== "number" || !Number.isFinite(trimLength) || trimLength < 0) {
        throw new Error("trimLength must be a finite non-negative number");
    }
}

/**
 * Recursively walks an array or object and shortens, in place, every string
 * slot that is longer than `trimLength`. Anything that is not an array or
 * object (numbers, booleans, null, undefined, ...) is left alone.
 *
 * @param {*} node - current value being visited
 * @param {number} trimLength - maximum number of characters (UTF-16 code units) to keep
 */
function _trimNode(node, trimLength) {
    if (node === null || typeof node !== "object") {
        return;
    }
    // Arrays are visited by index (so mixed arrays work); objects by own enumerable keys.
    const slots = Array.isArray(node) ? node.keys() : Object.keys(node);
    for (const slot of slots) {
        const value = node[slot];
        if (typeof value !== "string") {
            _trimNode(value, trimLength);
        } else if (value.length > trimLength) {
            node[slot] = value.slice(0, trimLength);
        }
    }
}

/**
 * Trims ALL string values reachable from `input` (including a top-level
 * string) to at most `trimLength` characters.
 *
 * Arrays and objects are modified in place and the same reference is
 * returned. Strings are immutable, so for a top-level string the trimmed
 * string is returned and the caller MUST use the return value.
 *
 * @param {string|Array|Object|*} input - value to trim
 * @param {number} trimLength - maximum number of characters to keep per string
 * @returns {*} a value of the same type as `input`; falsy inputs are returned unchanged
 * @throws {Error} if `trimLength` is not a finite, non-negative number
 */
function _trimAllStringsInPlace(input, trimLength) {
    _assertValidTrimLength(trimLength);
    if (!input) {
        return input;
    }

    // Fast path for top-level string
    if (typeof input === "string") {
        return input.length > trimLength ? input.slice(0, trimLength) : input;
    }

    _trimNode(input, trimLength);
    // Always hand the (now trimmed) input back so the output type matches the input type.
    return input;
}

/**
 * Non-mutating variant: returns a trimmed copy of `input` in which every
 * string (including a top-level string) has at most `trimLength` characters.
 * The value passed in is never modified.
 *
 * Arrays and objects are copied with a JSON round-trip, so the result only
 * contains JSON-representable data (e.g. `undefined` properties are dropped).
 *
 * @param {string|Array|Object|*} input - value to copy and trim
 * @param {number} trimLength - maximum number of characters to keep per string
 * @returns {*} trimmed copy with the same type as `input`; falsy inputs are returned unchanged
 * @throws {Error} if `trimLength` is not a finite, non-negative number
 */
export function trimAllStrings(input, trimLength) {
    _assertValidTrimLength(trimLength);
    if (!input) {
        return input;
    }
    if (typeof input === "string") {
        // Strings are immutable: no clone needed, just return the trimmed value.
        return _trimAllStringsInPlace(input, trimLength);
    }
    const clone = JSON.parse(JSON.stringify(input));
    // Return the helper's result rather than `clone` so every input type is handled uniformly.
    return _trimAllStringsInPlace(clone, trimLength);
}
84+
const LONG_DESCRIPTION_256 = 256;
const TITLE_64 = 64;
const SHORT_KEY_48 = 48;
const LONG_AUTHOR_128 = 128;
// Blanket cap applied to every string in an entry before the tighter per-field caps.
const ANY_STRING_1024 = 1024;

/**
 * Caps a single metadata field. A plain string is sliced directly; any other
 * value (array, object, ...) is handed to `trimAllStrings`, which returns a
 * copy with every nested string capped.
 *
 * @param {*} value - the metadata field value
 * @param {number} maxLength - maximum number of characters to keep per string
 * @returns {*} the capped value, same type as `value`
 */
function _capMetadataField(value, maxLength) {
    if (typeof value === "string") {
        return value.slice(0, maxLength);
    }
    return trimAllStrings(value, maxLength);
}

// TODO: we should further enforce this check by trimming the value stored in the db.
// Right now a user can bomb us with large metadata.
/**
 * Produces a size-limited copy of a registry entry for publishing.
 *
 * Steps:
 *  1. `metadata['package-i18n']` is deleted from the entry that was passed in
 *     (this happens before copying, so the argument itself loses that key).
 *  2. The entry is copied with every string capped at 1024 characters.
 *  3. Tighter caps are applied to selected `metadata` fields:
 *     title 64, description 256, author 128 (author.name 64),
 *     contributors 128, keywords / categories / i18n 48.
 *
 * @param {Object} registryEntry - entry with a `metadata` object
 * @returns {Object} trimmed copy of the entry; callers must use the return value
 */
function _trimRegistryEntry(registryEntry) {
    // we dont want huge metadata in the registry
    if (registryEntry.metadata['package-i18n']) {
        delete registryEntry.metadata['package-i18n'];
    }
    const trimmedEntry = trimAllStrings(registryEntry, ANY_STRING_1024);
    const metadata = trimmedEntry.metadata;

    // Per-field caps, applied in this order. Absent/falsy fields are skipped.
    const fieldLimits = [
        ["title", TITLE_64],
        ["description", LONG_DESCRIPTION_256],
        ["author", LONG_AUTHOR_128],
        ["contributors", LONG_AUTHOR_128],
        ["keywords", SHORT_KEY_48],
        ["categories", SHORT_KEY_48],
        ["i18n", SHORT_KEY_48]
    ];
    for (const [field, maxLength] of fieldLimits) {
        if (metadata[field]) {
            metadata[field] = _capMetadataField(metadata[field], maxLength);
        }
    }

    // When author is an object, its name gets the tighter title-sized cap.
    // (For a string author, `.name` is undefined and this is skipped.)
    if (metadata.author && metadata.author.name) {
        metadata.author.name = _capMetadataField(metadata.author.name, TITLE_64);
    }

    return trimmedEntry;
}
129+
/**
 * Returns a trimmed deep copy of the whole registry.
 *
 * The registry is first copied with a JSON round-trip so the object passed in
 * is left untouched, then every entry of the copy (keyed by extension id) is
 * replaced with the result of `_trimRegistryEntry`.
 *
 * @param {Object} registry - map of extension id to registry entry
 * @returns {Object} a new map with every entry trimmed
 */
function _trimFullRegistry(registry) {
    const trimmedRegistry = JSON.parse(JSON.stringify(registry));
    Object.keys(trimmedRegistry).forEach((extensionId) => {
        trimmedRegistry[extensionId] = _trimRegistryEntry(trimmedRegistry[extensionId]);
    });
    return trimmedRegistry;
}
43138
44139export async function syncRegistryDBToS3JSON ( ) {
@@ -60,7 +155,7 @@ export async function syncRegistryDBToS3JSON() {
60155 let newDoc = structuredClone ( document ) ;
61156 delete newDoc . documentId ;
62157 delete newDoc . syncPending ;
63- _trimSize ( newDoc ) ;
158+ newDoc = _trimRegistryEntry ( newDoc ) ;
64159 console . log ( "Updating Registry entry with[existing, new]: " , registry [ newDoc . metadata . name ] , newDoc ) ;
65160 registry [ newDoc . metadata . name ] = newDoc ;
66161 popularity [ newDoc . metadata . name ] = {
@@ -70,7 +165,7 @@ export async function syncRegistryDBToS3JSON() {
70165 }
71166 // now update all jsons in registry
72167 console . log ( "Writing main registry file: " , REGISTRY_FILE ) ;
73- await S3 . putObject ( EXTENSIONS_BUCKET , REGISTRY_FILE , JSON . stringify ( registry ) ) ;
168+ await S3 . putObject ( EXTENSIONS_BUCKET , REGISTRY_FILE , JSON . stringify ( _trimFullRegistry ( registry ) ) ) ;
74169 let registryVersion = JSON . parse ( await S3 . getObject ( EXTENSIONS_BUCKET , REGISTRY_VERSION_FILE ) ) ;
75170 registryVersion . version = registryVersion . version + 1 ;
76171 console . log ( "Writing registry version file version: " , registryVersion . version , REGISTRY_VERSION_FILE ) ;
@@ -86,6 +181,8 @@ export async function syncRegistryDBToS3JSON() {
86181 delete document . documentId ;
87182 delete document . syncPending ;
88183 console . log ( "Setting syncPending for: " , document . metadata . name , documentID ) ;
184+ // we dont want huge jsons in the registry
185+ document = trimAllStrings ( document , 2048 ) ;
89186 // conditional update to make sure that no new release happened while we were updating this release
90187 updatePromises . push ( db . update ( EXTENSIONS_DETAILS_TABLE , documentID , document ,
91188 `$.metadata.version='${ document . metadata . version } '` ) ) ;
@@ -122,6 +219,8 @@ async function _updateStargazerCount(owner, repo, extensionId) {
122219 }
123220 const documentId = document . documentId ;
124221 document . gihubStars = repoDetails . stargazers_count ;
222+ // we dont want huge jsons in the registry
223+ document = trimAllStrings ( document , 2048 ) ;
125224 let status = await db . update ( EXTENSIONS_DETAILS_TABLE , documentId , document ,
126225 `$.metadata.version='${ document . metadata . version } '` ) ;
127226 if ( ! status . isSuccess ) {
@@ -194,6 +293,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
194293 continue ;
195294 }
196295 if ( document ) {
296+ document = _trimRegistryEntry ( document ) ;
197297 popularity [ document . metadata . name ] = popularity [ document . metadata . name ] || { } ;
198298 }
199299 if ( document && document . gihubStars &&
@@ -218,7 +318,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
218318 }
219319 // now update all jsons in registry
220320 console . log ( "_syncPopularityEvery15Minutes: Writing main registry file: " , REGISTRY_FILE ) ;
221- await S3 . putObject ( EXTENSIONS_BUCKET , REGISTRY_FILE , JSON . stringify ( registry ) ) ;
321+ await S3 . putObject ( EXTENSIONS_BUCKET , REGISTRY_FILE , JSON . stringify ( _trimFullRegistry ( registry ) ) ) ;
222322 // we dont increment registry version in this flow as this is just a popularity update
223323 console . log ( "_syncPopularityEvery15Minutes: Writing registry popularity file: " , POPULARITY_FILE ) ;
224324 await S3 . putObject ( EXTENSIONS_BUCKET , POPULARITY_FILE , JSON . stringify ( popularity ) ) ;
@@ -246,7 +346,7 @@ export async function _syncPopularityEvery15Minutes() { // exported for unit tes
246346//
247347// // now update all jsons in registry
248348// console.log("Writing main registry file after extension removal: ", REGISTRY_FILE);
249- // await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(registry));
349+ // await S3.putObject(EXTENSIONS_BUCKET, REGISTRY_FILE, JSON.stringify(_trimFullRegistry( registry) ));
250350// let registryVersion = JSON.parse(await S3.getObject(EXTENSIONS_BUCKET, REGISTRY_VERSION_FILE));
251351// registryVersion.version = registryVersion.version + 1;
252352// console.log("Writing registry version after extension removal file version: ", registryVersion.version,
0 commit comments