@@ -243,6 +243,7 @@ export default class PageContext extends ContentFeature {
243243 metaDescription : this . getMetaDescription ( ) ,
244244 content : mainContent ,
245245 truncated,
246+ fullContentLength : this . fullContentLength , // Include full content length before truncation
246247 headings : this . getHeadings ( ) ,
247248 links : this . getLinks ( ) ,
248249 images : this . getImages ( ) ,
@@ -272,7 +273,9 @@ export default class PageContext extends ContentFeature {
272273 }
273274
274275 getMainContent ( ) {
275- const maxLength = this . getFeatureSetting ( 'maxContentLength' ) || 950 ;
276+ const maxLength = this . getFeatureSetting ( 'maxContentLength' ) || 9500 ;
277+ // Used to avoid large content serialization
278+ const upperLimit = this . getFeatureSetting ( 'upperLimit' ) || 500000 ;
276279 let excludeSelectors = this . getFeatureSetting ( 'excludeSelectors' ) || [ '.ad' , '.sidebar' , '.footer' , '.nav' , '.header' ] ;
277280 excludeSelectors = excludeSelectors . concat ( [ 'script' , 'style' , 'link' , 'meta' , 'noscript' , 'svg' , 'canvas' ] ) ;
278281
@@ -296,16 +299,20 @@ export default class PageContext extends ContentFeature {
296299 } ) ;
297300
298301 this . log . info ( 'Calling domToMarkdown' , clone . innerHTML ) ;
299- content += domToMarkdown ( clone , maxLength ) ;
302+ content += domToMarkdown ( clone , upperLimit ) ;
300303 }
304+ content = content . trim ( ) ;
305+
306+ // Store the full content length before truncation
307+ this . fullContentLength = content . length ;
301308
302309 // Limit content length
303310 if ( content . length > maxLength ) {
304311 this . log . info ( 'Truncating content' , content ) ;
305312 content = content . substring ( 0 , maxLength ) + '...' ;
306313 }
307314
308- return content . trim ( ) ;
315+ return content ;
309316 }
310317
311318 getHeadings ( ) {
0 commit comments