@@ -24,6 +24,7 @@ import core, {
24
24
type Doc ,
25
25
docKey ,
26
26
type Domain ,
27
+ DOMAIN_COLLABORATOR ,
27
28
DOMAIN_MODEL ,
28
29
type FullTextSearchContext ,
29
30
getFullTextIndexableAttributes ,
@@ -247,6 +248,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {
247
248
}
248
249
249
250
const byDomain = groupByArray ( allIndexed , ( it ) => this . hierarchy . getDomain ( it ) )
251
+
252
+ // Drop domains that must be skipped; currently only DOMAIN_COLLABORATOR is removed
253
+ byDomain . delete ( DOMAIN_COLLABORATOR )
254
+
250
255
return Array . from ( byDomain . entries ( ) )
251
256
. sort ( ( a , b ) => {
252
257
const ap = domainPriorities [ a [ 0 ] ] ?? 0
@@ -265,21 +270,19 @@ export class FullTextIndexPipeline implements FullTextPipeline {
265
270
classes : Ref < Class < Doc > > [ ] ,
266
271
control ?: ConsumerControl
267
272
) : Promise < void > {
268
- ctx . warn ( 'verify document structure' , { workspace : this . workspace . uuid } )
273
+ ctx . warn ( 'reindex verify document structure' , { domain , workspace : this . workspace . uuid } )
269
274
270
275
let processed = 0
271
276
let processedCommunication = 0
272
277
let hasCards = false
273
- await ctx . with ( 'reindex- domain' , { domain } , async ( ctx ) => {
278
+ await ctx . with ( 'reindex domain' , { domain } , async ( ctx ) => {
274
279
// Iterate over all domain documents and add appropriate entries
275
280
const allDocs = this . storage . rawFind ( ctx , domain )
276
281
try {
277
- let lastPrint = 0
282
+ let lastPrint = platformNow ( )
278
283
const pushQueue = new ElasticPushQueue ( this . fulltextAdapter , this . workspace , ctx , control )
279
284
while ( true ) {
280
- if ( control !== undefined ) {
281
- await control ?. heartbeat ( )
282
- }
285
+ await control ?. heartbeat ( )
283
286
const docs = await allDocs . find ( ctx )
284
287
if ( docs . length === 0 ) {
285
288
break
@@ -307,7 +310,12 @@ export class FullTextIndexPipeline implements FullTextPipeline {
307
310
308
311
const now = platformNow ( )
309
312
if ( now - lastPrint > printThresholdMs ) {
310
- ctx . info ( 'processed' , { processed, elapsed : Math . round ( now - lastPrint ) , domain } )
313
+ ctx . info ( 'processed' , {
314
+ processed,
315
+ elapsed : Math . round ( now - lastPrint ) ,
316
+ domain,
317
+ workspace : this . workspace . uuid
318
+ } )
311
319
lastPrint = now
312
320
}
313
321
}
@@ -441,7 +449,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
441
449
}
442
450
const indexedDoc = createIndexedDoc ( doc , this . hierarchy . findAllMixins ( doc ) , doc . space )
443
451
444
- await rateLimit . exec ( async ( ) => {
452
+ await rateLimit . add ( async ( ) => {
445
453
await ctx . with ( 'process-document' , { _class : doc . _class } , async ( ctx ) => {
446
454
try {
447
455
// Collect all indexable values
@@ -452,7 +460,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {
452
460
453
461
for ( const [ , v ] of Object . entries ( content ) ) {
454
462
if ( v . attr . type . _class === core . class . TypeBlob ) {
455
- await this . processBlob ( ctx , v , doc , indexedDoc )
463
+ await ctx . with ( 'process-blob' , { } , ( ctx ) => this . processBlob ( ctx , v , doc , indexedDoc ) , {
464
+ attr : v . attr . name ,
465
+ value : v . value
466
+ } )
456
467
continue
457
468
}
458
469
@@ -545,7 +556,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
545
556
let processed = 0
546
557
const cardsInfo = new Map < CardID , { space : Ref < Space > , _class : Ref < Class < Doc > > } > ( )
547
558
const rateLimit = new RateLimiter ( 10 )
548
- let lastPrint = 0
559
+ let lastPrint = platformNow ( )
549
560
await ctx . with ( 'process-message-groups' , { } , async ( ctx ) => {
550
561
let groups = await communicationApi . findMessagesGroups ( this . communicationSession , {
551
562
limit : messageGroupsLimit ,
@@ -590,7 +601,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
590
601
if ( message . removed ) {
591
602
continue
592
603
}
593
- await rateLimit . exec ( async ( ) => {
604
+ await rateLimit . add ( async ( ) => {
594
605
await this . processCommunicationMessage (
595
606
ctx ,
596
607
pushQueue ,
@@ -603,7 +614,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
603
614
processed += 1
604
615
const now = platformNow ( )
605
616
if ( now - lastPrint > printThresholdMs ) {
606
- ctx . info ( 'processed' , { processedCommunication : processed , elapsed : Math . round ( now - lastPrint ) } )
617
+ ctx . info ( 'processed' , {
618
+ processedCommunication : processed ,
619
+ elapsed : Math . round ( now - lastPrint ) ,
620
+ workspace : this . workspace . uuid
621
+ } )
607
622
lastPrint = now
608
623
}
609
624
}
@@ -652,7 +667,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
652
667
if ( this . cancelling ) {
653
668
return processed
654
669
}
655
- await rateLimit . exec ( async ( ) => {
670
+ await rateLimit . add ( async ( ) => {
656
671
await this . processCommunicationMessage (
657
672
ctx ,
658
673
pushQueue ,
@@ -672,7 +687,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
672
687
processed += 1
673
688
const now = platformNow ( )
674
689
if ( now - lastPrint > printThresholdMs ) {
675
- ctx . info ( 'processed' , { processedCommunication : processed , elapsed : Math . round ( now - lastPrint ) } )
690
+ ctx . info ( 'processed' , {
691
+ processedCommunication : processed ,
692
+ elapsed : Math . round ( now - lastPrint ) ,
693
+ workspace : this . workspace . uuid
694
+ } )
676
695
lastPrint = now
677
696
}
678
697
}
@@ -992,7 +1011,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
992
1011
}
993
1012
}
994
1013
995
- @withContext ( 'process-blob' )
996
1014
private async processBlob (
997
1015
ctx : MeasureContext < any > ,
998
1016
v : { value : any , attr : AnyAttribute } ,
@@ -1005,6 +1023,9 @@ export class FullTextIndexPipeline implements FullTextPipeline {
1005
1023
if ( ref === '' || ref . startsWith ( 'http://' ) || ref . startsWith ( 'https://' ) ) {
1006
1024
return
1007
1025
}
1026
+ if ( ref . startsWith ( '{' ) ) {
1027
+ return
1028
+ }
1008
1029
if ( v . attr . name === 'avatar' || v . attr . attributeOf === contactPlugin . class . Contact ) {
1009
1030
return
1010
1031
}
@@ -1126,10 +1147,8 @@ export class FullTextIndexPipeline implements FullTextPipeline {
1126
1147
// We have blob, we need to decode it to string.
1127
1148
const contentType = ( docInfo . contentType ?? defaultContentType ) . split ( ';' ) [ 0 ]
1128
1149
1129
- if (
1130
- ( contentType . includes ( 'text/' ) && contentType !== 'text/rtf' ) ||
1131
- contentType . includes ( 'application/vnd.github.VERSION.diff' )
1132
- ) {
1150
+ const ct = contentType . toLocaleLowerCase ( )
1151
+ if ( ( ct . includes ( 'text/' ) && contentType !== 'text/rtf' ) || ct . includes ( 'application/vnd.github.version.diff' ) ) {
1133
1152
await this . handleTextBlob ( ctx , docInfo , indexedDoc )
1134
1153
} else if ( isBlobAllowed ( contentType ) ) {
1135
1154
await this . handleBlob ( ctx , docInfo , indexedDoc )
@@ -1140,25 +1159,31 @@ export class FullTextIndexPipeline implements FullTextPipeline {
1140
1159
/**
 * Converts a stored blob to text and appends the result to the indexed
 * document's full-text summary.
 *
 * Does nothing when `docInfo` is undefined or when no blob data could be read
 * (for example, when no storage adapter is available).
 *
 * @param ctx - measure context used to wrap the fetch and conversion steps
 * @param docInfo - blob metadata; `_id`, `size` and `contentType` are read
 * @param indexedDoc - document whose `fulltextSummary` is extended in place
 * @throws Error when the blob is larger than 30MB
 */
private async handleBlob (ctx: MeasureContext<any>, docInfo: Blob | undefined, indexedDoc: IndexedDoc): Promise<void> {
  if (docInfo === undefined) {
    return
  }

  // Keep only the media type; parameters after ';' (e.g. charset) are dropped.
  const contentType = (docInfo.contentType ?? '').split(';')[0]

  // The whole blob is loaded into memory below, so refuse oversized blobs up front.
  const maxBlobSize = 30 * 1024 * 1024
  if (docInfo.size > maxBlobSize) {
    throw new Error('Blob size exceeds limit of 30MB')
  }

  const chunks = await ctx.with('fetch', {}, (ctx) => this.storageAdapter?.read(ctx, this.workspace, docInfo._id))
  if (chunks === undefined) {
    // The optional chain yields undefined when there is no storage adapter;
    // Buffer.concat(undefined) would throw a TypeError, so skip the blob instead.
    return
  }
  const buffer = Buffer.concat(chunks)

  const rawText = await ctx.with(
    'to-text',
    {},
    (ctx) => this.contentAdapter.content(ctx, this.workspace.uuid, docInfo._id, contentType, buffer),
    {
      workspace: this.workspace.uuid,
      blobId: docInfo._id,
      contentType
    }
  )

  // Collapse runs of spaces, tabs and form feeds into single spaces,
  // then collapse runs of blank lines into a single newline.
  const textContent = rawText
    .split(/ +|\t+|\f+/)
    .filter((it) => it)
    .join(' ')
    .split(/\n\n+/)
    .join('\n')

  indexedDoc.fulltextSummary += '\n' + textContent
}
1164
1189
@@ -1190,6 +1215,7 @@ function isBlobAllowed (contentType: string): boolean {
1190
1215
! contentType . includes ( 'binary/octet-stream' ) &&
1191
1216
! contentType . includes ( 'application/octet-stream' ) &&
1192
1217
! contentType . includes ( 'application/zip' ) &&
1193
- ! contentType . includes ( 'application/x-zip-compressed' )
1218
+ ! contentType . includes ( 'application/x-zip-compressed' ) &&
1219
+ ! contentType . includes ( 'application/link-preview' )
1194
1220
)
1195
1221
}
0 commit comments