@@ -387,24 +387,31 @@ export class PostgresSyncRulesStorage
     checkpoint: InternalOpId,
     dataBuckets: Map<string, InternalOpId>,
     options?: storage.BucketDataBatchOptions
-  ): AsyncIterable<storage.SyncBucketDataBatch> {
+  ): AsyncIterable<storage.SyncBucketDataChunk> {
     if (dataBuckets.size == 0) {
       return;
     }

+    // Internal naming:
+    // We do a query for one "batch", which may be returned in multiple "chunks".
+    // Each chunk is limited to a single bucket, and is limited in length and size.
+    // There are also overall batch length and size limits.
+    // The results of each batch query are streamed in separate sets of rows, which
+    // may or may not match up with chunks.
+
     const end = checkpoint ?? BIGINT_MAX;
     const filters = Array.from(dataBuckets.entries()).map(([name, start]) => ({
       bucket_name: name,
       start: start
     }));

-    const rowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
-    const sizeLimit = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;
+    const batchRowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT;
+    const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES;

-    let batchSize = 0;
-    let currentBatch: utils.SyncBucketData | null = null;
+    let chunkSizeBytes = 0;
+    let currentChunk: utils.SyncBucketData | null = null;
     let targetOp: InternalOpId | null = null;
-    let rowCount = 0;
+    let batchRowCount = 0;

     /**
      * It is possible to perform this query with JSONB join. e.g.
@@ -458,7 +465,7 @@ export class PostgresSyncRulesStorage
         params: [
           { type: 'int4', value: this.group_id },
           { type: 'int8', value: end },
-          { type: 'int4', value: rowLimit + 1 },
+          { type: 'int4', value: batchRowLimit },
           ...filters.flatMap((f) => [
             { type: 'varchar' as const, value: f.bucket_name },
             { type: 'int8' as const, value: f.start } satisfies StatementParam
@@ -469,28 +476,27 @@ export class PostgresSyncRulesStorage

       for (const row of decodedRows) {
         const { bucket_name } = row;
-        const rowSize = row.data ? row.data.length : 0;
-
-        if (
-          currentBatch == null ||
-          currentBatch.bucket != bucket_name ||
-          batchSize >= sizeLimit ||
-          (currentBatch?.data.length && batchSize + rowSize > sizeLimit) ||
-          currentBatch.data.length >= rowLimit
-        ) {
+        const rowSizeBytes = row.data ? row.data.length : 0;
+
+        const sizeExceeded =
+          chunkSizeBytes >= chunkSizeLimitBytes ||
+          (currentChunk?.data.length && chunkSizeBytes + rowSizeBytes > chunkSizeLimitBytes) ||
+          (currentChunk?.data.length ?? 0) >= batchRowLimit;
+
+        if (currentChunk == null || currentChunk.bucket != bucket_name || sizeExceeded) {
           let start: string | undefined = undefined;
-          if (currentBatch != null) {
-            if (currentBatch.bucket == bucket_name) {
-              currentBatch.has_more = true;
+          if (currentChunk != null) {
+            if (currentChunk.bucket == bucket_name) {
+              currentChunk.has_more = true;
+              start = currentChunk.next_after;
             }

-            const yieldBatch = currentBatch;
-            start = currentBatch.after;
-            currentBatch = null;
-            batchSize = 0;
-            yield { batch: yieldBatch, targetOp: targetOp };
+            const yieldChunk = currentChunk;
+            currentChunk = null;
+            chunkSizeBytes = 0;
+            yield { chunkData: yieldChunk, targetOp: targetOp };
             targetOp = null;
-            if (rowCount >= rowLimit) {
+            if (batchRowCount >= batchRowLimit) {
               // We've yielded all the requested rows
               break;
             }
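The split condition in this hunk is the heart of the chunking logic: a new chunk starts when the bucket changes, when the byte budget for the current chunk is exhausted, or when the current chunk already holds a full batch worth of rows. A minimal standalone sketch of that decision, assuming the same limits as in the diff; the ChunkState interface and shouldStartNewChunk name are illustrative only, not part of the codebase:

// Illustrative only: the real code keeps this state in locals inside the loop.
interface ChunkState {
  bucket: string;
  rowCount: number;
  sizeBytes: number;
}

// Mirrors `currentChunk == null || currentChunk.bucket != bucket_name || sizeExceeded`.
function shouldStartNewChunk(
  current: ChunkState | null,
  bucketName: string,
  rowSizeBytes: number,
  chunkSizeLimitBytes: number,
  batchRowLimit: number
): boolean {
  if (current == null || current.bucket != bucketName) {
    return true;
  }
  return (
    current.sizeBytes >= chunkSizeLimitBytes ||
    // A non-empty chunk is never grown past the byte limit, but a single
    // oversized row still gets a chunk of its own.
    (current.rowCount > 0 && current.sizeBytes + rowSizeBytes > chunkSizeLimitBytes) ||
    current.rowCount >= batchRowLimit
  );
}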
@@ -503,11 +509,13 @@ export class PostgresSyncRulesStorage
             }
             start = internalToExternalOpId(startOpId);
           }
-          currentBatch = {
+          currentChunk = {
             bucket: bucket_name,
             after: start,
+            // this is updated when we yield the chunk
             has_more: false,
             data: [],
+            // this is updated incrementally
             next_after: start
           };
           targetOp = null;
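With this change, when the previous chunk covered the same bucket, the new chunk starts at the previous chunk's next_after rather than its after, so consecutive chunks of one bucket chain together without skipping or repeating ops. Roughly, two consecutive chunks for the same bucket end up shaped like the sketch below; the bucket name and op ids are made up for illustration and not produced by any real query:

// Hypothetical example values, not produced by any real query.
const firstChunk = {
  bucket: 'by_user["u1"]',
  after: '0',        // where the caller asked this bucket to start
  has_more: true,    // set when the next chunk for the same bucket is started
  data: [],          // ops with op_id 1..1000 in the real case
  next_after: '1000' // op_id of the last op in this chunk
};
const secondChunk = {
  bucket: 'by_user["u1"]',
  after: '1000',     // equals firstChunk.next_after, so the sequence continues seamlessly
  has_more: false,
  data: [],          // ops with op_id 1001 and up
  next_after: '1342'
};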
@@ -527,20 +535,25 @@ export class PostgresSyncRulesStorage
           }
         }

-        currentBatch.data.push(entry);
-        currentBatch.next_after = entry.op_id;
+        currentChunk.data.push(entry);
+        currentChunk.next_after = entry.op_id;

-        batchSize += rowSize;
+        chunkSizeBytes += rowSizeBytes;

         // Manually track the total rows yielded
-        rowCount++;
+        batchRowCount++;
       }
     }

-    if (currentBatch != null) {
-      const yieldBatch = currentBatch;
-      currentBatch = null;
-      yield { batch: yieldBatch, targetOp: targetOp };
+    if (currentChunk != null) {
+      const yieldChunk = currentChunk;
+      currentChunk = null;
+      // This is the final chunk in the batch.
+      // There can only be more data if we hit the batch row limit with this batch.
+      // If batchRowCount == batchRowLimit, we don't actually know whether there is more data,
+      // but it is safe to report has_more = true in that case.
+      yieldChunk.has_more = batchRowCount >= batchRowLimit;
+      yield { chunkData: yieldChunk, targetOp: targetOp };
       targetOp = null;
     }
   }
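For context, a consumer of this iterator might look roughly like the following sketch. It assumes the method shown here is getBucketDataBatch on PostgresSyncRulesStorage and that sendToClient is some transport function; only the { chunkData, targetOp } shape of each yielded value is taken from the diff above.

// Hypothetical consumer: forward each chunk as soon as it is produced.
// Imports of PostgresSyncRulesStorage and InternalOpId omitted for brevity.
declare function sendToClient(message: unknown): void;

async function streamBucketData(
  storage: PostgresSyncRulesStorage,
  checkpoint: InternalOpId,
  buckets: Map<string, InternalOpId>
): Promise<void> {
  for await (const { chunkData, targetOp } of storage.getBucketDataBatch(checkpoint, buckets)) {
    // Each chunk covers a single bucket and stays within the per-chunk size limits.
    sendToClient({
      bucket: chunkData.bucket,
      data: chunkData.data,
      hasMore: chunkData.has_more,
      nextAfter: chunkData.next_after,
      targetOp
    });
  }
}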