Skip to content

Commit bc3a1a8

Browse files
(tree) Add incremental summary optimization opt-in to AnnotatedAllowedTypes (#25408)
## Description With this PR, a set of allowed types in a schema can be opted in to incremental summary optimization. These allowed types will be optimized during summary such that if they don't change across summaries, they will not be encoded and their content will not be included in the summary that is uploaded to the service. The usage pattern is described below. ## Usage pattern - Incremental summarization only works for forest type `ForestTypeOptimized` when the compression strategy is `TreeCompressionStrategyExtended.CompressedIncremental`. In addition, `SharedTreeOptionsInternal.shouldEncodeFieldIncrementally` must be passed when creating the tree. This callback function will be called for each set of allowed types in the schema to determine if it should be incrementally summarized. These configs can be passed in when creating a tree via the `configuredSharedTree` API. - A set of allowed types can be opted in to incremental summary optimization by setting an `incrementalSummaryHint` symbol to true in the `AllowedTypesMetadata.custom` property. - The helper function `shouldIncrementallySummarizeAllowedTypes` (added in this PR) can be used to implement the `SharedTreeOptionsInternal.shouldEncodeFieldIncrementally` callback function. It takes in the schema that contains the allowed types and the node identifier and field key of the target allowed types. The last two parameters are the same as the ones in the callback function. It will find the `AllowedTypesMetadata.custom` for the passed-in allowed types and determine if the `incrementalSummaryHint` symbol is set to true. [AB#41866](https://dev.azure.com/fluidframework/235294da-091d-4c29-84fc-cdfc3d90890b/_workitems/edit/41866)
1 parent accd36a commit bc3a1a8

File tree

19 files changed

+333
-256
lines changed

19 files changed

+333
-256
lines changed

packages/dds/tree/src/core/forest/forest.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,14 @@ export interface IForestSubscription {
8484
clone(schema: TreeStoredSchemaSubscription, anchors: AnchorSet): IEditableForest;
8585

8686
/**
87-
* Generate a TreeChunk for the content in the given field cursor.
87+
* Generate a TreeChunk[] for the current field (and its children) of cursor.
8888
* This can be used to chunk data that is then inserted into the forest.
8989
*
9090
* @remarks
91-
* Like {@link chunkField}, but forces the results into a single TreeChunk.
92-
* While any TreeChunk is compatible with any forest, this method creates one optimized for this specific forest.
91+
* Similar to {@link chunkField} but it creates chunks optimized for this specific forest by using its compression policy.
9392
* The provided data must be compatible with the forest's current schema.
9493
*/
95-
chunkField(cursor: ITreeCursorSynchronous): TreeChunk;
94+
chunkField(cursor: ITreeCursorSynchronous): TreeChunk[];
9695

9796
/**
9897
* Allocates a cursor in the "cleared" state.

packages/dds/tree/src/feature-libraries/chunked-forest/chunkTree.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,15 @@ export function chunkFieldSingle(
197197
policy: ChunkCompressor,
198198
): TreeChunk {
199199
const chunks = chunkField(cursor, policy);
200+
return combineChunks(chunks);
201+
}
202+
203+
/**
204+
* Create a single TreeChunk from an array of TreeChunks.
205+
* @remarks
206+
* This takes ownership of the provided TreeChunk references, and returns an owned reference.
207+
*/
208+
export function combineChunks(chunks: TreeChunk[]): TreeChunk {
200209
if (chunks.length === 1) {
201210
return chunks[0] ?? oob();
202211
}

packages/dds/tree/src/feature-libraries/chunked-forest/chunkedForest.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ import {
4444
} from "../../util/index.js";
4545

4646
import { BasicChunk, BasicChunkCursor, type SiblingsOrKey } from "./basicChunk.js";
47-
import { type IChunker, basicChunkTree, chunkFieldSingle, chunkTree } from "./chunkTree.js";
47+
import { type IChunker, basicChunkTree, chunkField, chunkTree } from "./chunkTree.js";
4848

4949
function makeRoot(): BasicChunk {
5050
return new BasicChunk(aboveRootPlaceholder, new Map());
@@ -90,8 +90,8 @@ export class ChunkedForest implements IEditableForest {
9090
return new ChunkedForest(this.roots, schema, this.chunker.clone(schema), anchors);
9191
}
9292

93-
public chunkField(cursor: ITreeCursorSynchronous): TreeChunk {
94-
return chunkFieldSingle(cursor, { idCompressor: this.idCompressor, policy: this.chunker });
93+
public chunkField(cursor: ITreeCursorSynchronous): TreeChunk[] {
94+
return chunkField(cursor, { idCompressor: this.idCompressor, policy: this.chunker });
9595
}
9696

9797
public forgetAnchor(anchor: Anchor): void {

packages/dds/tree/src/feature-libraries/chunked-forest/codec/compressedEncode.ts

Lines changed: 20 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import {
3636
SpecialField,
3737
version,
3838
} from "./format.js";
39-
import type { ChunkReferenceId, IncrementalEncoder } from "./codecs.js";
39+
import type { IncrementalEncoder } from "./codecs.js";
4040

4141
/**
4242
* Encode data from `FieldBatch` into an `EncodedFieldBatch`.
@@ -223,8 +223,8 @@ export const anyNodeEncoder: NodeEncoder = {
223223
outputBuffer: BufferFormat,
224224
): void {
225225
// TODO: Fast path uniform chunk content.
226-
const shape = context.nodeEncoderFromSchema(cursor.type);
227-
AnyShape.encodeNode(cursor, context, outputBuffer, shape);
226+
const nodeEncoder = context.nodeEncoderFromSchema(cursor.type);
227+
AnyShape.encodeNode(cursor, context, outputBuffer, nodeEncoder);
228228
},
229229

230230
shape: AnyShape.instance,
@@ -350,7 +350,7 @@ export class InlineArrayEncoder
350350
}
351351

352352
/**
353-
* Encodes the shape for a nested array as {@link EncodedNestedArray} shape.
353+
* Encodes the shape for a nested array as {@link EncodedNestedArrayShape} shape.
354354
*/
355355
export class NestedArrayShape extends ShapeGeneric<EncodedChunkShape> {
356356
/**
@@ -420,30 +420,9 @@ export class NestedArrayEncoder implements FieldEncoder {
420420
}
421421

422422
/**
423-
* Encodes a chunk with the {@link EncodedIncrementalChunkShape} shape.
424-
* This chunks will be encoded separately, i.e., the contents of the chunk will not be part of the main buffer.
425-
* A reference to the chunk will be stored in the main buffer as an {@link ChunkReferenceId}.
423+
* Encodes the shape for an incremental chunk as {@link EncodedIncrementalChunkShape} shape.
426424
*/
427425
export class IncrementalChunkShape extends ShapeGeneric<EncodedChunkShape> {
428-
/**
429-
* Encodes all the nodes in the chunk at the cursor position using `InlineArrayShape`.
430-
*/
431-
public static encodeChunk(chunk: TreeChunk, context: EncoderContext): BufferFormat {
432-
const chunkOutputBuffer: BufferFormat = [];
433-
const nodesEncoder = asNodesEncoder(anyNodeEncoder);
434-
const chunkCursor = chunk.cursor();
435-
chunkCursor.firstNode();
436-
const chunkLength = chunkCursor.chunkLength;
437-
for (let index = 0; index < chunkLength; index++) {
438-
nodesEncoder.encodeNodes(chunkCursor, context, chunkOutputBuffer);
439-
}
440-
assert(
441-
chunkCursor.mode === CursorLocationType.Fields,
442-
0xc29 /* should return to fields mode when finished encoding */,
443-
);
444-
return chunkOutputBuffer;
445-
}
446-
447426
public encodeShape(
448427
identifiers: DeduplicationTable<string>,
449428
shapes: DeduplicationTable<Shape>,
@@ -464,9 +443,10 @@ export class IncrementalChunkShape extends ShapeGeneric<EncodedChunkShape> {
464443
}
465444

466445
/**
467-
* Encodes an incremental field whose chunks are encoded separately and referenced by their {@link ChunkReferenceId}.
468-
* The shape of the content of this field is {@link NestedShape} where the items in the array are
469-
* the {@link ChunkReferenceId}s of the encoded chunks.
446+
* Encodes an incremental field whose tree chunks are encoded separately and referenced by their {@link ChunkReferenceId}.
447+
* The shape of the content of this field is {@link NestedArrayShape}.
448+
* The inner items of the array have shape {@link IncrementalChunkShape} and are {@link ChunkReferenceId}s
449+
* of the encoded chunks.
470450
*/
471451
export const incrementalFieldEncoder: FieldEncoder = {
472452
encodeField(
@@ -475,12 +455,13 @@ export const incrementalFieldEncoder: FieldEncoder = {
475455
outputBuffer: BufferFormat,
476456
): void {
477457
assert(
478-
context.shouldEncodeIncrementally,
479-
0xc2a /* incremental encoding must be enabled to use IncrementalFieldShape */,
458+
context.incrementalEncoder !== undefined,
459+
"incremental encoder must be defined to use incrementalFieldEncoder",
480460
);
481461

482-
const chunkReferenceIds = context.encodeIncrementalField(cursor, (chunk: TreeChunk) =>
483-
IncrementalChunkShape.encodeChunk(chunk, context),
462+
const chunkReferenceIds = context.incrementalEncoder.encodeIncrementalField(
463+
cursor,
464+
(chunk: TreeChunk) => compressedEncode([chunk.cursor()], context),
484465
);
485466
outputBuffer.push(chunkReferenceIds);
486467
},
@@ -540,7 +521,12 @@ export class EncoderContext implements NodeEncodeBuilder, FieldEncodeBuilder {
540521
private readonly fieldEncoderFromPolicy: FieldEncoderPolicy,
541522
public readonly fieldShapes: ReadonlyMap<FieldKindIdentifier, FlexFieldKind>,
542523
public readonly idCompressor: IIdCompressor,
543-
private readonly incrementalEncoder: IncrementalEncoder | undefined,
524+
/**
525+
* To be used to encode incremental chunks, if any.
526+
* @remarks
527+
* See {@link IncrementalEncoder} for more information.
528+
*/
529+
public readonly incrementalEncoder: IncrementalEncoder | undefined,
544530
) {}
545531

546532
public nodeEncoderFromSchema(schemaName: TreeNodeSchemaIdentifier): NodeEncoder {
@@ -556,30 +542,6 @@ export class EncoderContext implements NodeEncodeBuilder, FieldEncodeBuilder {
556542
public nestedArrayEncoder(inner: NodeEncoder): NestedArrayEncoder {
557543
return getOrCreate(this.nestedArrayEncoders, inner, () => new NestedArrayEncoder(inner));
558544
}
559-
560-
public get shouldEncodeIncrementally(): boolean {
561-
return this.incrementalEncoder !== undefined;
562-
}
563-
564-
/**
565-
* {@link IncrementalEncoder.encodeIncrementalField}
566-
*/
567-
public encodeIncrementalField(
568-
cursor: ITreeCursorSynchronous,
569-
encoder: (chunk: TreeChunk) => BufferFormat,
570-
): ChunkReferenceId[] {
571-
assert(
572-
this.incrementalEncoder !== undefined,
573-
0xc2b /* incremental encoding must be enabled */,
574-
);
575-
// Encoder for the chunk that encodes its data using the provided encoder function and
576-
// updates the encoded data for shapes and identifiers.
577-
const chunkEncoder = (chunk: TreeChunk): EncodedFieldBatch => {
578-
const chunkOutputBuffer = encoder(chunk);
579-
return updateShapesAndIdentifiersEncoding(version, [chunkOutputBuffer]);
580-
};
581-
return this.incrementalEncoder.encodeIncrementalField(cursor, chunkEncoder);
582-
}
583545
}
584546

585547
export interface NodeEncodeBuilder {

packages/dds/tree/src/feature-libraries/chunked-forest/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export {
1212
type IChunker,
1313
chunkFieldSingle,
1414
chunkField,
15+
combineChunks,
1516
} from "./chunkTree.js";
1617
export { buildChunkedForest } from "./chunkedForest.js";
1718
export {

packages/dds/tree/src/feature-libraries/flex-tree/lazyField.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ import {
5353
import { LazyEntity } from "./lazyEntity.js";
5454
import { type LazyTreeNode, getOrCreateHydratedFlexTreeNode } from "./lazyNode.js";
5555
import { indexForAt, treeStatusFromAnchorCache } from "./utilities.js";
56+
import { combineChunks } from "../chunked-forest/index.js";
5657

5758
/**
5859
* Reuse fields.
@@ -247,7 +248,8 @@ export abstract class LazyField extends LazyEntity<FieldAnchor> implements FlexT
247248
protected getEditor(): IDefaultEditBuilder<ITreeCursorSynchronous> {
248249
return new MappedEditBuilder(
249250
this.context.checkout.editor,
250-
(cursor: ITreeCursorSynchronous) => this.context.checkout.forest.chunkField(cursor),
251+
(cursor: ITreeCursorSynchronous) =>
252+
combineChunks(this.context.checkout.forest.chunkField(cursor)),
251253
);
252254
}
253255
}

packages/dds/tree/src/feature-libraries/forest-summary/incrementalSummaryBuilder.ts

Lines changed: 64 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ export class ForestIncrementalSummaryBuilder implements IncrementalEncoderDecode
288288

289289
public constructor(
290290
private readonly enableIncrementalSummary: boolean,
291-
private readonly getChunkAtCursor: (cursor: ITreeCursorSynchronous) => TreeChunk,
291+
private readonly getChunkAtCursor: (cursor: ITreeCursorSynchronous) => TreeChunk[],
292292
public readonly shouldEncodeIncrementally: IncrementalEncodingPolicy,
293293
private readonly initialSequenceNumber: number,
294294
) {}
@@ -392,80 +392,74 @@ export class ForestIncrementalSummaryBuilder implements IncrementalEncoderDecode
392392
// Validate that a summary is currently being tracked and that the tracked summary properties are defined.
393393
validateTrackingSummary(this.forestSummaryState, this.trackedSummaryProperties);
394394

395-
if (cursor.getFieldLength() === 0) {
396-
return [];
397-
}
398-
399-
let chunkReferenceId: ChunkReferenceId;
400-
let chunkProperties: ChunkSummaryProperties;
395+
const chunkReferenceIds: ChunkReferenceId[] = [];
396+
const chunks = this.getChunkAtCursor(cursor);
397+
for (const chunk of chunks) {
398+
let chunkProperties: ChunkSummaryProperties;
399+
400+
// Try and get the properties of the chunk from the latest successful summary.
401+
// If it exists and the summary is not a full tree, use the properties to generate a summary handle.
402+
// If it does not exist, encode the chunk and generate new properties for it.
403+
const previousChunkProperties = tryGetFromNestedMap(
404+
this.chunkTrackingPropertiesMap,
405+
this.latestSummarySequenceNumber,
406+
chunk,
407+
);
408+
if (previousChunkProperties !== undefined && !this.trackedSummaryProperties.fullTree) {
409+
chunkProperties = previousChunkProperties;
410+
this.trackedSummaryProperties.parentSummaryBuilder.addHandle(
411+
`${chunkProperties.referenceId}`,
412+
SummaryType.Tree,
413+
`${this.trackedSummaryProperties.latestSummaryBasePath}/${chunkProperties.summaryPath}`,
414+
);
415+
} else {
416+
// Generate a new reference ID for the chunk.
417+
const newReferenceId: ChunkReferenceId = brand(this.nextReferenceId++);
418+
419+
// Add the reference ID of this chunk to the chunk summary path and use the path as the summary path
420+
// for the chunk in its summary properties.
421+
// This is done before encoding the chunk so that the summary path is updated correctly when encoding
422+
// any incremental chunks that are under this chunk.
423+
this.trackedSummaryProperties.chunkSummaryPath.push(newReferenceId);
424+
425+
chunkProperties = {
426+
referenceId: newReferenceId,
427+
summaryPath: this.trackedSummaryProperties.chunkSummaryPath.join("/"),
428+
};
429+
430+
const parentSummaryBuilder = this.trackedSummaryProperties.parentSummaryBuilder;
431+
// Create a new summary builder for this chunk to build its summary tree which will be stored in the
432+
// parent's summary tree under its reference ID.
433+
// Before encoding the chunk, set the parent summary builder to this chunk's summary builder so that
434+
// any incremental chunks in the subtree of this chunk will use that as their parent summary builder.
435+
const chunkSummaryBuilder = new SummaryTreeBuilder();
436+
this.trackedSummaryProperties.parentSummaryBuilder = chunkSummaryBuilder;
437+
chunkSummaryBuilder.addBlob(
438+
chunkContentsBlobKey,
439+
this.trackedSummaryProperties.stringify(chunkEncoder(chunk)),
440+
);
401441

402-
// An additional ref-count must be added to these chunks representing a reference from the summary tree to the chunk.
403-
// This will ensure that the blob's content never change and thus the reference stays accurate: instead of modifying it,
404-
// a copy will be created without the blob reference.
405-
// The "getChunkAtCursor" adds this additional ref-count.
406-
const chunk = this.getChunkAtCursor(cursor);
442+
// Add this chunk's summary tree to the parent's summary tree. The summary tree contains its encoded
443+
// contents and the summary trees of any incremental chunks under it.
444+
parentSummaryBuilder.addWithStats(
445+
`${newReferenceId}`,
446+
chunkSummaryBuilder.getSummaryTree(),
447+
);
407448

408-
// Try and get the properties of the chunk from the latest successful summary.
409-
// If it exists and the summary is not a full tree, use the properties to generate a summary handle.
410-
// If it does not exist, encode the chunk and generate new properties for it.
411-
const previousChunkProperties = tryGetFromNestedMap(
412-
this.chunkTrackingPropertiesMap,
413-
this.latestSummarySequenceNumber,
414-
chunk,
415-
);
416-
if (previousChunkProperties !== undefined && !this.trackedSummaryProperties.fullTree) {
417-
chunkProperties = previousChunkProperties;
418-
chunkReferenceId = previousChunkProperties.referenceId;
419-
this.trackedSummaryProperties.parentSummaryBuilder.addHandle(
420-
`${chunkReferenceId}`,
421-
SummaryType.Tree,
422-
`${this.trackedSummaryProperties.latestSummaryBasePath}/${previousChunkProperties.summaryPath}`,
423-
);
424-
} else {
425-
// Generate a new reference ID for the chunk.
426-
chunkReferenceId = brand(this.nextReferenceId++);
427-
// Add the reference ID of this chunk to the chunk summary path and use the path as the summary path
428-
// for the chunk in its summary properties.
429-
// This is done before encoding the chunk so that the summary path is updated correctly when encoding
430-
// any incremental chunks that are under this chunk.
431-
this.trackedSummaryProperties.chunkSummaryPath.push(chunkReferenceId);
432-
433-
chunkProperties = {
434-
referenceId: chunkReferenceId,
435-
summaryPath: this.trackedSummaryProperties.chunkSummaryPath.join("/"),
436-
};
437-
438-
const parentSummaryBuilder = this.trackedSummaryProperties.parentSummaryBuilder;
439-
// Create a new summary builder for this chunk to build its summary tree which will be stored in the
440-
// parent's summary tree under its reference ID.
441-
// Before encoding the chunk, set the parent summary builder to this chunk's summary builder so that
442-
// any incremental chunks in the subtree of this chunk will use that as their parent summary builder.
443-
const chunkSummaryBuilder = new SummaryTreeBuilder();
444-
this.trackedSummaryProperties.parentSummaryBuilder = chunkSummaryBuilder;
445-
chunkSummaryBuilder.addBlob(
446-
chunkContentsBlobKey,
447-
this.trackedSummaryProperties.stringify(chunkEncoder(chunk)),
448-
);
449+
// Restore the parent summary builder and chunk summary path.
450+
this.trackedSummaryProperties.parentSummaryBuilder = parentSummaryBuilder;
451+
this.trackedSummaryProperties.chunkSummaryPath.pop();
452+
}
449453

450-
// Add this chunk's summary tree to the parent's summary tree. The summary tree contains its encoded
451-
// contents and the summary trees of any incremental chunks under it.
452-
parentSummaryBuilder.addWithStats(
453-
`${chunkReferenceId}`,
454-
chunkSummaryBuilder.getSummaryTree(),
454+
setInNestedMap(
455+
this.chunkTrackingPropertiesMap,
456+
this.trackedSummaryProperties.summarySequenceNumber,
457+
chunk,
458+
chunkProperties,
455459
);
456-
457-
// Restore the parent summary builder and chunk summary path.
458-
this.trackedSummaryProperties.parentSummaryBuilder = parentSummaryBuilder;
459-
this.trackedSummaryProperties.chunkSummaryPath.pop();
460+
chunkReferenceIds.push(chunkProperties.referenceId);
460461
}
461-
462-
setInNestedMap(
463-
this.chunkTrackingPropertiesMap,
464-
this.trackedSummaryProperties.summarySequenceNumber,
465-
chunk,
466-
chunkProperties,
467-
);
468-
return [chunkReferenceId];
462+
return chunkReferenceIds;
469463
}
470464

471465
/**

packages/dds/tree/src/feature-libraries/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ export {
110110
fluidVersionToFieldBatchCodecWriteVersion,
111111
type FieldBatchEncodingContext,
112112
emptyChunk,
113+
combineChunks,
113114
type IncrementalEncodingPolicy,
114115
defaultIncrementalEncodingPolicy,
115116
} from "./chunked-forest/index.js";

packages/dds/tree/src/feature-libraries/object-forest/objectForest.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ import {
4949
type Breakable,
5050
type WithBreakable,
5151
} from "../../util/index.js";
52-
import { chunkFieldSingle, defaultChunkPolicy } from "../chunked-forest/index.js";
52+
import { chunkField, defaultChunkPolicy } from "../chunked-forest/index.js";
5353
import { cursorForMapTreeNode, mapTreeFromCursor } from "../mapTreeCursor.js";
5454
import { type CursorWithNode, SynchronousCursor } from "../treeCursorUtils.js";
5555
import {
@@ -128,8 +128,8 @@ export class ObjectForest implements IEditableForest, WithBreakable {
128128
return new ObjectForest(this.breaker, schema, anchors, this.additionalAsserts, this.roots);
129129
}
130130

131-
public chunkField(cursor: ITreeCursorSynchronous): TreeChunk {
132-
return chunkFieldSingle(cursor, { idCompressor: undefined, policy: defaultChunkPolicy });
131+
public chunkField(cursor: ITreeCursorSynchronous): TreeChunk[] {
132+
return chunkField(cursor, { idCompressor: undefined, policy: defaultChunkPolicy });
133133
}
134134

135135
public forgetAnchor(anchor: Anchor): void {

0 commit comments

Comments
 (0)