@@ -65,19 +65,18 @@ protected DocumentWriteOperation makeChunkDocument() {
6565 Element chunksElement = doc .createElementNS (chunkConfig .getXmlNamespace (), DEFAULT_CHUNKS_ELEMENT_NAME );
6666 root .appendChild (chunksElement );
6767
68- List <Chunk > chunks = new ArrayList <>();
68+ List <Chunk > addedChunks = new ArrayList <>();
6969 for (int i = 0 ; i < this .maxChunksPerDocument && hasNext (); i ++) {
7070 ChunkInputs chunkInputs = chunkInputsList .get (listIndex );
71- Element classificationResponseNode = chunkInputs .getClassification () != null ?
72- getClassificationResponseElement (chunkInputs .getClassification ()) : null ;
73- addChunk (doc , chunkInputs .getText (), chunksElement , chunks , classificationResponseNode , chunkInputs .getEmbedding (), chunkInputs .getModelName ());
71+ DOMChunk chunk = addChunk (doc , chunkInputs , chunksElement );
72+ addedChunks .add (chunk );
7473 listIndex ++;
7574 }
7675
7776 final String chunkDocumentUri = makeChunkDocumentUri (sourceDocument , "xml" );
7877 return new DocumentAndChunks (
7978 new DocumentWriteOperationImpl (chunkDocumentUri , chunkConfig .getMetadata (), new DOMHandle (doc )),
80- chunks
79+ addedChunks
8180 );
8281 }
8382
@@ -87,16 +86,15 @@ protected DocumentWriteOperation addChunksToSourceDocument() {
8786 Element chunksElement = doc .createElementNS (chunkConfig .getXmlNamespace (), determineChunksElementName (doc ));
8887 doc .getDocumentElement ().appendChild (chunksElement );
8988
90- List <Chunk > chunks = new ArrayList <>();
89+ List <Chunk > addedChunks = new ArrayList <>();
9190 for (ChunkInputs chunkInputs : chunkInputsList ) {
92- Element classificationResponseNode = chunkInputs .getClassification () != null ?
93- getClassificationResponseElement (chunkInputs .getClassification ()) : null ;
94- addChunk (doc , chunkInputs .getText (), chunksElement , chunks , classificationResponseNode , chunkInputs .getEmbedding (), chunkInputs .getModelName ());
91+ DOMChunk chunk = addChunk (doc , chunkInputs , chunksElement );
92+ addedChunks .add (chunk );
9593 }
9694
9795 return new DocumentAndChunks (
9896 new DocumentWriteOperationImpl (sourceDocument .getUri (), sourceDocument .getMetadata (), new DOMHandle (doc )),
99- chunks
97+ addedChunks
10098 );
10199 }
102100
@@ -110,15 +108,16 @@ private Element getClassificationResponseElement(byte[] classificationBytes) {
110108 }
111109 }
112110
113- private void addChunk (Document doc , String textSegment , Element chunksElement , List < Chunk > chunks , Element classificationResponse , float [] embedding , String modelName ) {
111+ private DOMChunk addChunk (Document doc , ChunkInputs chunkInputs , Element chunksElement ) {
114112 Element chunk = doc .createElementNS (chunkConfig .getXmlNamespace (), "chunk" );
115113 chunksElement .appendChild (chunk );
116114
117115 Element text = doc .createElementNS (chunkConfig .getXmlNamespace (), "text" );
118- text .setTextContent (textSegment );
116+ text .setTextContent (chunkInputs . getText () );
119117 chunk .appendChild (text );
120118
121- if (classificationResponse != null ) {
119+ if (chunkInputs .getClassification () != null ) {
120+ Element classificationResponse = getClassificationResponseElement (chunkInputs .getClassification ());
122121 Node classificationNode = doc .createElement ("classification" );
123122 chunk .appendChild (classificationNode );
124123 for (int i = 0 ; i < classificationResponse .getChildNodes ().getLength (); i ++) {
@@ -127,11 +126,21 @@ private void addChunk(Document doc, String textSegment, Element chunksElement, L
127126 }
128127 }
129128
129+ if (chunkInputs .getMetadata () != null ) {
130+ Element metadataElement = doc .createElementNS (chunkConfig .getXmlNamespace (), "chunk-metadata" );
131+ // The metadata is stored as its serialized JSON string rather than being converted to XML, as there is no
132+ // single "correct" way to convert JSON to XML, particularly in regard to arrays. If the user wants XML
133+ // documents, they can always e.g. use a REST transform to determine how they want to represent the JSON
134+ // as XML.
135+ metadataElement .setTextContent (chunkInputs .getMetadata ().toString ());
136+ chunk .appendChild (metadataElement );
137+ }
138+
130139 var domChunk = new DOMChunk (doc , chunk , this .xmlChunkConfig , this .xPathFactory );
131- if (embedding != null ) {
132- domChunk .addEmbedding (embedding , modelName );
140+ if (chunkInputs . getEmbedding () != null ) {
141+ domChunk .addEmbedding (chunkInputs . getEmbedding (), chunkInputs . getModelName () );
133142 }
134- chunks . add ( domChunk ) ;
143+ return domChunk ;
135144 }
136145
137146 private String determineChunksElementName (Document doc ) {
0 commit comments