@@ -42,7 +42,7 @@ protected List<String> splitText(String text) {
4242 List <String > chunks = new ArrayList <>();
4343
4444 chunks .add (text .substring (0 , chuckSize ));
45- chunks .add (text .substring (chuckSize , text . length () ));
45+ chunks .add (text .substring (chuckSize ));
4646
4747 return chunks ;
4848 }
@@ -213,4 +213,35 @@ public void pageWithChunkSplit() {
213213 () -> assertThat (splitedDocument .get (3 ).getMetadata ().get ("page_number" )).isEqualTo (3 ));
214214 }
215215
216+ @ Test
217+ public void testSplitTextWithNullMetadata () {
218+
219+ var contentFormatter = DefaultContentFormatter .defaultConfig ();
220+
221+ var doc = new Document ("In the end, writing arises when man realizes that memory is not enough." );
222+
223+ doc .getMetadata ().put ("key1" , "value1" );
224+ doc .getMetadata ().put ("key2" , null );
225+
226+ doc .setContentFormatter (contentFormatter );
227+
228+ List <Document > chunks = testTextSplitter .apply (List .of (doc ));
229+
230+ assertThat (testTextSplitter .isCopyContentFormatter ()).isTrue ();
231+
232+ assertThat (chunks ).hasSize (2 );
233+
234+ // Doc chunks:
235+ assertThat (chunks .get (0 ).getContent ()).isEqualTo ("In the end, writing arises when man" );
236+ assertThat (chunks .get (1 ).getContent ()).isEqualTo (" realizes that memory is not enough." );
237+
238+ // Verify that the same, merged metadata is copied to all chunks.
239+ assertThat (chunks .get (0 ).getMetadata ()).isEqualTo (chunks .get (1 ).getMetadata ());
240+ assertThat (chunks .get (1 ).getMetadata ()).containsKeys ("key1" );
241+
242+ // Verify that the content formatters are copied from the parents to the chunks.
243+ assertThat (chunks .get (0 ).getContentFormatter ()).isSameAs (contentFormatter );
244+ assertThat (chunks .get (1 ).getContentFormatter ()).isSameAs (contentFormatter );
245+ }
246+
216247}
0 commit comments