Skip to content

Commit 6fcafb3

Browse files
author
ogbozoyan
committed
fix: prevent possible NPE while splitting document chunks whose metadata contains null keys or values
1 parent 1252216 commit 6fcafb3

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

spring-ai-core/src/main/java/org/springframework/ai/transformer/splitter/TextSplitter.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,12 @@ private List<Document> createDocuments(List<String> texts, List<ContentFormatter
8787
for (String chunk : chunks) {
8888
// only primitive values are in here -
8989
Map<String, Object> metadataCopy = metadata.entrySet()
90-
.stream()
91-
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
90+
.stream()
91+
.filter(e -> e.getKey() != null && e.getValue() != null)
92+
.collect(Collectors.toMap(
93+
Map.Entry::getKey,
94+
Map.Entry::getValue
95+
));
9296
Document newDoc = new Document(chunk, metadataCopy);
9397

9498
if (this.copyContentFormatter) {

spring-ai-core/src/test/java/org/springframework/ai/transformer/splitter/TextSplitterTests.java

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ protected List<String> splitText(String text) {
4242
List<String> chunks = new ArrayList<>();
4343

4444
chunks.add(text.substring(0, chuckSize));
45-
chunks.add(text.substring(chuckSize, text.length()));
45+
chunks.add(text.substring(chuckSize));
4646

4747
return chunks;
4848
}
@@ -213,4 +213,35 @@ public void pageWithChunkSplit() {
213213
() -> assertThat(splitedDocument.get(3).getMetadata().get("page_number")).isEqualTo(3));
214214
}
215215

216+
@Test
217+
public void testSplitTextWithNullMetadata() {
218+
219+
var contentFormatter = DefaultContentFormatter.defaultConfig();
220+
221+
var doc = new Document("In the end, writing arises when man realizes that memory is not enough.");
222+
223+
doc.getMetadata().put("key1", "value1");
224+
doc.getMetadata().put("key2", null);
225+
226+
doc.setContentFormatter(contentFormatter);
227+
228+
List<Document> chunks = testTextSplitter.apply(List.of(doc));
229+
230+
assertThat(testTextSplitter.isCopyContentFormatter()).isTrue();
231+
232+
assertThat(chunks).hasSize(2);
233+
234+
// Doc chunks:
235+
assertThat(chunks.get(0).getContent()).isEqualTo("In the end, writing arises when man");
236+
assertThat(chunks.get(1).getContent()).isEqualTo(" realizes that memory is not enough.");
237+
238+
// Verify that both chunks carry equal metadata and that the non-null entry ("key1") is kept.
239+
assertThat(chunks.get(0).getMetadata()).isEqualTo(chunks.get(1).getMetadata());
240+
assertThat(chunks.get(1).getMetadata()).containsKeys("key1");
241+
242+
// Verify that the content formatters are copied from the parents to the chunks.
243+
assertThat(chunks.get(0).getContentFormatter()).isSameAs(contentFormatter);
244+
assertThat(chunks.get(1).getContentFormatter()).isSameAs(contentFormatter);
245+
}
246+
216247
}

0 commit comments

Comments
 (0)