1616
1717package org .springframework .ai .document ;
1818
19- import java .util .ArrayList ;
20- import java .util .Collection ;
19+ import java .util .Collections ;
2120import java .util .HashMap ;
22- import java .util .List ;
2321import java .util .Map ;
2422import java .util .Objects ;
2523
3129import org .springframework .ai .document .id .IdGenerator ;
3230import org .springframework .ai .document .id .RandomIdGenerator ;
3331import org .springframework .ai .model .Media ;
34- import org .springframework .ai .model .MediaContent ;
3532import org .springframework .lang .Nullable ;
3633import org .springframework .util .Assert ;
3734import org .springframework .util .StringUtils ;
3835
3936/**
4037 * A document is a container for the content and metadata of a document. It also contains
4138 * the document's unique ID and an optional embedding.
39+ *
40+ * The content is either string-based text or a single Media item, never both.
4241 */
4342@ JsonIgnoreProperties ({ "contentFormatter" })
44- public class Document implements MediaContent {
43+ public class Document {
4544
4645 public static final ContentFormatter DEFAULT_CONTENT_FORMATTER = DefaultContentFormatter .defaultConfig ();
4746
48- public static final String EMPTY_TEXT = "" ;
49-
5047 /**
5148 * Unique ID
5249 */
5350 private final String id ;
5451
5552 /**
56- * Document content.
53+ * Document string content.
5754 */
58- private final String content ;
55+ private final String text ;
5956
60- private final Collection <Media > media ;
57+ /**
58+ * Document media content
59+ */
60+ private final Media media ;
6161
6262 /**
6363 * Metadata for the document. It should not be nested and values should be restricted
@@ -90,59 +90,63 @@ public Document(@JsonProperty("content") String content) {
9090 this (content , new HashMap <>());
9191 }
9292
/**
 * Creates a text document whose id is produced by a fresh {@link RandomIdGenerator}.
 * The document has no media and no score.
 * @param text the textual content
 * @param metadata the metadata to attach
 */
public Document(String text, Map<String, Object> metadata) {
    this(new RandomIdGenerator().generateId(), text, null, metadata, null);
}
9696
/**
 * Creates a text document with a caller-supplied id. The document has no media and
 * no score.
 * @param id the document id
 * @param text the textual content
 * @param metadata the metadata to attach
 */
public Document(String id, String text, Map<String, Object> metadata) {
    this(id, text, null, metadata, null);
}
104100
/**
 * Creates a media document whose id is produced by a fresh {@link RandomIdGenerator}.
 * The document has no text and no score.
 * @param media the media content
 * @param metadata the metadata to attach
 */
public Document(Media media, Map<String, Object> metadata) {
    this(new RandomIdGenerator().generateId(), null, media, metadata, null);
}
112104
/**
 * Creates a media document with a caller-supplied id. The document has no text and
 * no score.
 * @param id the document id
 * @param media the media content
 * @param metadata the metadata to attach
 */
public Document(String id, Media media, Map<String, Object> metadata) {
    this(id, null, media, metadata, null);
}
116108
117- /**
118- * @deprecated Use builder instead: {@link Document#builder()}.
119- */
120- @ Deprecated (since = "1.0.0-M5" , forRemoval = true )
121- public Document (String id , String content , Collection <Media > media , Map <String , Object > metadata ) {
122- this (id , content , media , metadata , null );
109+
110+ private Document (String id , String text , Media media , Map <String , Object > metadata , @ Nullable Double score ) {
111+ Assert .hasText (id , "id cannot be null or empty" );
112+ Assert .notNull (metadata , "metadata cannot be null" );
113+ Assert .noNullElements (metadata .keySet (), "metadata cannot have null keys" );
114+ Assert .noNullElements (metadata .values (), "metadata cannot have null values" );
115+ if (text == null && media == null ) {
116+ throw new IllegalArgumentException ("need to specify either text or media" );
117+ }
118+ if (text != null && media != null ) {
119+ throw new IllegalArgumentException ("can not specify both text and media" );
120+ }
121+ this .id = id ;
122+ this .text = text ;
123+ this .media = media ;
124+ this .metadata = new HashMap <>(metadata );
125+ this .score = score ;
123126 }
124127
128+
129+
/**
 * Creates a media-only document with an explicit id and optional score. The text
 * content is left {@code null}.
 * @param id the document id; must contain text
 * @param media the media content; must not be null
 * @param metadata the metadata; must be non-null with no null keys or values
 * @param score an optional score, may be {@code null}
 * @throws IllegalArgumentException if any of the above constraints is violated
 * @deprecated Use builder instead: {@link Document#builder()}.
 */
@Deprecated(since = "1.0.0-M5", forRemoval = true)
public Document(String id, Media media, Map<String, Object> metadata, @Nullable Double score) {
    Assert.hasText(id, "id cannot be null or empty");
    Assert.notNull(media, "media cannot be null");
    Assert.notNull(metadata, "metadata cannot be null");
    Assert.noNullElements(metadata.keySet(), "metadata cannot have null keys");
    Assert.noNullElements(metadata.values(), "metadata cannot have null values");

    this.id = id;
    this.text = null;
    this.media = media;
    // Store a mutable defensive copy, as the private five-argument constructor does.
    // An unmodifiable view would still alias the caller's map and would make
    // getMetadata() behave differently depending on which constructor was used.
    this.metadata = new HashMap<>(metadata);
    this.score = score;
}
145148
149+
146150 public static Builder builder () {
147151 return new Builder ();
148152 }
@@ -151,13 +155,21 @@ public String getId() {
151155 return this .id ;
152156 }
153157
154- @ Override
158+ @ Deprecated
155159 public String getContent () {
156- return this .content ;
160+ return this .getText () ;
157161 }
158162
159- @ Override
160- public Collection <Media > getMedia () {
163+ public String getText () {
164+ return this .text ;
165+ }
166+
167+ public boolean isText () {
168+ return this .text != null ;
169+ }
170+
171+
172+ public Media getMedia () {
161173 return this .media ;
162174 }
163175
@@ -180,7 +192,7 @@ public String getFormattedContent(ContentFormatter formatter, MetadataMode metad
180192 return formatter .format (this , metadataMode );
181193 }
182194
183- @ Override
195+
184196 public Map <String , Object > getMetadata () {
185197 return this .metadata ;
186198 }
@@ -228,8 +240,8 @@ public void setContentFormatter(ContentFormatter contentFormatter) {
228240
229241 public Builder mutate () {
230242 return new Builder ().id (this .id )
231- .content (this .content )
232- .media (new ArrayList <>( this .media ) )
243+ .content (this .text )
244+ .media (this .media )
233245 .metadata (this .metadata )
234246 .score (this .score );
235247 }
@@ -240,29 +252,29 @@ public boolean equals(Object o) {
240252 return false ;
241253 }
242254 Document document = (Document ) o ;
243- return Objects .equals (this .id , document .id ) && Objects .equals (this .content , document .content )
255+ return Objects .equals (this .id , document .id ) && Objects .equals (this .text , document .text )
244256 && Objects .equals (this .media , document .media ) && Objects .equals (this .metadata , document .metadata )
245257 && Objects .equals (this .score , document .score );
246258 }
247259
248260 @ Override
249261 public int hashCode () {
250- return Objects .hash (this .id , this .content , this .media , this .metadata , this .score );
262+ return Objects .hash (this .id , this .text , this .media , this .metadata , this .score );
251263 }
252264
253265 @ Override
254266 public String toString () {
255- return "Document{" + "id='" + this .id + '\'' + ", content='" + this .content + '\'' + ", media=" + this . media
267+ return "Document{" + "id='" + this .id + '\'' + ", content='" + this .text + '\''
256268 + ", metadata=" + this .metadata + ", score=" + this .score + '}' ;
257269 }
258270
259271 public static class Builder {
260272
261273 private String id ;
262274
263- private String content = Document . EMPTY_TEXT ;
275+ private String text ;
264276
265- private List < Media > media = new ArrayList <>() ;
277+ private Media media ;
266278
267279 private Map <String , Object > metadata = new HashMap <>();
268280
@@ -285,23 +297,22 @@ public Builder id(String id) {
285297 return this ;
286298 }
287299
288- public Builder content (String content ) {
289- this .content = content ;
300+ public Builder text (String text ) {
301+ this .text = text ;
290302 return this ;
291303 }
292304
293- public Builder media (List <Media > media ) {
294- Assert .notNull (media , "media cannot be null" );
295- this .media .addAll (media );
305+ public Builder content (String text ) {
306+ this .text = text ;
296307 return this ;
297308 }
298309
299- public Builder media (Media ... media ) {
300- Assert .noNullElements (media , "media cannot contain null elements" );
301- this .media .addAll (List .of (media ));
310+ public Builder media (Media media ) {
311+ this .media = media ;
302312 return this ;
303313 }
304314
315+
305316 public Builder metadata (Map <String , Object > metadata ) {
306317 this .metadata = metadata ;
307318 return this ;
@@ -337,10 +348,6 @@ public Builder withContent(String content) {
337348 return content (content );
338349 }
339350
340- @ Deprecated (since = "1.0.0-M5" , forRemoval = true )
341- public Builder withMedia (List <Media > media ) {
342- return media (media );
343- }
344351
345352 @ Deprecated (since = "1.0.0-M5" , forRemoval = true )
346353 public Builder withMedia (Media media ) {
@@ -359,9 +366,9 @@ public Builder withMetadata(String key, Object value) {
359366
360367 public Document build () {
361368 if (!StringUtils .hasText (this .id )) {
362- this .id = this .idGenerator .generateId (this .content , this .metadata );
369+ this .id = this .idGenerator .generateId (this .text , this .metadata ); // TODO Review if metadata should be included
363370 }
364- var document = new Document (this .id , this .content , this .media , this .metadata , this .score );
371+ var document = new Document (this .id , this .text , this .media , this .metadata , this .score );
365372 document .setEmbedding (this .embedding );
366373 return document ;
367374 }
0 commit comments