3232import jakarta .json .stream .JsonGenerator ;
3333import java .lang .Integer ;
3434import java .lang .String ;
35+ import java .util .List ;
3536import java .util .Objects ;
3637import java .util .function .Function ;
3738import javax .annotation .Nullable ;
6364public class ChunkingSettings implements JsonpSerializable {
6465 private final String strategy ;
6566
67+ private final String separatorGroup ;
68+
69+ private final List <String > separators ;
70+
6671 private final int maxChunkSize ;
6772
6873 @ Nullable
@@ -76,6 +81,8 @@ public class ChunkingSettings implements JsonpSerializable {
7681 private ChunkingSettings (Builder builder ) {
7782
7883 this .strategy = ApiTypeHelper .requireNonNull (builder .strategy , this , "strategy" );
84+ this .separatorGroup = ApiTypeHelper .requireNonNull (builder .separatorGroup , this , "separatorGroup" );
85+ this .separators = ApiTypeHelper .unmodifiableRequired (builder .separators , this , "separators" );
7986 this .maxChunkSize = ApiTypeHelper .requireNonNull (builder .maxChunkSize , this , "maxChunkSize" , 0 );
8087 this .overlap = builder .overlap ;
8188 this .sentenceOverlap = builder .sentenceOverlap ;
@@ -87,14 +94,60 @@ public static ChunkingSettings of(Function<Builder, ObjectBuilder<ChunkingSettin
8794 }
8895
8996 /**
90- * Required - The chunking strategy: <code>sentence</code> or <code>word</code>.
97+ * Required - The chunking strategy: <code>sentence</code>, <code>word</code>,
98+ * <code>none</code> or <code>recursive</code>.
99+ * <ul>
100+ * <li>If <code>strategy</code> is set to <code>recursive</code>, you must also
101+ * specify:</li>
102+ * </ul>
103+ * <ul>
104+ * <li><code>max_chunk_size</code></li>
105+ * <li>either <code>separators</code> or <code>separator_group</code></li>
106+ * </ul>
107+ * <p>
108+ * Learn more about different chunking strategies in the linked documentation.
91109 * <p>
92110 * API name: {@code strategy}
93111 */
94112 public final String strategy () {
95113 return this .strategy ;
96114 }
97115
116+ /**
117+ * Required - This parameter is only applicable when using the
118+ * <code>recursive</code> chunking strategy.
119+ * <p>
120+ * Sets a predefined list of separators in the saved chunking settings based on
121+ * the selected text type. Values can be <code>markdown</code> or
122+ * <code>plaintext</code>.
123+ * <p>
124+ * Using this parameter is an alternative to manually specifying a custom
125+ * <code>separators</code> list.
126+ * <p>
127+ * API name: {@code separator_group}
128+ */
129+ public final String separatorGroup () {
130+ return this .separatorGroup ;
131+ }
132+
133+ /**
134+ * Required - A list of strings used as possible split points when chunking text
135+ * with the <code>recursive</code> strategy.
136+ * <p>
137+ * Each string can be a plain string or a regular expression (regex) pattern.
138+ * The system tries each separator in order to split the text, starting from the
139+ * first item in the list.
140+ * <p>
141+ * After splitting, it attempts to recombine smaller pieces into larger chunks
142+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
143+ * number of chunks generated.
144+ * <p>
145+ * API name: {@code separators}
146+ */
147+ public final List <String > separators () {
148+ return this .separators ;
149+ }
150+
98151 /**
99152 * Required - The maximum size of a chunk in words. This value cannot be higher
100153 * than <code>300</code> or lower than <code>20</code> (for
@@ -145,6 +198,19 @@ protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) {
145198 generator .writeKey ("strategy" );
146199 generator .write (this .strategy );
147200
201+ generator .writeKey ("separator_group" );
202+ generator .write (this .separatorGroup );
203+
204+ if (ApiTypeHelper .isDefined (this .separators )) {
205+ generator .writeKey ("separators" );
206+ generator .writeStartArray ();
207+ for (String item0 : this .separators ) {
208+ generator .write (item0 );
209+
210+ }
211+ generator .writeEnd ();
212+
213+ }
148214 generator .writeKey ("max_chunk_size" );
149215 generator .write (this .maxChunkSize );
150216
@@ -175,6 +241,10 @@ public String toString() {
175241 public static class Builder extends WithJsonObjectBuilderBase <Builder > implements ObjectBuilder <ChunkingSettings > {
176242 private String strategy ;
177243
244+ private String separatorGroup ;
245+
246+ private List <String > separators ;
247+
178248 private Integer maxChunkSize ;
179249
180250 @ Nullable
@@ -184,7 +254,18 @@ public static class Builder extends WithJsonObjectBuilderBase<Builder> implement
184254 private Integer sentenceOverlap ;
185255
186256 /**
187- * Required - The chunking strategy: <code>sentence</code> or <code>word</code>.
257+ * Required - The chunking strategy: <code>sentence</code>, <code>word</code>,
258+ * <code>none</code> or <code>recursive</code>.
259+ * <ul>
260+ * <li>If <code>strategy</code> is set to <code>recursive</code>, you must also
261+ * specify:</li>
262+ * </ul>
263+ * <ul>
264+ * <li><code>max_chunk_size</code></li>
265+ * <li>either <code>separators</code> or <code>separator_group</code></li>
266+ * </ul>
267+ * <p>
268+ * Learn more about different chunking strategies in the linked documentation.
188269 * <p>
189270 * API name: {@code strategy}
190271 */
@@ -193,6 +274,66 @@ public final Builder strategy(String value) {
193274 return this ;
194275 }
195276
277+ /**
278+ * Required - This parameter is only applicable when using the
279+ * <code>recursive</code> chunking strategy.
280+ * <p>
281+ * Sets a predefined list of separators in the saved chunking settings based on
282+ * the selected text type. Values can be <code>markdown</code> or
283+ * <code>plaintext</code>.
284+ * <p>
285+ * Using this parameter is an alternative to manually specifying a custom
286+ * <code>separators</code> list.
287+ * <p>
288+ * API name: {@code separator_group}
289+ */
290+ public final Builder separatorGroup (String value ) {
291+ this .separatorGroup = value ;
292+ return this ;
293+ }
294+
295+ /**
296+ * Required - A list of strings used as possible split points when chunking text
297+ * with the <code>recursive</code> strategy.
298+ * <p>
299+ * Each string can be a plain string or a regular expression (regex) pattern.
300+ * The system tries each separator in order to split the text, starting from the
301+ * first item in the list.
302+ * <p>
303+ * After splitting, it attempts to recombine smaller pieces into larger chunks
304+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
305+ * number of chunks generated.
306+ * <p>
307+ * API name: {@code separators}
308+ * <p>
309+ * Adds all elements of <code>list</code> to <code>separators</code>.
310+ */
311+ public final Builder separators (List <String > list ) {
312+ this .separators = _listAddAll (this .separators , list );
313+ return this ;
314+ }
315+
316+ /**
317+ * Required - A list of strings used as possible split points when chunking text
318+ * with the <code>recursive</code> strategy.
319+ * <p>
320+ * Each string can be a plain string or a regular expression (regex) pattern.
321+ * The system tries each separator in order to split the text, starting from the
322+ * first item in the list.
323+ * <p>
324+ * After splitting, it attempts to recombine smaller pieces into larger chunks
325+ * that stay within the <code>max_chunk_size</code> limit, to reduce the total
326+ * number of chunks generated.
327+ * <p>
328+ * API name: {@code separators}
329+ * <p>
330+ * Adds one or more values to <code>separators</code>.
331+ */
332+ public final Builder separators (String value , String ... values ) {
333+ this .separators = _listAdd (this .separators , value , values );
334+ return this ;
335+ }
336+
196337 /**
197338 * Required - The maximum size of a chunk in words. This value cannot be higher
198339 * than <code>300</code> or lower than <code>20</code> (for
@@ -259,6 +400,9 @@ public ChunkingSettings build() {
259400 protected static void setupChunkingSettingsDeserializer (ObjectDeserializer <ChunkingSettings .Builder > op ) {
260401
261402 op .add (Builder ::strategy , JsonpDeserializer .stringDeserializer (), "strategy" );
403+ op .add (Builder ::separatorGroup , JsonpDeserializer .stringDeserializer (), "separator_group" );
404+ op .add (Builder ::separators , JsonpDeserializer .arrayDeserializer (JsonpDeserializer .stringDeserializer ()),
405+ "separators" );
262406 op .add (Builder ::maxChunkSize , JsonpDeserializer .integerDeserializer (), "max_chunk_size" );
263407 op .add (Builder ::overlap , JsonpDeserializer .integerDeserializer (), "overlap" );
264408 op .add (Builder ::sentenceOverlap , JsonpDeserializer .integerDeserializer (), "sentence_overlap" );
0 commit comments