Skip to content

Commit 8e265df

Browse files
update docs with newer phrasing
1 parent b7735a2 commit 8e265df

File tree

1 file changed

+47
-5
lines changed

1 file changed

+47
-5
lines changed

src/settings.rs

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ pub struct HuggingFaceEmbedderSettings {
104104
/// ```
105105
#[serde(skip_serializing_if = "Option::is_none")]
106106
pub document_template: Option<String>,
107+
/// The maximum size of a rendered document template.
108+
///
109+
/// Longer texts are truncated to fit the configured limit.
110+
/// Default: `400`
111+
#[serde(skip_serializing_if = "Option::is_none")]
112+
pub document_template_max_bytes: Option<usize>,
107113
}
108114

109115
/// Settings for configuring [OpenAI](https://openai.com/) embedders
@@ -126,14 +132,18 @@ pub struct HuggingFaceEmbedderSettings {
126132
#[serde(rename_all = "camelCase")]
127133
pub struct OpenAIEmbedderSettings {
128134
/// API key used to authorize against OpenAI.
135+
///
129136
/// [Generate an API key](https://platform.openai.com/api-keys) from your OpenAI account.
130137
/// Use [tier 2 keys](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-two) or above for optimal performance.
131138
pub api_key: String,
132139
/// The OpenAI model name
133140
/// Default: `text-embedding-3-small`
134141
#[serde(skip_serializing_if = "Option::is_none")]
135142
pub model: Option<String>,
136-
/// Defaults to the default for said model name
143+
/// Number of dimensions in the chosen model.
144+
///
145+
/// If not supplied, Meilisearch tries to infer this value.
146+
/// In most cases, `dimensions` should exactly match the number of dimensions of your chosen model.
137147
#[serde(skip_serializing_if = "Option::is_none")]
138148
pub dimensions: Option<usize>,
139149
/// Use it to customize the data you send to the embedder. It is highly recommended you configure a custom template for your documents.
@@ -155,6 +165,12 @@ pub struct OpenAIEmbedderSettings {
155165
/// ```
156166
#[serde(skip_serializing_if = "Option::is_none")]
157167
pub document_template: Option<String>,
168+
/// The maximum size of a rendered document template.
169+
///
170+
/// Longer texts are truncated to fit the configured limit.
171+
/// Default: `400`
172+
#[serde(skip_serializing_if = "Option::is_none")]
173+
pub document_template_max_bytes: Option<usize>,
158174
}
159175

160176
/// Settings for configuring [Ollama](https://ollama.com/) embedders
@@ -215,6 +231,12 @@ pub struct OllamaEmbedderSettings {
215231
/// ```
216232
#[serde(skip_serializing_if = "Option::is_none")]
217233
pub document_template: Option<String>,
234+
/// The maximum size of a rendered document template.
235+
///
236+
/// Longer texts are truncated to fit the configured limit.
237+
/// Default: `400`
238+
#[serde(skip_serializing_if = "Option::is_none")]
239+
pub document_template_max_bytes: Option<usize>,
218240
}
219241

220242
/// Settings for configuring generic [REST](https://en.wikipedia.org/wiki/REST) embedders
@@ -256,17 +278,22 @@ pub struct OllamaEmbedderSettings {
256278
#[serde(rename_all = "camelCase")]
257279
pub struct GenericRestEmbedderSettings {
258280
/// Mandatory, full URL to the embedding endpoint
281+
///
259282
/// Must be parseable as a URL.
260283
/// If not specified, [Meilisearch](https://www.meilisearch.com/) (**not the sdk you are currently using**) will try to fetch the `MEILI_OLLAMA_URL` environment variable
261284
/// Example: `"http://localhost:12345/api/v1/embed"`
262285
#[serde(skip_serializing_if = "Option::is_none")]
263286
pub url: Option<String>,
264-
/// Optional, passed as Bearer in the Authorization header
287+
/// Authentication token Meilisearch should send with each request to the embedder.
288+
///
289+
/// It is passed as a Bearer token in the `Authorization` header.
265290
/// Example: `"187HFLDH97CNHN"`
266291
#[serde(skip_serializing_if = "Option::is_none")]
267292
pub api_key: Option<String>,
268-
/// Optional
269-
/// Inferred with a dummy request if missing
293+
/// Number of dimensions in the chosen model.
294+
///
295+
/// If not supplied, Meilisearch tries to infer this value.
296+
/// In most cases, `dimensions` should exactly match the number of dimensions of your chosen model.
270297
#[serde(skip_serializing_if = "Option::is_none")]
271298
pub dimensions: Option<usize>,
272299
/// Use it to customize the data you send to the embedder. It is highly recommended you configure a custom template for your documents.
@@ -298,6 +325,19 @@ pub struct GenericRestEmbedderSettings {
298325
/// "prompt": "{{text}}"
299326
/// }
300327
/// ```
328+
/// The maximum size of a rendered document template.
329+
///
330+
/// Longer texts are truncated to fit the configured limit.
331+
/// Default: `400`
332+
#[serde(skip_serializing_if = "Option::is_none")]
333+
pub document_template_max_bytes: Option<usize>,
334+
/// A JSON object with the same structure and data as the request you must send to your REST embedder.
335+
///
336+
/// The field containing the input text Meilisearch should send to the embedder must be replaced with `{{text}}`.
337+
/// Example:
338+
/// ```json
339+
/// {"prompt": "{{text}}"}
340+
/// ```
301341
#[serde(skip_serializing_if = "HashMap::is_empty")]
302342
pub request: HashMap<String, serde_json::Value>,
303343
/// A JSON value that represents a fragment of the response made by the remote embedder to Meilisearch.
@@ -329,7 +369,9 @@ pub struct GenericRestEmbedderSettings {
329369
/// When using a custom embedder, you must vectorize both your documents and user queries.
330370
#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq, Copy)]
331371
pub struct UserProvidedEmbedderSettings {
332-
/// dimensions of your custom embedding
372+
/// Number of dimensions in the user-provided model.
373+
///
374+
/// In most cases, `dimensions` should exactly match the number of dimensions of your chosen model.
333375
pub dimensions: usize,
334376
}
335377

0 commit comments

Comments
 (0)