From 612cacc8905e09e74ce126e5f566ddfc18d5098a Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:24:30 +0100 Subject: [PATCH 01/12] fix: add config path to Vale workflow --- .github/workflows/docs-website-vale.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docs-website-vale.yml b/.github/workflows/docs-website-vale.yml index 43e678b941..d963693dc0 100644 --- a/.github/workflows/docs-website-vale.yml +++ b/.github/workflows/docs-website-vale.yml @@ -32,6 +32,7 @@ jobs: - name: Vale uses: errata-ai/vale-action@v2 with: + config: docs-website/.vale.ini files: | docs-website/docs docs-website/versioned_docs From d1b737f2134abe3294b2cbe2b3e8e6e249bc4e73 Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:24:47 +0100 Subject: [PATCH 02/12] test: add doc to test Vale linting --- docs-website/docs/test-vale.mdx | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs-website/docs/test-vale.mdx diff --git a/docs-website/docs/test-vale.mdx b/docs-website/docs/test-vale.mdx new file mode 100644 index 0000000000..69550c490e --- /dev/null +++ b/docs-website/docs/test-vale.mdx @@ -0,0 +1 @@ +I think this is just a simple test page. We will see if vale works. From 98541aaa8cee7aba48ea1a621a484e21d76802fb Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:31:53 +0100 Subject: [PATCH 03/12] Update docs-website-vale.yml --- .github/workflows/docs-website-vale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs-website-vale.yml b/.github/workflows/docs-website-vale.yml index d963693dc0..585d45cf65 100644 --- a/.github/workflows/docs-website-vale.yml +++ b/.github/workflows/docs-website-vale.yml @@ -32,7 +32,7 @@ jobs: - name: Vale uses: errata-ai/vale-action@v2 with: - config: docs-website/.vale.ini + vale_flags: "--config=docs-website/.vale.ini" files: | docs-website/docs docs-website/versioned_docs From f7e27111aa2e7cafdfdccebbd9753d84bfe4817c Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:35:38 +0100 Subject: [PATCH 04/12] test single docs folder --- .github/workflows/docs-website-vale.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/docs-website-vale.yml b/.github/workflows/docs-website-vale.yml index 585d45cf65..89b4d8fb37 100644 --- a/.github/workflows/docs-website-vale.yml +++ b/.github/workflows/docs-website-vale.yml @@ -33,9 +33,7 @@ jobs: uses: errata-ai/vale-action@v2 with: vale_flags: "--config=docs-website/.vale.ini" - files: | - docs-website/docs - docs-website/versioned_docs + files: docs-website/docs reporter: github-pr-review fail_on_error: false env: From 32e1025357bab39d74e03a5c549a1730b5976faf Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:47:48 +0100 Subject: [PATCH 05/12] adjust rules --- docs-website/.vale/styles/Google/Latin.yml | 2 ++ .../.vale/styles/MyStyle/Abbreviations.yml | 9 +++++++ .../.vale/styles/MyStyle/AvoidJust.yml | 7 ------ .../.vale/styles/MyStyle/AvoidPlease.yml | 7 ++++++ .../.vale/styles/MyStyle/Branding.yml | 7 +++--- .../.vale/styles/MyStyle/GUITerms.yml | 25 +++++++++++++++++++ docs-website/.vale/styles/MyStyle/Numbers.yml | 14 +++-------- .../styles/MyStyle/ParameterFormatting.yml | 6 ++--- .../styles/MyStyle/QuestionAnswering.yml | 9 +++++++ .../.vale/styles/MyStyle/TheFollowing.yml | 8 ++++++ .../.vale/styles/MyStyle/WeakWords.yml | 4 +++ .../.vale/styles/MyStyle/WordChoice.yml | 24 ++++++++++++++++++ 12 files changed, 97 insertions(+), 25 deletions(-) create mode 100644 docs-website/.vale/styles/MyStyle/Abbreviations.yml delete mode 100644 docs-website/.vale/styles/MyStyle/AvoidJust.yml create mode 100644 docs-website/.vale/styles/MyStyle/AvoidPlease.yml create mode 100644 docs-website/.vale/styles/MyStyle/GUITerms.yml create mode 100644 docs-website/.vale/styles/MyStyle/QuestionAnswering.yml create mode 100644 docs-website/.vale/styles/MyStyle/TheFollowing.yml create mode 100644 docs-website/.vale/styles/MyStyle/WordChoice.yml diff --git a/docs-website/.vale/styles/Google/Latin.yml b/docs-website/.vale/styles/Google/Latin.yml index d91700de3f..d64c184db4 100644 --- a/docs-website/.vale/styles/Google/Latin.yml +++ b/docs-website/.vale/styles/Google/Latin.yml @@ -9,3 +9,5 @@ action: swap: '\b(?:eg|e\.g\.)[\s,]': for example '\b(?:ie|i\.e\.)[\s,]': that is + '\betc\.': and so on + '\bvs\.': versus diff --git a/docs-website/.vale/styles/MyStyle/Abbreviations.yml b/docs-website/.vale/styles/MyStyle/Abbreviations.yml new file mode 100644 index 0000000000..1e4580392a --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/Abbreviations.yml @@ -0,0 +1,9 @@ +extends: substitution +message: "Avoid abbreviation '%s'. Use '%s' instead." +level: warning +ignorecase: true +action: + name: replace +swap: + '\bmisc\b': miscellaneous + '\bdoc\b': document diff --git a/docs-website/.vale/styles/MyStyle/AvoidJust.yml b/docs-website/.vale/styles/MyStyle/AvoidJust.yml deleted file mode 100644 index a4b028e58c..0000000000 --- a/docs-website/.vale/styles/MyStyle/AvoidJust.yml +++ /dev/null @@ -1,7 +0,0 @@ -extends: existence -message: "'just' is often unnecessary. Consider removing it for clarity." -level: suggestion -nonword: true -scope: text -tokens: - - just diff --git a/docs-website/.vale/styles/MyStyle/AvoidPlease.yml b/docs-website/.vale/styles/MyStyle/AvoidPlease.yml new file mode 100644 index 0000000000..1d207b0bd5 --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/AvoidPlease.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Avoid 'please' in instructions—it can make actions seem optional. Use imperative instead." +level: warning +ignorecase: true +scope: text +tokens: + - please diff --git a/docs-website/.vale/styles/MyStyle/Branding.yml b/docs-website/.vale/styles/MyStyle/Branding.yml index 1928f0ec2b..a210c9874a 100644 --- a/docs-website/.vale/styles/MyStyle/Branding.yml +++ b/docs-website/.vale/styles/MyStyle/Branding.yml @@ -1,12 +1,11 @@ extends: substitution message: "'%s' is not the correct branding. Use '%s' instead." level: error -ignorecase: true +ignorecase: false swap: Deepset: deepset + DEEPSET: deepset Huggingface: Hugging Face huggingface: Hugging Face HuggingFace: Hugging Face - Haystack: Haystack # Pass-through to suppress errors from wrong variants - haystack: Haystack - Deepset Platform: deepset AI Platform + HAYSTACK: Haystack diff --git a/docs-website/.vale/styles/MyStyle/GUITerms.yml b/docs-website/.vale/styles/MyStyle/GUITerms.yml new file mode 100644 index 0000000000..75ccbca9c9 --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/GUITerms.yml @@ -0,0 +1,25 @@ +extends: substitution +message: "Use '%s' instead of '%s'." +level: warning +ignorecase: true +action: + name: replace +swap: + drop-down list: list + drop-down: list + dropdown: list + radio button: option + radio buttons: options + text field: field + text fields: fields + text area: field + hyperlink: link + hyperlinks: links + pop-up menu: context menu + popup menu: context menu + push button: button + push buttons: buttons + list box: list + choice box: list + pull-down list: list + scroll bar: scroll diff --git a/docs-website/.vale/styles/MyStyle/Numbers.yml b/docs-website/.vale/styles/MyStyle/Numbers.yml index a9e4a0d3a2..aedad388d0 100644 --- a/docs-website/.vale/styles/MyStyle/Numbers.yml +++ b/docs-website/.vale/styles/MyStyle/Numbers.yml @@ -1,15 +1,7 @@ extends: existence -message: "Spell out numbers zero through nine unless part of time, measurement, or list." -level: warning +message: "Consider spelling out numbers zero through nine (exceptions: time, measurements, lists with larger numbers)." +level: suggestion scope: text nonword: true tokens: - - 1 - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - - 8 - - 9 + - '\b[1-9]\b' diff --git a/docs-website/.vale/styles/MyStyle/ParameterFormatting.yml b/docs-website/.vale/styles/MyStyle/ParameterFormatting.yml index d321203fad..738bb7f7ac 100644 --- a/docs-website/.vale/styles/MyStyle/ParameterFormatting.yml +++ b/docs-website/.vale/styles/MyStyle/ParameterFormatting.yml @@ -1,8 +1,8 @@ extends: existence message: "Wrap parameter, class, or method names like '%s' in backticks." -level: warning +level: suggestion scope: text raw: - - '\b[a-zA-Z_][a-zA-Z0-9_]*\(\)' # Methods like pipeline() - - '\b[A-Z][a-zA-Z0-9]*\b' # Class names like MyClass + - '\b[a-zA-Z_][a-zA-Z0-9_]*\(\)' # Methods like pipeline(), run() + - '\b[a-z]+_[a-z_]+\b' # snake_case like max_length - '\b[a-z]+[A-Z][a-zA-Z0-9]*\b' # camelCase like maxDepth diff --git a/docs-website/.vale/styles/MyStyle/QuestionAnswering.yml b/docs-website/.vale/styles/MyStyle/QuestionAnswering.yml new file mode 100644 index 0000000000..7bf9ce3b5e --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/QuestionAnswering.yml @@ -0,0 +1,9 @@ +extends: substitution +message: "Write '%s' without a hyphen: '%s'." +level: warning +ignorecase: true +action: + name: replace +swap: + question-answering: question answering + document-retrieval: document retrieval diff --git a/docs-website/.vale/styles/MyStyle/TheFollowing.yml b/docs-website/.vale/styles/MyStyle/TheFollowing.yml new file mode 100644 index 0000000000..73b33a5750 --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/TheFollowing.yml @@ -0,0 +1,8 @@ +extends: existence +message: "Avoid '%s'—it's implied by the colon. Rephrase or remove." +level: suggestion +ignorecase: true +scope: text +tokens: + - the following + - as follows diff --git a/docs-website/.vale/styles/MyStyle/WeakWords.yml b/docs-website/.vale/styles/MyStyle/WeakWords.yml index b750e67a1d..bf3c124e4d 100644 --- a/docs-website/.vale/styles/MyStyle/WeakWords.yml +++ b/docs-website/.vale/styles/MyStyle/WeakWords.yml @@ -8,3 +8,7 @@ tokens: - simply - basically - leverage + - very + - really + - extremely + - quite diff --git a/docs-website/.vale/styles/MyStyle/WordChoice.yml b/docs-website/.vale/styles/MyStyle/WordChoice.yml new file mode 100644 index 0000000000..17aa7dbbb5 --- /dev/null +++ b/docs-website/.vale/styles/MyStyle/WordChoice.yml @@ -0,0 +1,24 @@ +extends: substitution +message: "Use '%s' instead of '%s'." +level: warning +ignorecase: true +action: + name: replace +swap: + utilize: use + utilizes: uses + utilizing: using + utilized: used + purchase: buy + purchases: buys + purchasing: buying + purchased: bought + assist: help + assists: helps + assisting: helping + assisted: helped + info: information + leverage: use + leverages: uses + leveraging: using + leveraged: used From 201432fddecbf9632ce55f7a8236b2b789c3e88d Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:49:28 +0100 Subject: [PATCH 06/12] file for testing vale comments --- docs-website/docs/test-vale-rules.mdx | 51 +++++++++++++++++++++++++++ docs-website/docs/test-vale.mdx | 1 - 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 docs-website/docs/test-vale-rules.mdx delete mode 100644 docs-website/docs/test-vale.mdx diff --git a/docs-website/docs/test-vale-rules.mdx b/docs-website/docs/test-vale-rules.mdx new file mode 100644 index 0000000000..2fb156a980 --- /dev/null +++ b/docs-website/docs/test-vale-rules.mdx @@ -0,0 +1,51 @@ +# test vale rules page + +This doc is just a simple test to verify our Vale linter rules are working. + +## testing branding + +We use Deepset and huggingface models in haystack pipelines. + +## testing word choice + +Please utilize this guide to assist you in purchasing the right components. Here's some info about leveraging our tools. + +## testing weak words + +This is basically very simple. You can simply configure it and it will really work extremely well. + +## testing latin abbreviations + +You can use embedders, retrievers, etc. to build pipelines, e.g. for question-answering tasks, i.e. answering user queries. + +## testing gui terms + +Select an option from the drop-down list. Click on the radio button and enter text in the text field. Follow the hyperlink for more info. + +## testing voice and pronouns + +I think this allows you to build pipelines. The user can configure his settings. We will enable you to create great apps. + +## testing the following + +Configure the following parameters: +- Parameter one +- Parameter two + +As follows, we describe the steps: + +## testing numbers and time + +There are 3 steps to complete this in 5 minutes. The meeting is at 14:00. + +## testing formatting + +Use the pipeline() method and set the max_length parameter. Press enter to confirm. + +## testing file extensions + +Save your file as a json or pdf file. + +## testing abbreviations + +Check the misc settings in the doc. diff --git a/docs-website/docs/test-vale.mdx b/docs-website/docs/test-vale.mdx deleted file mode 100644 index 69550c490e..0000000000 --- a/docs-website/docs/test-vale.mdx +++ /dev/null @@ -1 +0,0 @@ -I think this is just a simple test page. We will see if vale works. From 8d1a70c13fffcc59ea5b9faebf2d89f0124a28a4 Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:54:47 +0100 Subject: [PATCH 07/12] adjust rules --- docs-website/.vale.ini | 3 +++ docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml | 2 +- docs-website/.vale/styles/MyStyle/FileExtensions.yml | 2 +- docs-website/.vale/styles/MyStyle/Headings.yml | 2 +- docs-website/.vale/styles/MyStyle/KeyboardKeys.yml | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs-website/.vale.ini b/docs-website/.vale.ini index ed71526f60..8ab3f48358 100644 --- a/docs-website/.vale.ini +++ b/docs-website/.vale.ini @@ -5,3 +5,6 @@ MinAlertLevel = suggestion [*.{md,mdx}] BasedOnStyles = Vale, Google, MyStyle + +# Disable Vale's built-in spell checker (flags technical terms like "embedders") +Vale.Spelling = NO diff --git a/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml b/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml index 241a4665b5..eca7e7e6a0 100644 --- a/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml +++ b/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml @@ -1,5 +1,5 @@ extends: existence -message: "Prefer second person (e.g., 'you') instead of '%s'." +message: "Prefer second person ('you') instead of '%s'." level: warning nonword: true scope: text diff --git a/docs-website/.vale/styles/MyStyle/FileExtensions.yml b/docs-website/.vale/styles/MyStyle/FileExtensions.yml index 2ecf250316..6e34dbc192 100644 --- a/docs-website/.vale/styles/MyStyle/FileExtensions.yml +++ b/docs-website/.vale/styles/MyStyle/FileExtensions.yml @@ -1,5 +1,5 @@ extends: substitution -message: "Use uppercase for file extensions (e.g., '%s' → '%s')." +message: "Use uppercase for file extensions: '%s' → '%s'." level: warning ignorecase: false swap: diff --git a/docs-website/.vale/styles/MyStyle/Headings.yml b/docs-website/.vale/styles/MyStyle/Headings.yml index 6abe1b2a8a..8840d1d5ab 100644 --- a/docs-website/.vale/styles/MyStyle/Headings.yml +++ b/docs-website/.vale/styles/MyStyle/Headings.yml @@ -1,5 +1,5 @@ extends: capitalization -message: "Headings and titles should use title case (e.g., 'Create a Workspace in deepset Cloud')." +message: "Headings and titles should use title case." level: warning scope: heading style: AP diff --git a/docs-website/.vale/styles/MyStyle/KeyboardKeys.yml b/docs-website/.vale/styles/MyStyle/KeyboardKeys.yml index 1686014ad0..f1bdcd2ce4 100644 --- a/docs-website/.vale/styles/MyStyle/KeyboardKeys.yml +++ b/docs-website/.vale/styles/MyStyle/KeyboardKeys.yml @@ -1,5 +1,5 @@ extends: existence -message: "Keyboard keys should be capitalized (e.g., 'Delete', 'F2')." +message: "Keyboard keys should be capitalized ('Delete', 'Enter', 'F2')." level: warning scope: text tokens: From f43e0f435fe1c894332779c921f0b9783ae77ad0 Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 19:59:39 +0100 Subject: [PATCH 08/12] abbreviation rule swap --- docs-website/.vale/styles/MyStyle/Abbreviations.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs-website/.vale/styles/MyStyle/Abbreviations.yml b/docs-website/.vale/styles/MyStyle/Abbreviations.yml index 1e4580392a..795b6397d6 100644 --- a/docs-website/.vale/styles/MyStyle/Abbreviations.yml +++ b/docs-website/.vale/styles/MyStyle/Abbreviations.yml @@ -1,5 +1,5 @@ extends: substitution -message: "Avoid abbreviation '%s'. Use '%s' instead." +message: "Use '%s' instead of the abbreviation '%s'." level: warning ignorecase: true action: From d15b1bf8ca290e8d7e0ace72fa6d957d36c81494 Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 20:04:18 +0100 Subject: [PATCH 09/12] simpler test file --- docs-website/docs/test-new-component-page.mdx | 100 ++++++++++++++++++ docs-website/docs/test-vale-rules.mdx | 51 --------- 2 files changed, 100 insertions(+), 51 deletions(-) create mode 100644 docs-website/docs/test-new-component-page.mdx delete mode 100644 docs-website/docs/test-vale-rules.mdx diff --git a/docs-website/docs/test-new-component-page.mdx b/docs-website/docs/test-new-component-page.mdx new file mode 100644 index 0000000000..6f5bb08ace --- /dev/null +++ b/docs-website/docs/test-new-component-page.mdx @@ -0,0 +1,100 @@ +--- +title: "ExampleRetriever" +id: exampleretriever +slug: "/exampleretriever" +description: "A retriever component for fetching documents from an example document store." +--- + +# ExampleRetriever + +A retriever component that allows you to fetch relevant documents from an ExampleDocumentStore. + +
+ +| | | +| --- | --- | +| **Most common position in a pipeline** | After a query embedder, before a reader or generator | +| **Mandatory init variables** | `document_store`: An instance of `ExampleDocumentStore` | +| **Mandatory run variables** | `query_embedding`: A list of floats representing the query vector | +| **Output variables** | `documents`: A list of relevant [`Document`](../../concepts/data-classes/document.mdx) objects | +| **API reference** | [Retrievers](/reference/retrievers-api) | + +
+ +## Overview + +The `ExampleRetriever` enables you to perform semantic search over documents stored in an ExampleDocumentStore. The user can configure various parameters to customize the retrieval behavior, e.g. the number of documents to return. + +This component utilizes vector similarity to find relevant documents. It supports the following distance metrics: + +- Cosine similarity +- Dot product +- Euclidean distance + +Please note that the retriever requires documents to be indexed with embeddings before retrieval can work. + +## Usage + +### On its own + +To use the retriever, the user needs to first initialize an ExampleDocumentStore and index some documents: + +```python +from haystack import Document +from haystack.document_stores import ExampleDocumentStore +from haystack.components.retrievers import ExampleRetriever + +# Initialize the document store +document_store = ExampleDocumentStore() + +# Index some documents +documents = [ + Document(content="haystack is an open-source framework for building AI applications."), + Document(content="You can build question-answering systems with Haystack."), +] +document_store.write_documents(documents) + +# Initialize the retriever +retriever = ExampleRetriever(document_store=document_store, top_k=3) + +# Run retrieval (assuming you have a query embedding) +result = retriever.run(query_embedding=[0.1, 0.2, 0.3]) +print(result["documents"]) +``` + +### In a Pipeline + +The retriever is typically used in a RAG pipeline after an embedder: + +```python +from haystack import Pipeline +from haystack.components.embedders import SentenceTransformersTextEmbedder +from haystack.components.retrievers import ExampleRetriever +from haystack.document_stores import ExampleDocumentStore + +document_store = ExampleDocumentStore() + +pipeline = Pipeline() +pipeline.add_component("embedder", SentenceTransformersTextEmbedder()) +pipeline.add_component("retriever", ExampleRetriever(document_store=document_store)) +pipeline.connect("embedder.embedding", "retriever.query_embedding") + +result = pipeline.run({"embedder": {"text": "What is Haystack?"}}) +``` + +## Configuration + +The retriever accepts the following initialization parameters: + +| Parameter | Type | Default | Description | +| --- | --- | --- | --- | +| `document_store` | `ExampleDocumentStore` | Required | The document store to retrieve from | +| `top_k` | `int` | `10` | Number of documents to return | +| `filters` | `dict` | `None` | Filters to apply during retrieval | + +You can export the configuration to a json file for later use. + +## Related Links + +- [ExampleDocumentStore documentation](../document-stores/exampledocumentstore.mdx) +- [Building a RAG Pipeline](../../tutorials/rag-pipeline.mdx) diff --git a/docs-website/docs/test-vale-rules.mdx b/docs-website/docs/test-vale-rules.mdx deleted file mode 100644 index 2fb156a980..0000000000 --- a/docs-website/docs/test-vale-rules.mdx +++ /dev/null @@ -1,51 +0,0 @@ -# test vale rules page - -This doc is just a simple test to verify our Vale linter rules are working. - -## testing branding - -We use Deepset and huggingface models in haystack pipelines. - -## testing word choice - -Please utilize this guide to assist you in purchasing the right components. Here's some info about leveraging our tools. - -## testing weak words - -This is basically very simple. You can simply configure it and it will really work extremely well. - -## testing latin abbreviations - -You can use embedders, retrievers, etc. to build pipelines, e.g. for question-answering tasks, i.e. answering user queries. - -## testing gui terms - -Select an option from the drop-down list. Click on the radio button and enter text in the text field. Follow the hyperlink for more info. - -## testing voice and pronouns - -I think this allows you to build pipelines. The user can configure his settings. We will enable you to create great apps. - -## testing the following - -Configure the following parameters: -- Parameter one -- Parameter two - -As follows, we describe the steps: - -## testing numbers and time - -There are 3 steps to complete this in 5 minutes. The meeting is at 14:00. - -## testing formatting - -Use the pipeline() method and set the max_length parameter. Press enter to confirm. - -## testing file extensions - -Save your file as a json or pdf file. - -## testing abbreviations - -Check the misc settings in the doc. From 73bb025dc126a3e03193c7900de93065fe77cc8c Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 20:15:05 +0100 Subject: [PATCH 10/12] adjust more rules after review --- docs-website/.vale.ini | 11 +++++++++++ .../.vale/styles/MyStyle/AvoidThirdPerson.yml | 9 ++++----- docs-website/.vale/styles/MyStyle/PassiveVoice.yml | 9 --------- 3 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 docs-website/.vale/styles/MyStyle/PassiveVoice.yml diff --git a/docs-website/.vale.ini b/docs-website/.vale.ini index 8ab3f48358..e615d9ece8 100644 --- a/docs-website/.vale.ini +++ b/docs-website/.vale.ini @@ -8,3 +8,14 @@ BasedOnStyles = Vale, Google, MyStyle # Disable Vale's built-in spell checker (flags technical terms like "embedders") Vale.Spelling = NO + +# Disable Google.Headings - conflicts with MyStyle.Headings (title case vs sentence case) +# and doesn't handle PascalCase component names well +Google.Headings = NO + +# Disable Google.Passive - too noisy for technical documentation where passive voice +# is often appropriate and hard to rephrase +Google.Passive = NO + +# Disable Google.Acronyms - AI/ML acronyms like RAG, LLM are well-known to our audience +Google.Acronyms = NO diff --git a/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml b/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml index eca7e7e6a0..e8b411b81e 100644 --- a/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml +++ b/docs-website/.vale/styles/MyStyle/AvoidThirdPerson.yml @@ -1,14 +1,13 @@ extends: existence message: "Prefer second person ('you') instead of '%s'." level: warning -nonword: true scope: text tokens: - the user - the customer - the client - - he - - she - - his - - her + - '\bhe\b' + - '\bshe\b' + - '\bhis\b' + - '\bher\b' - their system diff --git a/docs-website/.vale/styles/MyStyle/PassiveVoice.yml b/docs-website/.vale/styles/MyStyle/PassiveVoice.yml deleted file mode 100644 index 44a246e6d6..0000000000 --- a/docs-website/.vale/styles/MyStyle/PassiveVoice.yml +++ /dev/null @@ -1,9 +0,0 @@ -extends: existence -message: "Rephrase passive or indirect constructions like '%s' to be more active and direct." -level: suggestion -nonword: true -scope: text -tokens: - - allows you to - - enables you to - - is used to From 1eb1c3a983bba9c0d331913a6f25bf8da03b3054 Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 20:18:30 +0100 Subject: [PATCH 11/12] new testing mdx file --- docs-website/docs/test-component-page.mdx | 64 +++++++++++ docs-website/docs/test-new-component-page.mdx | 100 ------------------ 2 files changed, 64 insertions(+), 100 deletions(-) create mode 100644 docs-website/docs/test-component-page.mdx delete mode 100644 docs-website/docs/test-new-component-page.mdx diff --git a/docs-website/docs/test-component-page.mdx b/docs-website/docs/test-component-page.mdx new file mode 100644 index 0000000000..4f4e629952 --- /dev/null +++ b/docs-website/docs/test-component-page.mdx @@ -0,0 +1,64 @@ +--- +title: "CustomEmbedder" +id: customembedder +slug: "/customembedder" +description: "An embedder component for generating vector representations of text." +--- + +# CustomEmbedder + +An embedder component for generating vector representations of text using custom models. + +
+ +| | | +| --- | --- | +| **Most common position in a pipeline** | Before a retriever | +| **Mandatory init variables** | `model_name`: The name of the embedding model to use | +| **Mandatory run variables** | `text`: The text to embed | +| **Output variables** | `embedding`: A list of floats representing the vector | +| **API reference** | [Embedders](/reference/embedders-api) | + +
+ +## Overview + +`CustomEmbedder` generates dense vector representations of text. The user can configure the model and its parameters during initialization. + +This component supports multiple embedding providers, e.g. OpenAI, Cohere, and local models. + +## Usage + +### On Its Own + +```python +from haystack.components.embedders import CustomEmbedder + +embedder = CustomEmbedder(model_name="sentence-transformers/all-MiniLM-L6-v2") +result = embedder.run(text="What is haystack?") +print(result["embedding"]) +``` + +### In a Pipeline + +Please initialize the embedder before adding it to a pipeline: + +```python +from haystack import Pipeline +from haystack.components.embedders import CustomEmbedder +from haystack.components.retrievers import InMemoryEmbeddingRetriever + +pipeline = Pipeline() +pipeline.add_component("embedder", CustomEmbedder(model_name="all-MiniLM-L6-v2")) +pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=store)) +pipeline.connect("embedder.embedding", "retriever.query_embedding") +``` + +## Configuration + +| Parameter | Type | Default | Description | +| --- | --- | --- | --- | +| `model_name` | `str` | Required | The embedding model to utilize | +| `batch_size` | `int` | `32` | Number of texts to process at once | + +Export your configuration to a json file for reproducibility. diff --git a/docs-website/docs/test-new-component-page.mdx b/docs-website/docs/test-new-component-page.mdx deleted file mode 100644 index 6f5bb08ace..0000000000 --- a/docs-website/docs/test-new-component-page.mdx +++ /dev/null @@ -1,100 +0,0 @@ ---- -title: "ExampleRetriever" -id: exampleretriever -slug: "/exampleretriever" -description: "A retriever component for fetching documents from an example document store." ---- - -# ExampleRetriever - -A retriever component that allows you to fetch relevant documents from an ExampleDocumentStore. - -
- -| | | -| --- | --- | -| **Most common position in a pipeline** | After a query embedder, before a reader or generator | -| **Mandatory init variables** | `document_store`: An instance of `ExampleDocumentStore` | -| **Mandatory run variables** | `query_embedding`: A list of floats representing the query vector | -| **Output variables** | `documents`: A list of relevant [`Document`](../../concepts/data-classes/document.mdx) objects | -| **API reference** | [Retrievers](/reference/retrievers-api) | - -
- -## Overview - -The `ExampleRetriever` enables you to perform semantic search over documents stored in an ExampleDocumentStore. The user can configure various parameters to customize the retrieval behavior, e.g. the number of documents to return. - -This component utilizes vector similarity to find relevant documents. It supports the following distance metrics: - -- Cosine similarity -- Dot product -- Euclidean distance - -Please note that the retriever requires documents to be indexed with embeddings before retrieval can work. - -## Usage - -### On its own - -To use the retriever, the user needs to first initialize an ExampleDocumentStore and index some documents: - -```python -from haystack import Document -from haystack.document_stores import ExampleDocumentStore -from haystack.components.retrievers import ExampleRetriever - -# Initialize the document store -document_store = ExampleDocumentStore() - -# Index some documents -documents = [ - Document(content="haystack is an open-source framework for building AI applications."), - Document(content="You can build question-answering systems with Haystack."), -] -document_store.write_documents(documents) - -# Initialize the retriever -retriever = ExampleRetriever(document_store=document_store, top_k=3) - -# Run retrieval (assuming you have a query embedding) -result = retriever.run(query_embedding=[0.1, 0.2, 0.3]) -print(result["documents"]) -``` - -### In a Pipeline - -The retriever is typically used in a RAG pipeline after an embedder: - -```python -from haystack import Pipeline -from haystack.components.embedders import SentenceTransformersTextEmbedder -from haystack.components.retrievers import ExampleRetriever -from haystack.document_stores import ExampleDocumentStore - -document_store = ExampleDocumentStore() - -pipeline = Pipeline() -pipeline.add_component("embedder", SentenceTransformersTextEmbedder()) -pipeline.add_component("retriever", ExampleRetriever(document_store=document_store)) -pipeline.connect("embedder.embedding", "retriever.query_embedding") - -result = pipeline.run({"embedder": {"text": "What is Haystack?"}}) -``` - -## Configuration - -The retriever accepts the following initialization parameters: - -| Parameter | Type | Default | Description | -| --- | --- | --- | --- | -| `document_store` | `ExampleDocumentStore` | Required | The document store to retrieve from | -| `top_k` | `int` | `10` | Number of documents to return | -| `filters` | `dict` | `None` | Filters to apply during retrieval | - -You can export the configuration to a json file for later use. - -## Related Links - -- [ExampleDocumentStore documentation](../document-stores/exampledocumentstore.mdx) -- [Building a RAG Pipeline](../../tutorials/rag-pipeline.mdx) From baa554b334dd0dbc000de5d05b45b2ee2dae0a7a Mon Sep 17 00:00:00 2001 From: Darja Fokina Date: Tue, 9 Dec 2025 20:28:55 +0100 Subject: [PATCH 12/12] Delete test-component-page.mdx --- docs-website/docs/test-component-page.mdx | 64 ----------------------- 1 file changed, 64 deletions(-) delete mode 100644 docs-website/docs/test-component-page.mdx diff --git a/docs-website/docs/test-component-page.mdx b/docs-website/docs/test-component-page.mdx deleted file mode 100644 index 4f4e629952..0000000000 --- a/docs-website/docs/test-component-page.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "CustomEmbedder" -id: customembedder -slug: "/customembedder" -description: "An embedder component for generating vector representations of text." ---- - -# CustomEmbedder - -An embedder component for generating vector representations of text using custom models. - -
- -| | | -| --- | --- | -| **Most common position in a pipeline** | Before a retriever | -| **Mandatory init variables** | `model_name`: The name of the embedding model to use | -| **Mandatory run variables** | `text`: The text to embed | -| **Output variables** | `embedding`: A list of floats representing the vector | -| **API reference** | [Embedders](/reference/embedders-api) | - -
- -## Overview - -`CustomEmbedder` generates dense vector representations of text. The user can configure the model and its parameters during initialization. - -This component supports multiple embedding providers, e.g. OpenAI, Cohere, and local models. - -## Usage - -### On Its Own - -```python -from haystack.components.embedders import CustomEmbedder - -embedder = CustomEmbedder(model_name="sentence-transformers/all-MiniLM-L6-v2") -result = embedder.run(text="What is haystack?") -print(result["embedding"]) -``` - -### In a Pipeline - -Please initialize the embedder before adding it to a pipeline: - -```python -from haystack import Pipeline -from haystack.components.embedders import CustomEmbedder -from haystack.components.retrievers import InMemoryEmbeddingRetriever - -pipeline = Pipeline() -pipeline.add_component("embedder", CustomEmbedder(model_name="all-MiniLM-L6-v2")) -pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=store)) -pipeline.connect("embedder.embedding", "retriever.query_embedding") -``` - -## Configuration - -| Parameter | Type | Default | Description | -| --- | --- | --- | --- | -| `model_name` | `str` | Required | The embedding model to utilize | -| `batch_size` | `int` | `32` | Number of texts to process at once | - -Export your configuration to a json file for reproducibility.