# This YAML configuration file is used to set up and configure the Question Answering RAG template.
# It defines various components such as data sources, language models, embedders, splitters, parsers, and retrievers.
# Each section is configured to specify how the template should process and handle data for generating responses.
# You can learn more about the YAML syntax here: https://pathway.com/developers/templates/configure-yaml
5+
6+
7+
# $sources defines the data sources whose contents will be indexed by the RAG.
# You can learn more about how to configure data sources here:
# https://pathway.com/developers/templates/yaml-examples/data-sources-examples

$sources:
  # File System connector, reading data locally.
  - !pw.io.fs.read
    path: data
    format: binary
    with_metadata: true
618
19+ # Uncomment to use the SharePoint connector
720 # - !pw.xpacks.connectors.sharepoint.read
821 # url: $SHAREPOINT_URL
922 # tenant: $SHAREPOINT_TENANT
@@ -14,6 +27,7 @@ $sources:
1427 # with_metadata: true
1528 # refresh_interval: 30
1629
30+ # Uncomment to use the Google Drive connector
1731 # - !pw.io.gdrive.read
1832 # object_id: $DRIVE_ID
1933 # service_user_credentials_file: gdrive_indexer.json
@@ -24,6 +38,13 @@ $sources:
2438 # with_metadata: true
2539 # refresh_interval: 30
2640
41+
# Configures the LLM used for generating responses.
# The available Pathway LLM wrappers are listed here:
# https://pathway.com/developers/api-docs/pathway-xpacks-llm/llms
# You can learn more about them in our documentation:
# https://pathway.com/developers/templates/rag-customization/llm-chats
47+
2748$llm : !pw.xpacks.llm.llms.OpenAIChat
2849 model : " gpt-4o"
2950 retry_strategy : !pw.udfs.ExponentialBackoffRetryStrategy
@@ -32,27 +53,37 @@ $llm: !pw.xpacks.llm.llms.OpenAIChat
3253 temperature : 0
3354 capacity : 8
3455
# Specifies the embedder model for converting text into embeddings.
$embedder: !pw.xpacks.llm.embedders.OpenAIEmbedder
  model: "text-embedding-ada-002"
  # Marks this embedder's calls as cacheable; see the `with_cache` option
  # near the end of this file for turning on-disk caching on.
  cache_strategy: !pw.udfs.DefaultCache
3860
# Defines the splitter settings for dividing text into smaller chunks.
$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
  # Presumably the upper bound on tokens per chunk -- confirm against the
  # TokenCountSplitter documentation before tuning.
  max_tokens: 400
4164
# Configures the parser for processing and extracting information from documents.
$parser: !pw.xpacks.llm.parsers.DoclingParser
  async_mode: "fully_async"
4468
# Sets up the retriever factory for indexing and retrieving documents.
$retriever_factory: !pw.stdlib.indexing.UsearchKnnFactory
  reserved_space: 1000
  # Reuses the embedder defined under `$embedder`.
  embedder: $embedder
  # NOTE(review): COS presumably selects cosine similarity -- confirm
  # against the USearchMetricKind documentation.
  metric: !pw.stdlib.indexing.USearchMetricKind.COS
4974
# Manages the storage and retrieval of documents for the RAG template.
# Each field below refers to the `$`-prefixed entry of the same name
# defined in this file.
$document_store: !pw.xpacks.llm.document_store.DocumentStore
  docs: $sources
  parser: $parser
  splitter: $splitter
  retriever_factory: $retriever_factory
5581
# Configures the question-answering component using the RAG approach.
# The component builds a RAG over an index.
# You can interact with the resulting RAG through a REST API.
# You can learn more about the available operations here:
# https://pathway.com/developers/templates/rag-customization/rest-api
5687question_answerer : !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer
5788 llm : $llm
5889 indexer : $document_store
@@ -63,11 +94,11 @@ question_answerer: !pw.xpacks.llm.question_answering.BaseRAGQuestionAnswerer
6394 # and `{context}` as a placeholder for context documents.
6495 # prompt_template: "Given these documents: {context}, please answer the question: {query}"
6596
66- # Change host and port by uncommenting these lines
97+ # Change host and port of the webserver by uncommenting these lines
6798# host: "0.0.0.0"
6899# port: $PATHWAY_PORT
69100
70- # Cache configuration
101+ # Activate on-disk caching for UDFs for which `cache_strategy` is set
71102# with_cache: true
72103
73104# If `terminate_on_error` is true then the program will terminate whenever any error is encountered.
0 commit comments