
Commit 43545ca

Authored by xouyang1, atharva-tendle, AstraBert, and cpoerschke
Add Solr reader integration (#19843)
* integrations: Add Solr reader
  Co-authored-by: Atharva Tendle <[email protected]>
* Update pyproject.toml authors maintainers (#1)
* Update llama-index-integrations/readers/llama-index-readers-solr/pyproject.toml
  Co-authored-by: Clelia (Astra) Bertelli <[email protected]>
* Update Solr reader for review comments related to clean up and adding unit tests (#2)
* Update llama-index-integrations/readers/llama-index-readers-solr/llama_index/readers/solr/base.py
  Co-authored-by: Christine Poerschke <[email protected]>
* Add support for user provided fl and id
* Update llama-index-integrations/readers/llama-index-readers-solr/llama_index/readers/solr/base.py
  Co-authored-by: Christine Poerschke <[email protected]>

---------

Co-authored-by: Atharva Tendle <[email protected]>
Co-authored-by: Clelia (Astra) Bertelli <[email protected]>
Co-authored-by: Christine Poerschke <[email protected]>
1 parent 9be3a8b commit 43545ca

File tree

11 files changed: +4743 -0 lines changed

Lines changed (.gitignore): 153 additions & 0 deletions
@@ -0,0 +1,153 @@
llama_index/_static
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
etc/
include/
lib/
lib64/
parts/
sdist/
share/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.ruff_cache

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints
notebooks/

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Jetbrains
.idea
modules/
*.swp

# VsCode
.vscode

# pipenv
Pipfile
Pipfile.lock

# pyright
pyrightconfig.json
Lines changed (CHANGELOG.md): 1 addition & 0 deletions
@@ -0,0 +1 @@
# CHANGELOG
Lines changed (LICENSE): 21 additions & 0 deletions
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Bloomberg Finance L.P.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Lines changed (Makefile): 17 additions & 0 deletions
@@ -0,0 +1,17 @@
GIT_ROOT ?= $(shell git rev-parse --show-toplevel)

help: ## Show all Makefile targets.
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'

format: ## Run code autoformatters (black).
	pre-commit install
	git ls-files | xargs pre-commit run black --files

lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files

test: ## Run tests via pytest.
	pytest tests

watch-docs: ## Build and watch documentation.
	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
Lines changed (README.md): 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# LlamaIndex Readers Integration: Solr

## Overview

Solr Reader retrieves documents through an existing Solr index. These documents can then be used in a downstream LlamaIndex data structure.

### Installation

You can install Solr Reader via pip:

```bash
pip install llama-index-readers-solr
```

## Usage

```python
from llama_index.readers.solr import SolrReader

# Initialize SolrReader with the Solr URL. The Solr URL should include the path
# to the core (if single node) or collection (if Solr Cloud).
reader = SolrReader(endpoint="<Endpoint with full solr path>")

# Load data from Solr index
documents = reader.load_data(
    query={"q": "*:*", "rows": 10},  # Solr query parameters
    field="content_t",  # Only results with populated values in this field will be returned
    metadata_fields=["title_t", "category_s"],
)
```

This loader is designed to load data into
[LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or to be used
subsequently as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent.
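
Beyond the defaults shown in the README above, this commit also adds support for a user-provided `fl` and a custom ID field. The snippet below is a minimal sketch of that usage based on the `load_data` signature in `base.py`; the endpoint URL and the field names (`doc_id_s`, `body_t`, `title_t`) are illustrative placeholders, not values taken from this commit.

```python
from llama_index.readers.solr import SolrReader

# Placeholder endpoint; point this at your own core or collection.
reader = SolrReader(endpoint="http://localhost:8983/solr/my_collection")

# When "fl" is present in the query it is passed through to Solr as-is,
# and id_field selects a document identifier other than the default "id".
# Field names here (doc_id_s, body_t, title_t) are illustrative only.
documents = reader.load_data(
    query={
        "q": "body_t:solr",
        "rows": 25,
        "fl": "doc_id_s,body_t,title_t",  # respected exactly as given
    },
    field="body_t",       # field used as document text
    id_field="doc_id_s",  # custom identifier field
    metadata_fields=["title_t"],
)
```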
Lines changed (llama_index/readers/solr/__init__.py): 3 additions & 0 deletions
@@ -0,0 +1,3 @@
from llama_index.readers.solr.base import SolrReader

__all__ = ["SolrReader"]
Lines changed (llama_index/readers/solr/base.py): 99 additions & 0 deletions
@@ -0,0 +1,99 @@
"""
Solr reader over REST api.
"""

from typing import Any, Optional

import pysolr

from llama_index.core.bridge.pydantic import Field, PrivateAttr
from llama_index.core.readers.base import BasePydanticReader
from llama_index.core.schema import Document


class SolrReader(BasePydanticReader):
    """
    Read documents from a Solr index.

    These documents can then be used in a downstream Llama Index data structure.
    """

    endpoint: str = Field(description="Full endpoint, including collection info.")
    _client: Any = PrivateAttr()

    def __init__(
        self,
        endpoint: str,
    ):
        """Initialize with parameters."""
        super().__init__(endpoint=endpoint)
        self._client = pysolr.Solr(endpoint)

    def load_data(
        self,
        query: dict[str, Any],
        field: str,
        id_field: str = "id",
        metadata_fields: Optional[list[str]] = None,
        embedding: Optional[str] = None,
    ) -> list[Document]:
        r"""
        Read data from the Solr index. At least one field argument must be specified.

        Args:
            query (dict): The Solr query parameters.
                - "q" is required.
                - "rows" should be specified or will default to 10 by Solr.
                - If "fl" is provided, it is respected exactly as given.
                  If "fl" is NOT provided, a default `fl` is constructed from
                  {id_field, field, embedding?, metadata_fields?}.
            field (str): Field in Solr to retrieve as document text.
            id_field (str): Field in Solr to retrieve as the document identifier. Defaults to "id".
            metadata_fields (list[str], optional): Fields to include as metadata. Defaults to None.
            embedding (str, optional): Field to use for embeddings. Defaults to None.

        Raises:
            ValueError: If the HTTP call to Solr fails.

        Returns:
            list[Document]: A list of retrieved documents where field is populated.

        """
        if "q" not in query:
            raise ValueError("Query parameters must include a 'q' field for the query.")

        fl_default = {}
        if "fl" not in query:
            fields = [id_field, field]
            if embedding:
                fields.append(embedding)
            if metadata_fields:
                fields.extend(metadata_fields)
            fl_default = {"fl": ",".join(fields)}

        try:
            query_params = {
                **query,
                **fl_default,
            }
            results = self._client.search(**query_params)
        except Exception as e:  # pragma: no cover
            raise ValueError(f"Failed to query Solr endpoint: {e!s}") from e

        documents: list[Document] = []
        for doc in results.docs:
            if field not in doc:
                continue

            doc_kwargs: dict[str, Any] = {
                "id_": str(doc[id_field]),
                "text": doc[field],
                **({"embedding": doc.get(embedding)} if embedding else {}),
                "metadata": {
                    metadata_field: doc[metadata_field]
                    for metadata_field in (metadata_fields or [])
                    if metadata_field in doc
                },
            }
            documents.append(Document(**doc_kwargs))
        return documents
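
As a side note on the `fl` handling above: when the caller's query omits `fl`, `load_data` assembles one from the id field, the text field, and the optional embedding and metadata fields. The standalone sketch below (not part of the commit) mirrors that logic so the resulting `fl` string is easy to see; the field names in the demo call are placeholders.

```python
from typing import Optional


def build_default_fl(
    id_field: str,
    field: str,
    embedding: Optional[str] = None,
    metadata_fields: Optional[list[str]] = None,
) -> str:
    """Mirror SolrReader.load_data: join id, text, optional embedding, and metadata fields."""
    fields = [id_field, field]
    if embedding:
        fields.append(embedding)
    if metadata_fields:
        fields.extend(metadata_fields)
    return ",".join(fields)


# Placeholder field names; prints "id,content_t,vector,title_t,category_s".
print(build_default_fl("id", "content_t", "vector", ["title_t", "category_s"]))
```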
