diff --git a/.gitignore b/.gitignore index 0e85fbb786..dfbe771e1e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ .pants.d/ dist/ migration_scripts/ +**data_wiki # IDEs .idea diff --git a/docs/docs/examples/node_postprocessor/REBELRerank.ipynb b/docs/docs/examples/node_postprocessor/REBELRerank.ipynb new file mode 100644 index 0000000000..74aa82cf96 --- /dev/null +++ b/docs/docs/examples/node_postprocessor/REBELRerank.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# REBEL Reranker Demonstration (Van Gogh Wiki)\n", + "\n", + "This demo showcases how to use REBEL to rerank passages. \n", + "\n", + "REBEL proposes a novel multi-criteria chain-of-thought approach to reranking, which uses a query-dependent meta-prompt to find the \"most useful\" passages (beyond just relevance).\n", + "\n", + "It compares query search results from Van Gogh’s wikipedia with just retrieval and retrieval+REBEL.\n", + "\n", + "\n", + "_______________________________\n", + "Paper: [OpenReview](https://openreview.net/pdf?id=bl884pjXhN)\n", + "\n", + "Code: [Experiments](https://github.com/microsoft/REBEL)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "\n", + "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", + "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n", + "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", + "from llama_index.llms.openai import OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "OPENAI_API_KEY=\"sk-\"\n", + "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core import Settings\n", + "\n", + "Settings.llm = OpenAI(temperature=0, model=\"gpt-4o\")\n", + "Settings.chunk_size = 512" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data, Build Index" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import requests\n", + "from llama_index.embeddings.openai import OpenAIEmbedding\n", + "\n", + "# save documents from wikipedia with these title(s):\n", + "wiki_titles = [\n", + " \"Vincent van Gogh\",\n", + "]\n", + "\n", + "# pull documents from wikipedia\n", + "data_path = Path(\"data_wiki\")\n", + "for title in wiki_titles:\n", + " response = requests.get(\n", + " \"https://en.wikipedia.org/w/api.php\",\n", + " params={\n", + " \"action\": \"query\",\n", + " \"format\": \"json\",\n", + " \"titles\": title,\n", + " \"prop\": \"extracts\",\n", + " \"explaintext\": True,\n", + " },\n", + " ).json()\n", + " page = next(iter(response[\"query\"][\"pages\"].values()))\n", + " wiki_text = page[\"extract\"]\n", + "\n", + " if not data_path.exists():\n", + " Path.mkdir(data_path)\n", + "\n", + " with open(data_path / f\"{title}.txt\", \"w\") as fp:\n", + " fp.write(wiki_text)\n", + "\n", + "embed_model = OpenAIEmbedding(model=\"text-embedding-3-large\", api_key=OPENAI_API_KEY)\n", + "\n", + "# load documents\n", + "documents = SimpleDirectoryReader(\"./data_wiki/\").load_data()\n", + "# build index\n", + "index = VectorStoreIndex.from_documents(\n", + " documents, embed_model=embed_model\n", + ")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n" + ] + } + ], + "source": [ + "from llama_index.core.retrievers import VectorIndexRetriever\n", + "from llama_index.core import QueryBundle\n", + "from llama_index.core.postprocessor import REBELRerank\n", + "\n", + "### You can define any query string you want here ###\n", + "query_str = \"Which date did Paul Gauguin arrive in Arles?\"\n", + "query_bundle = QueryBundle(query_str)\n", + "\n", + "# configure retriever\n", + "retriever = VectorIndexRetriever(\n", + " index=index,\n", + " similarity_top_k=50,\n", + ")\n", + "\n", + "# retrieve nodes\n", + 
"retrieved_nodes = retriever.retrieve(query_bundle)\n", + "\n", + "# configure reranker\n", + "reranked = REBELRerank(llm=OpenAI(model='gpt-4o'), top_n=10)\n", + "\n", + "# rerank nodes\n", + "reranked_nodes = reranked.postprocess_nodes(retrieved_nodes, query_bundle)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import torch\n", + "from IPython.display import display, HTML\n", + "\n", + "def pretty_print(df):\n", + " return display(HTML(df.to_html().replace(\"\\\\n\", \"
\")))\n", + "\n", + "\n", + "def visualize_nodes(nodes) -> None:\n", + " result_dicts = []\n", + " for node in nodes:\n", + " result_dict = {\"Score\": node.score, \"Text\": node.node.get_text()}\n", + " result_dicts.append(result_dict)\n", + "\n", + " pretty_print(pd.DataFrame(result_dicts))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Top 3 nodes from initial retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ScoreText
00.537058When he visited Saintes-Maries-de-la-Mer in June, he gave lessons to a Zouave second lieutenant – Paul-Eugène Milliet – and painted boats on the sea and the village. MacKnight introduced Van Gogh to Eugène Boch, a Belgian painter who sometimes stayed in Fontvieille, and the two exchanged visits in July.

\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t


==== Gauguin's visit (1888) ====


When Gauguin agreed to visit Arles in 1888, Van Gogh hoped for friendship and to realise his idea of an artists' collective. Van Gogh prepared for Gauguin's arrival by painting four versions of Sunflowers in one week. \"In the hope of living in a studio of our own with Gauguin,\" he wrote in a letter to Theo, \"I'd like to do a decoration for the studio. Nothing but large Sunflowers.\"
When Boch visited again, Van Gogh painted a portrait of him, as well as the study The Poet Against a Starry Sky.
In preparation for Gauguin's visit, Van Gogh bought two beds on advice from the station's postal supervisor Joseph Roulin, whose portrait he painted. On 17 September, he spent his first night in the still sparsely furnished Yellow House. When Gauguin consented to work and live in Arles with him, Van Gogh started to work on the Décoration for the Yellow House, probably the most ambitious effort he ever undertook. He completed two chair paintings: Van Gogh's Chair and Gauguin's Chair.
After much pleading from Van Gogh, Gauguin arrived in Arles on 23 October and, in November, the two painted together. Gauguin depicted Van Gogh in his The Painter of Sunflowers; Van Gogh painted pictures from memory, following Gauguin's suggestion. Among these \"imaginative\" paintings is Memory of the Garden at Etten. Their first joint outdoor venture was at the Alyscamps, when they produced the pendants Les Alyscamps. The single painting Gauguin completed during his visit was his portrait of Van Gogh.
10.532605In April, he was visited by the American artist Dodge MacKnight, who was living nearby at Fontvieille.
On 1 May 1888, Van Gogh signed a lease for four rooms at 2 Place Lamartine, Arles, which he later painted in The Yellow House. The rooms cost 15 francs per month, unfurnished; they had been uninhabited for months. Because the Yellow House had to be furnished before he could fully move in, Van Gogh moved from the Hôtel Carrel to the Café de la Gare on 7 May 1888. He had befriended the Yellow House's proprietors, Joseph and Marie Ginoux, and was able to use it as a studio. Van Gogh wanted a gallery to display his work and started a series of paintings that eventually included Van Gogh's Chair (1888), Bedroom in Arles (1888), The Night Café (1888), Café Terrace at Night (September 1888), Starry Night Over the Rhone (1888), and Still Life: Vase with Twelve Sunflowers (1888), all intended for the decoration for the Yellow House.
Van Gogh wrote that with The Night Café he tried \"to express the idea that the café is a place where one can ruin oneself, go mad, or commit a crime\". When he visited Saintes-Maries-de-la-Mer in June, he gave lessons to a Zouave second lieutenant – Paul-Eugène Milliet – and painted boats on the sea and the village. MacKnight introduced Van Gogh to Eugène Boch, a Belgian painter who sometimes stayed in Fontvieille, and the two exchanged visits in July.

\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t


==== Gauguin's visit (1888) ====


When Gauguin agreed to visit Arles in 1888, Van Gogh hoped for friendship and to realise his idea of an artists' collective. Van Gogh prepared for Gauguin's arrival by painting four versions of Sunflowers in one week. \"In the hope of living in a studio of our own with Gauguin,\" he wrote in a letter to Theo, \"I'd like to do a decoration for the studio. Nothing but large Sunflowers.\"
20.511364In November 1887, Theo and Vincent befriended Paul Gauguin who had just arrived in Paris. Towards the end of the year, Vincent arranged an exhibition alongside Bernard, Anquetin, and probably Toulouse-Lautrec, at the Grand-Bouillon Restaurant du Chalet, 43 avenue de Clichy, Montmartre. In a contemporary account, Bernard wrote that the exhibition was ahead of anything else in Paris. There, Bernard and Anquetin sold their first paintings, and Van Gogh exchanged work with Gauguin. Discussions on art, artists, and their social situations started during this exhibition, continued and expanded to include visitors to the show, like Camille Pissarro and his son Lucien, Signac and Seurat. In February 1888, feeling worn out from life in Paris, Van Gogh left, having painted more than 200 paintings during his two years there. Hours before his departure, accompanied by Theo, he paid his only visit to Seurat in his studio.


=== Artistic breakthrough ===


==== Arles (1888–89) ====

Ill from drink and suffering from smoker's cough, in February 1888, Van Gogh sought refuge in Arles. He seems to have moved with thoughts of founding an art colony. The Danish artist Christian Mourier-Petersen was his companion for two months and at first, Arles appeared exotic to Van Gogh. In a letter, he described it as a foreign country: \"The Zouaves, the brothels, the adorable little Arlésienne going to her First Communion, the priest in his surplice, who looks like a dangerous rhinoceros, the people drinking absinthe, all seem to me creatures from another world.\"
The time in Arles was one of Van Gogh's more prolific periods: he completed 200 paintings and more than 100 drawings and watercolours. He was energised by the local countryside and light; his works from this period are rich in yellow, ultramarine and mauve.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "visualize_nodes(retrieved_nodes[:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### REBEL\n", + "\n", + "1. We view the reranking prompt REBEL generates for this query\n", + "2. We view the top 3 nodes scored by REBEL" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Inferred Properties:\n", + "\n", + "1. **Historical accuracy (0-5):** Higher if the document provides precise and correct historical details about Paul Gauguin's arrival in Arles. A 5 means it provides exact dates and context, a 0 means it lacks accuracy or is incorrect.\n", + "\n", + "2. **Authoritativeness (0-5):** Higher if the document is sourced from reputable historical texts, art history experts, or well-regarded publications. A 5 might be a citation from a recognized art historian or a reputable museum publication, a 0 might be an unsourced or dubious claim.\n", + "\n", + "3. **Contextual detail (0-5):** Higher if the document provides additional context about the significance of Gauguin's arrival in Arles, such as his relationship with Vincent van Gogh or the impact on his work. A 5 means it offers rich context, a 0 means it provides no additional information.\n", + "\n", + "Scoring Rubric:\n", + "\n", + "- **Relevance (0-10):** A 10 means the document directly addresses the date of Paul Gauguin's arrival in Arles. 
A 0 means it is off-topic.\n", + "- **Historical accuracy (0-5):** Assign based on the precision and correctness of the historical details.\n", + "- **Authoritativeness (0-5):** Assign based on the credibility and source quality.\n", + "- **Contextual detail (0-5):** Assign based on the richness of additional context provided.\n", + "\n", + "Weighted Composite Score:\n", + "\n", + "Final Score = Relevance + 0.5*(Historical accuracy) + 0.5*(Authoritativeness) + 0.5*(Contextual detail)\n", + "\n", + "Instructions:\n", + "\n", + "After this prompt, you will see:\n", + "\n", + "Document 1: \n", + "Document 2: \n", + "... \n", + "Document N: \n", + "Question: \"Which date did Paul Gauguin arrive in Arles?\"\n", + "\n", + "1. Assign Relevance to each document (0-10). Discard documents with Relevance < 3.\n", + "2. For remaining documents, assign Historical accuracy, Authoritativeness, and Contextual detail (each 0-5).\n", + "3. Compute Final Score as described above.\n", + "4. Sort all remaining documents by Final Score (descending).\n", + "5. If two documents have identical Final Scores, pick consistently, for example by preferring the one with higher Authoritativeness.\n", + "6. If no document remains, output nothing.\n", + "7. Output only: Doc: [number], Relevance: [score] for each selected document, no commentary or explanation, where [score] is actually the final score.\n", + "\n", + "\"Example format: \n", + "\" \n", + "\"Document 1: \n", + " \n", + "\" \n", + "\"Document 2: \n", + " \n", + "\" \n", + "\"... 
\n", + "\" \n", + "\"Document 10: \n", + " \n", + "\" \n", + "\"Question: \n", + "\" \n", + "\"Answer: \n", + "\" \n", + "\"Doc: 9, Relevance: 7 \n", + "\" \n", + "\"Doc: 3, Relevance: 4 \n", + "\" \n", + "\"Doc: 7, Relevance: 3 \n", + "\" \n", + "\"Let's try this now: \n", + "\" \n", + "\"{context_str} \n", + "\" \n", + "\"Question: {query_str} \n", + "\" \n", + "\"Answer: \n", + "\"" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import Markdown\n", + "Markdown(reranked.choice_select_prompt.template)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ScoreText
010.0When he visited Saintes-Maries-de-la-Mer in June, he gave lessons to a Zouave second lieutenant – Paul-Eugène Milliet – and painted boats on the sea and the village. MacKnight introduced Van Gogh to Eugène Boch, a Belgian painter who sometimes stayed in Fontvieille, and the two exchanged visits in July.

\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t


==== Gauguin's visit (1888) ====


When Gauguin agreed to visit Arles in 1888, Van Gogh hoped for friendship and to realise his idea of an artists' collective. Van Gogh prepared for Gauguin's arrival by painting four versions of Sunflowers in one week. \"In the hope of living in a studio of our own with Gauguin,\" he wrote in a letter to Theo, \"I'd like to do a decoration for the studio. Nothing but large Sunflowers.\"
When Boch visited again, Van Gogh painted a portrait of him, as well as the study The Poet Against a Starry Sky.
In preparation for Gauguin's visit, Van Gogh bought two beds on advice from the station's postal supervisor Joseph Roulin, whose portrait he painted. On 17 September, he spent his first night in the still sparsely furnished Yellow House. When Gauguin consented to work and live in Arles with him, Van Gogh started to work on the Décoration for the Yellow House, probably the most ambitious effort he ever undertook. He completed two chair paintings: Van Gogh's Chair and Gauguin's Chair.
After much pleading from Van Gogh, Gauguin arrived in Arles on 23 October and, in November, the two painted together. Gauguin depicted Van Gogh in his The Painter of Sunflowers; Van Gogh painted pictures from memory, following Gauguin's suggestion. Among these \"imaginative\" paintings is Memory of the Garden at Etten. Their first joint outdoor venture was at the Alyscamps, when they produced the pendants Les Alyscamps. The single painting Gauguin completed during his visit was his portrait of Van Gogh.
110.0When Gauguin consented to work and live in Arles with him, Van Gogh started to work on the Décoration for the Yellow House, probably the most ambitious effort he ever undertook. He completed two chair paintings: Van Gogh's Chair and Gauguin's Chair.
After much pleading from Van Gogh, Gauguin arrived in Arles on 23 October and, in November, the two painted together. Gauguin depicted Van Gogh in his The Painter of Sunflowers; Van Gogh painted pictures from memory, following Gauguin's suggestion. Among these \"imaginative\" paintings is Memory of the Garden at Etten. Their first joint outdoor venture was at the Alyscamps, when they produced the pendants Les Alyscamps. The single painting Gauguin completed during his visit was his portrait of Van Gogh.
Van Gogh and Gauguin visited Montpellier in December 1888, where they saw works by Courbet and Delacroix in the Musée Fabre. Their relationship began to deteriorate; Van Gogh admired Gauguin and wanted to be treated as his equal, but Gauguin was arrogant and domineering, which frustrated Van Gogh. They often quarrelled; Van Gogh increasingly feared that Gauguin was going to desert him, and the situation, which Van Gogh described as one of \"excessive tension\", rapidly headed towards crisis point.

\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t
\\t\\t
\\t\\t\\t
\\t\\t\\t
\\t\\t


==== Hospital in Arles (December 1888) ====

The exact sequence that led to the mutilation of Van Gogh's ear is not known. Gauguin said, fifteen years later, that the night followed several instances of physically threatening behaviour. Their relationship was complex and Theo may have owed money to Gauguin, who suspected the brothers were exploiting him financially. It seems likely that Vincent realised that Gauguin was planning to leave. The following days saw heavy rain, leading to the two men being shut in the Yellow House. Gauguin recalled that Van Gogh followed him after he left for a walk and \"rushed towards me, an open razor in his hand\".
27.0Later he drifted into ill-health and solitude. He was keenly aware of modernist trends in art and, while back with his parents, took up painting in 1881. His younger brother, Theo, supported him financially, and the two of them maintained a long correspondence.
Van Gogh's early works consist of mostly still lifes and depictions of peasant labourers. In 1886, he moved to Paris, where he met members of the artistic avant-garde, including Émile Bernard and Paul Gauguin, who were seeking new paths beyond Impressionism. Frustrated in Paris and inspired by a growing spirit of artistic change and collaboration, in February 1888 Van Gogh moved to Arles in southern France to establish an artistic retreat and commune. Once there, his paintings grew brighter and he turned his attention to the natural world, depicting local olive groves, wheat fields and sunflowers. Van Gogh invited Gauguin to join him in Arles and eagerly anticipated Gauguin's arrival in late 1888.
Van Gogh suffered from psychotic episodes and delusions. He worried about his mental stability, and often neglected his physical health, did not eat properly and drank heavily. His friendship with Gauguin ended after a confrontation with a razor when, in a rage, he mutilated his left ear. Van Gogh spent time in psychiatric hospitals, including a period at Saint-Rémy. After he discharged himself and moved to the Auberge Ravoux in Auvers-sur-Oise near Paris, he came under the care of the homeopathic doctor Paul Gachet. His depression persisted, and on 29 July 1890 Van Gogh died from his injuries after shooting himself in the chest with a revolver.
Van Gogh's work began to attract critical artistic attention in the last year of his life. After his death, his art and life story captured public imagination as an emblem of misunderstood genius, due in large part to the efforts of his widowed sister-in-law Johanna van Gogh-Bonger. His bold use of colour, expressive line and thick application of paint inspired avant-garde artistic groups like the Fauves and German Expressionists in the early 20th century.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "visualize_nodes(reranked_nodes[:3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "main", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/llama-index-core/llama_index/core/postprocessor/__init__.py b/llama-index-core/llama_index/core/postprocessor/__init__.py index 04f7b2a8b2..58e26d41e6 100644 --- a/llama-index-core/llama_index/core/postprocessor/__init__.py +++ b/llama-index-core/llama_index/core/postprocessor/__init__.py @@ -2,6 +2,7 @@ from llama_index.core.postprocessor.llm_rerank import LLMRerank +from llama_index.core.postprocessor.rebel_rerank import REBELRerank from llama_index.core.postprocessor.structured_llm_rerank import ( StructuredLLMRerank, DocumentWithRelevance, @@ -39,6 +40,7 @@ "PIINodePostprocessor", "NERPIINodePostprocessor", "LLMRerank", + "REBELRerank", "StructuredLLMRerank", "DocumentWithRelevance", "SentenceEmbeddingOptimizer", diff --git a/llama-index-core/llama_index/core/postprocessor/rebel_rerank.py b/llama-index-core/llama_index/core/postprocessor/rebel_rerank.py new file mode 100644 index 0000000000..89406ea2ea --- /dev/null +++ b/llama-index-core/llama_index/core/postprocessor/rebel_rerank.py @@ -0,0 +1,160 @@ +import logging +from typing import List, Optional, Callable + +from llama_index.core.bridge.pydantic import Field, PrivateAttr, SerializeAsAny +from llama_index.core.llms import LLM +from llama_index.core.postprocessor.types import BaseNodePostprocessor +from 
llama_index.core.prompts import BasePromptTemplate +from llama_index.core.prompts.base import PromptTemplate, PromptType +from llama_index.core.prompts.default_prompts import ( + DEFAULT_REBEL_META_PROMPT, + DEFAULT_REBEL_CHOICE_SELECT_PROMPT, +) +from llama_index.core.prompts.mixin import PromptDictType +from llama_index.core.schema import NodeWithScore, QueryBundle +from llama_index.core.settings import Settings +from llama_index.core.indices.utils import ( + default_format_node_batch_fn, + default_parse_choice_select_answer_fn, +) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + + +def get_default_llm() -> LLM: + from llama_index.llms.openai import OpenAI # pants: no-infer-dep + + return OpenAI(model="gpt-3.5-turbo-16k") + + +class REBELRerank(BaseNodePostprocessor): + """REBEL (Rerank Beyond Relevance) reranker.""" + + top_n: int = Field(description="Top N nodes to return.") + one_turn: bool = Field(description="Whether to use a one_turn reranking prompt") + meta_prompt: SerializeAsAny[BasePromptTemplate] = Field( + description="REBEL prompt that generates the choice selection prompt." + ) + choice_batch_size: int = Field(description="Batch size for choice select.") + llm: LLM = Field( + default_factory=get_default_llm, description="The LLM to rerank with." + ) + verbose: bool = Field( + default=False, description="Whether to print intermediate steps." + ) + choice_select_prompt: Optional[SerializeAsAny[BasePromptTemplate]] = Field( + default=None, description="Generated prompt for choice selection." 
+    )
+
+    # Callables are stored as private attrs so they stay out of the
+    # pydantic (de)serialization schema.
+    _format_node_batch_fn: Callable = PrivateAttr()
+    _parse_choice_select_answer_fn: Callable = PrivateAttr()
+
+    def __init__(
+        self,
+        llm: Optional[LLM] = None,
+        meta_prompt: Optional[BasePromptTemplate] = None,
+        choice_select_prompt: Optional[BasePromptTemplate] = None,
+        choice_batch_size: int = 10,
+        format_node_batch_fn: Optional[Callable] = None,
+        parse_choice_select_answer_fn: Optional[Callable] = None,
+        top_n: int = 10,
+        one_turn: bool = False,
+    ) -> None:
+        """Initialize params."""
+        meta_prompt = meta_prompt or DEFAULT_REBEL_META_PROMPT
+        choice_select_prompt = (
+            choice_select_prompt or DEFAULT_REBEL_CHOICE_SELECT_PROMPT
+        )
+        llm = llm or Settings.llm
+
+        super().__init__(
+            llm=llm,
+            meta_prompt=meta_prompt,
+            choice_select_prompt=choice_select_prompt,
+            choice_batch_size=choice_batch_size,
+            top_n=top_n,
+            one_turn=one_turn,
+        )
+
+        self._format_node_batch_fn = (
+            format_node_batch_fn or default_format_node_batch_fn
+        )
+        self._parse_choice_select_answer_fn = (
+            parse_choice_select_answer_fn or default_parse_choice_select_answer_fn
+        )
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "REBELRerank"
+
+    def _get_prompts(self) -> PromptDictType:
+        """Get prompts.
+
+        Only the prompts that exist as fields on this class are exposed;
+        this mirrors exactly the keys `_update_prompts` accepts.
+        """
+        return {
+            "meta_prompt": self.meta_prompt,
+            "choice_select_prompt": self.choice_select_prompt,
+        }
+
+    def _update_prompts(self, prompts: PromptDictType) -> None:
+        """Update prompts."""
+        if "meta_prompt" in prompts:
+            self.meta_prompt = prompts["meta_prompt"]
+        if "choice_select_prompt" in prompts:
+            self.choice_select_prompt = prompts["choice_select_prompt"]
+
+    def _postprocess_nodes(
+        self,
+        nodes: List[NodeWithScore],
+        query_bundle: Optional[QueryBundle] = None,
+    ) -> List[NodeWithScore]:
+        """Postprocess nodes."""
+        if query_bundle is None:
+            raise ValueError("Query bundle must be provided.")
+        if len(nodes) == 0:
+            return []
+
+        query_str = query_bundle.query_str
+
+        # In two_turn REBEL, the choice_select_prompt
is created per-query from the meta prompt + if not self.one_turn: + self.choice_select_prompt = PromptTemplate( + self.llm.predict( + self.meta_prompt, + user_query=query_str, + ), + prompt_type=PromptType.CHOICE_SELECT, + ) + + initial_results: List[NodeWithScore] = [] + for idx in range(0, len(nodes), self.choice_batch_size): + nodes_batch = [ + node.node for node in nodes[idx : idx + self.choice_batch_size] + ] + fmt_batch_str = self._format_node_batch_fn(nodes_batch) + + response = self.llm.predict( + self.choice_select_prompt, + context_str=fmt_batch_str, + query_str=query_str, + ) + + raw_choices, relevances = self._parse_choice_select_answer_fn( + response, len(nodes_batch) + ) + + choice_idxs = [int(choice) - 1 for choice in raw_choices] + choice_nodes = [nodes_batch[i] for i in choice_idxs] + + initial_results.extend( + [ + NodeWithScore(node=node, score=relevance) + for node, relevance in zip(choice_nodes, relevances) + ] + ) + + final_results = sorted( + initial_results, key=lambda x: x.score or 0.0, reverse=True + )[: self.top_n] + + return final_results diff --git a/llama-index-core/llama_index/core/prompts/default_prompts.py b/llama-index-core/llama_index/core/prompts/default_prompts.py index 6dd7f1771b..aab6d1a7b8 100644 --- a/llama-index-core/llama_index/core/prompts/default_prompts.py +++ b/llama-index-core/llama_index/core/prompts/default_prompts.py @@ -543,3 +543,357 @@ DEFAULT_JSONALYZE_PROMPT = PromptTemplate( DEFAULT_JSONALYZE_PROMPT_TMPL, prompt_type=PromptType.TEXT_TO_SQL ) + +########################################### +# REBEL MetaPrompt Template +########################################### + +DEFAULT_REBEL_META_PROMPT_TMPL = ( + '''You are a prompt generator. You will receive only a user's query as input. Your task is to: + +Analyze the user's query and identify additional properties beyond basic relevance that would be desirable for selecting and ranking context documents. 
These properties should be inferred from the query's subject matter, without the user specifying them. Such properties may include: + +Domain appropriateness (e.g., technical accuracy, authoritative sourcing, correctness of information) +Perspective diversity (multiple viewpoints, ideological balance, different theoretical frameworks) +Temporal relevance (up-to-date information, recent data) +Depth of detail and specificity (thorough coverage, multi-faceted analysis, detailed examples) +Trustworthiness, neutrality, impartiality (reliable sources, unbiased accounts) +Reasoning depth or conceptual complexity +Authoritativeness (recognition of reputable experts, institutions, or high-quality sources) +After inferring these properties from the query, produce a final prompt that instructs a large-language model re-ranker on how to: + +Take the user's query and a set of candidate documents. +The documents and the query will appear after your instructions in this format: A list of documents is shown below. Each document has a number and a summary. The summaries may indicate the type of source, credibility level, publication date, or the nature of the information. After listing all documents, the user's query will be presented on a single line labeled "Question:". For example: Document 1: Document 2: ... Document N: Question: +Assign each document a Relevance score (0-10) and scores for each inferred property (0-5). +Compute a weighted composite score for each document. This composite score should not just be used to break ties, but to determine the final ordering. For instance, you may define a formula like: Final Score = Relevance + (Weight1 * Property1) + (Weight2 * Property2) + ... The weights should be specified by you. 
For example, if you have three properties, you might say: Final Score = Relevance + 0.5*(Property1) + 0.5*(Property2) + 0.5*(Property3) This ensures that documents which strongly exhibit the desired secondary properties can surpass documents with slightly higher relevance but weaker secondary property scores. +Filter out irrelevant documents first. For example, discard any document with Relevance < 3. +Rank all remaining documents by their Final Score (based on the chosen weights). +If two documents end up with the exact same Final Score, you may choose a consistent approach to pick one over the other (e.g., prefer the document with higher authoritativeness). +If no documents meet the relevance threshold, output nothing. +Produce only the final ranked list of chosen documents with their Final Score, in descending order of Final Score. The format for each selected document should be: Doc: [document number], Relevance: [score], where [score] is actually the final score - not the relevance score. +Include no commentary, explanation, or additional text beyond these lines. +Your final prompt should: + +Include the user's query verbatim. +Enumerate and define the inferred properties in detail, clearly stating their significance. +Provide the exact scoring rubric for Relevance (0-10) and each inferred property (0-5), explaining what high and low scores mean. +Specify the weighted composite score formula and list the weights for each property. +Give a step-by-step procedure: assign Relevance, assign property scores, discard low-relevance documents, compute Final Scores, sort by Final Score, handle ties if any, then output the final list. +State what to do if no documents qualify (output nothing). +Remind the re-ranker that the documents and query will be shown after this prompt, and that the only acceptable output is the final sorted list of documents and their relevance scores. 
+Your output should be a single prompt that can be given directly to the large-language model re-ranker. After this prompt, the re-ranker will receive the documents and the query and must follow the instructions to produce the final answer. + +At the end of your prompt, you should ALWAYS NO MATTER WHAT include the following: + +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + +Below are 5 k-shot examples demonstrating the required level of detail and explicitness. Each example: + +Presents a user query. +Infers multiple properties and explains their relevance. +Provides a scoring rubric for Relevance and the inferred properties. +Defines a weighted composite scoring formula that incorporates Relevance and all secondary properties. +Gives step-by-step instructions for scoring, filtering, ranking, and outputting results. +Explains what to do if no suitable documents remain. +Instructs that the final output should only be lines of the form "Doc: [number], Relevance: [score]" with no extra text. +Example 1 User Query: "How do different countries' tax policies affect income inequality, and what arguments exist from various economic schools of thought?" + +Inferred Properties: + +Perspective diversity (0-5): Documents that mention or compare multiple economic theories or viewpoints score higher. A high score (5) means it covers several distinct schools of thought. A low score (0) means it is one-dimensional. +Authoritativeness (0-5): Documents from credible economists, reputable research institutes, or peer-reviewed studies score higher. A 5 might be a well-cited academic paper; a 0 might be an anonymous blog post. +Comparative breadth (0-5): Documents discussing tax policies in multiple countries score higher. 
A 5 means it covers several countries, a 0 means it focuses on just one or does not compare countries at all. +Scoring Rubric: Relevance (0-10): A 10 means the document directly addresses how tax policies influence income inequality and references arguments from different economic viewpoints. A 0 means it is off-topic. Perspective diversity (0-5): Assign based on how many distinct economic perspectives are included. Authoritativeness (0-5): Assign based on credibility and source quality. Comparative breadth (0-5): Assign based on the number of countries or breadth of international comparison. + +Weighted Composite Score: Final Score = Relevance + 0.5*(Perspective diversity) + 0.5*(Authoritativeness) + 0.5*(Comparative breadth) + +Instructions: After this prompt, you will see: Document 1: Document 2: ... Document N: Question: "How do different countries' tax policies affect income inequality, and what arguments exist from various economic schools of thought?" + +Assign Relevance to each document (0-10). Discard documents with Relevance < 3. +For remaining documents, assign Perspective diversity, Authoritativeness, and Comparative breadth (each 0-5). +Compute Final Score as described above. +Sort all remaining documents by Final Score (descending). +If two documents have identical Final Scores, pick consistently, for example by preferring the one with higher Authoritativeness. +If no document remains, output nothing. +Output only: Doc: [number], Relevance: [score] for each selected document, no commentary or explanation, where [score] is actually the final score. 
+ +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + + +Example 2 User Query: "What are the latest recommended treatments for chronic lower back pain according to recent medical research?" + +Inferred Properties: + +Recency (0-5): Higher if the document references recent studies, new clinical guidelines, or up-to-date research (within the last few years). A 5 means it is very recent, a 0 means outdated or no mention of timeliness. +Authoritativeness (0-5): Higher if sourced from reputable medical journals, recognized health organizations, or consensus guidelines. +Specificity (0-5): Higher if it focuses specifically on chronic lower back pain treatments. A 5 means it precisely addresses chronic lower back pain, a 0 means it only vaguely mentions pain or general treatments without specificity. +Scoring Rubric: Relevance (0-10): A 10 means the document explicitly discusses current recommended treatments for chronic lower back pain based on recent research. A 0 means off-topic. Recency (0-5) Authoritativeness (0-5) Specificity (0-5) + +Weighted Composite Score: Final Score = Relevance + 0.5*(Recency) + 0.5*(Authoritativeness) + 0.5*(Specificity) + +Instructions: After this prompt: Document 1: ... Document N: Question: "What are the latest recommended treatments for chronic lower back pain according to recent medical research?" + +Assign Relevance. Exclude Relevance < 3. +Assign Recency, Authoritativeness, Specificity. +Compute Final Score. +Sort by Final Score. +If tied, choose consistently (e.g., prefer higher Authoritativeness). +If none remain, output nothing. +Output only lines like: Doc: X, Relevance: Y, where Y is actually the final score. 
+ +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + + +Example 3 User Query: "How did the policies of Emperor Qin Shi Huang shape the political and cultural landscape of ancient China?" + +Inferred Properties: + +Historical depth (0-5): Higher if it provides detailed historical context, dates, and direct evidence. A 5 is richly detailed, a 0 is very superficial. +Perspective range (0-5): Higher if it references multiple historians or scholarly opinions. A 5 means multiple perspectives, a 0 is one-sided. +Cultural/political detail (0-5): Higher if it addresses both political structures and cultural changes. A 5 is comprehensive, a 0 is minimal detail. +Scoring Rubric: Relevance (0-10): A 10 means it explicitly discusses Qin Shi Huang's policies and their impact on both political and cultural aspects of ancient China. Historical depth (0-5) Perspective range (0-5) Cultural/political detail (0-5) + +Weighted Composite Score: Final Score = Relevance + 0.5*(Historical depth) + 0.5*(Perspective range) + 0.5*(Cultural/political detail) + +Instructions: After this prompt: Document 1: ... Document N: Question: "How did the policies of Emperor Qin Shi Huang shape the political and cultural landscape of ancient China?" + +Assign Relevance, discard < 3. +Assign Historical depth, Perspective range, Cultural/political detail. +Compute Final Score. +Sort by Final Score. +Tie-break by preferring more historically authoritative perspectives if still tied. +If none qualify, output nothing. +Only output: Doc: [number], Relevance: [score], where [score] is actually the final score. 
+ +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + + +Example 4 User Query: "What are the main differences between various machine learning frameworks like TensorFlow, PyTorch, and Scikit-learn?" + +Inferred Properties: + +Technical accuracy (0-5): Higher if the document correctly and specifically describes features, performance characteristics, or typical uses. A 5 means very accurate and specific. +Comparative breadth (0-5): Higher if the document compares multiple frameworks directly, ideally all three. A 5 means it covers all three well, a 0 means it only mentions one. +Authoritativeness (0-5): Higher if citing official documentation, known ML experts, or reputable evaluation sources. +Scoring Rubric: Relevance (0-10): A 10 means the document explicitly compares these ML frameworks in detail. Technical accuracy (0-5) Comparative breadth (0-5) Authoritativeness (0-5) + +Weighted Composite Score: Final Score = Relevance + 0.5*(Technical accuracy) + 0.5*(Comparative breadth) + 0.5*(Authoritativeness) + +Instructions: After prompt: Document 1: ... Document N: Question: "What are the main differences between various machine learning frameworks like TensorFlow, PyTorch, and Scikit-learn?" + +Assign Relevance, exclude < 3. +Assign Technical accuracy, Comparative breadth, Authoritativeness. +Compute Final Score. +Sort by Final Score. +Tie-break by preferring documents that are more authoritative or have greater comparative breadth. +If none remain, output nothing. +Output only lines like: Doc: [number], Relevance: [score], where [score] is actually the final score. 
+ +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + +Example 5 User Query: "What are the arguments for and against universal basic income in modern economies?" + +Inferred Properties: + +Balance of perspectives (0-5): Higher if the document presents both pro and con arguments. A 5 means thorough coverage of both sides. +Reasoning depth (0-5): Higher if it explains the rationale behind arguments, providing logic or evidence. +Authoritativeness (0-5): Higher if referencing economists, studies, or policy analyses from reputable sources. +Scoring Rubric: Relevance (0-10): A 10 means it clearly discusses UBI arguments both for and against. Balance of perspectives (0-5) Reasoning depth (0-5) Authoritativeness (0-5) + +Weighted Composite Score: Final Score = Relevance + 0.5*(Balance of perspectives) + 0.5*(Reasoning depth) + 0.5*(Authoritativeness) + +Instructions: After prompt: Document 1: ... Document N: Question: "What are the arguments for and against universal basic income in modern economies?" + +Assign Relevance, discard < 3. +Assign Balance of perspectives, Reasoning depth, Authoritativeness. +Compute Final Score. +Sort by Final Score. +If tied, prefer documents with higher reasoning depth or authoritativeness. +If none remain, output nothing. +Output only: Doc: [number], Relevance: [score], where [score] is actually the final score. + +"Example format: \n" +"Document 1:\n\n\n" +"Document 2:\n\n\n" +"...\n\n" +"Document 10:\n\n\n" +"Question: \n" +"Answer:\n" +"Doc: 9, Relevance: 7\n" +"Doc: 3, Relevance: 4\n" +"Doc: 7, Relevance: 3\n\n" +"Let's try this now: \n\n" +"{context_str}\n" +"Question: {query_str}\n" +"Answer:\n" + + +Follow these examples as a template for your final prompt. 
For any new user query, do the following: + +Include the user's query verbatim. +Infer the relevant secondary properties and define them clearly. +Give a scoring rubric for Relevance and each property. +Specify a weighted composite score formula that combines Relevance and the properties. +Provide step-by-step instructions: assign scores, filter out irrelevant documents, compute Final Score, sort by Final Score, handle ties, and if none qualify, output nothing. +Instruct the re-ranker to output only the final list of documents and their Relevance scores, with no extra commentary. +Now, here is the user's query: + +{user_query} +''' +) + +DEFAULT_REBEL_META_PROMPT = PromptTemplate( + DEFAULT_REBEL_META_PROMPT_TMPL, prompt_type=PromptType.REBEL_RERANK +) + +DEFAULT_REBEL_CHOICE_SELECT_PROMPT_TMPL = ( +"""You are a re-ranking system. Your task is to analyze a user's query and a set of candidate documents, assign scores based on specified properties, and output the final ranking of documents. + +**Inferred Properties** + +1. **Depth of Content (0-5):** +- Higher scores indicate thorough detail and comprehensive coverage of the topic. +- A "5" is exceptionally in-depth with multiple facets addressed; a "0" is very superficial. + +2. **Diversity of Perspectives (0-5):** +- Higher scores indicate that multiple viewpoints or angles are represented. +- A "5" means it engages with a variety of perspectives or sources; a "0" means it is entirely one-sided. + +3. **Clarity and Specificity (0-5):** +- Higher scores indicate that the document presents information clearly and addresses the query with precise, unambiguous detail. +- A "5" means it is highly specific and clear, while a "0" means it is vague or overly general. + +4. **Authoritativeness (0-5):** +- Higher scores indicate reputable sources, expert authorship, or recognized credibility. +- A "5" might be an extensively cited academic work or an official standard; a "0" would be an unknown or dubious source. + +5. 
**Recency (0-5):**
+- Higher scores indicate that the document references recent studies, data, or developments.
+- A "5" means it is very current and up-to-date; a "0" means it is outdated or does not reference any time-sensitive information.
+
+**Scoring Rubric**
+
+- **Relevance (0-10):**
+- A "10" means the document directly addresses the user's query, covering the key subject comprehensively.
+- A "0" means it is completely off-topic.
+
+- **Depth of Content (0-5):** Based on how detailed or thorough the document is.
+- **Diversity of Perspectives (0-5):** Based on how many viewpoints or angles are presented.
+- **Clarity and Specificity (0-5):** Based on how clear and precise the document is.
+- **Authoritativeness (0-5):** Based on source credibility or recognized expertise.
+- **Recency (0-5):** Based on how up-to-date the document is.
+
+**Weighted Composite Score**
+Final Score = Relevance + 0.5*(Depth of Content) + 0.5*(Diversity of Perspectives) + 0.5*(Clarity and Specificity) + 0.5*(Authoritativeness) + 0.5*(Recency)
+
+**Instructions**
+1. Assign Relevance to each document on a scale of 0-10. Discard any document with Relevance < 3.
+2. For the remaining documents, assign scores for:
+- Depth of Content (0-5)
+- Diversity of Perspectives (0-5)
+- Clarity and Specificity (0-5)
+- Authoritativeness (0-5)
+- Recency (0-5)
+3. Compute each document's Final Score using the formula above.
+4. Sort the documents by their Final Score in descending order.
+5. If two documents end up with the same Final Score, prefer the one with higher Authoritativeness (or apply another consistent tie-breaking rule).
+6. If no documents remain after filtering for Relevance, output nothing.
+7. Output only the list of the selected documents with their Relevance scores, in this format (no extra text or commentary), where [score] is actually the Final Score and NOT the relevance score. 
+``` +Doc: [document number], Relevance: [score] +``` + +**Example format:** +``` +Document 1: + + +Document 2: + + +... + +Document 10: + + +Question: +Answer: +Doc: 9, Relevance: 7 +Doc: 3, Relevance: 4 +Doc: 7, Relevance: 3 + +Let's try this now: + +{context_str} +Question: {query_str} +Answer: +```""") + +DEFAULT_REBEL_CHOICE_SELECT_PROMPT = PromptTemplate( + DEFAULT_REBEL_CHOICE_SELECT_PROMPT_TMPL, prompt_type=PromptType.CHOICE_SELECT +) diff --git a/llama-index-core/llama_index/core/prompts/prompt_type.py b/llama-index-core/llama_index/core/prompts/prompt_type.py index 485c7dea41..8bc626238a 100644 --- a/llama-index-core/llama_index/core/prompts/prompt_type.py +++ b/llama-index-core/llama_index/core/prompts/prompt_type.py @@ -78,3 +78,6 @@ class PromptType(str, Enum): # RankGPT rerank RANKGPT_RERANK = "rankgpt_rerank" + + # REBEL rerank + REBEL_RERANK = "rebel_rerank" diff --git a/llama-index-core/tests/postprocessor/test_rebel_rerank.py b/llama-index-core/tests/postprocessor/test_rebel_rerank.py new file mode 100644 index 0000000000..28b3a4bce4 --- /dev/null +++ b/llama-index-core/tests/postprocessor/test_rebel_rerank.py @@ -0,0 +1,80 @@ +"""Test LLM reranker.""" + +from typing import Any, List +from unittest.mock import patch + +from llama_index.core.llms.mock import MockLLM +from llama_index.core.postprocessor import REBELRerank +from llama_index.core.prompts import BasePromptTemplate +from llama_index.core.schema import BaseNode, NodeWithScore, QueryBundle, TextNode + + +def mock_rebel_predict( + self: Any, prompt: BasePromptTemplate, **prompt_args: Any +) -> str: + """Patch llm predictor predict.""" + # If this is the meta_prompt call (first call), just return a template + if "context_str" not in prompt_args: + # Return a simple template for the choice selection prompt + return "Doc: {doc_id}, Relevance: {relevance}" + + # This is the second call with context_str + context_str = prompt_args["context_str"] + node_strs = context_str.split("\n") + 
node_to_choice_and_score = { + "Test": (True, "1"), + "Test2": (False, "0"), + "Test3": (True, "3"), + "Test4": (False, "0"), + "Test5": (True, "5"), + "Test6": (False, "0"), + "Test7": (True, "7"), + "Test8": (False, "0"), + } + choices_and_scores = [] + for idx, node_str in enumerate(node_strs): + choice, score = node_to_choice_and_score[node_str] + if choice: + choices_and_scores.append((idx + 1, score)) + + result_strs = [f"Doc: {c!s}, Relevance: {s}" for c, s in choices_and_scores] + return "\n".join(result_strs) + + +def mock_format_node_batch_fn(nodes: List[BaseNode]) -> str: + """Mock format node batch fn.""" + return "\n".join([node.get_content() for node in nodes]) + + +@patch.object( + MockLLM, + "predict", + mock_rebel_predict, +) +def test_rebel_rerank() -> None: + """Test LLM rerank.""" + nodes = [ + TextNode(text="Test"), + TextNode(text="Test2"), + TextNode(text="Test3"), + TextNode(text="Test4"), + TextNode(text="Test5"), + TextNode(text="Test6"), + TextNode(text="Test7"), + TextNode(text="Test8"), + ] + nodes_with_score = [NodeWithScore(node=n) for n in nodes] + + # choice batch size 4 (so two batches) + # take top-3 across all data + rebel_rerank = REBELRerank( + format_node_batch_fn=mock_format_node_batch_fn, choice_batch_size=4, top_n=3 + ) + query_str = "What is?" + result_nodes = rebel_rerank.postprocess_nodes( + nodes_with_score, QueryBundle(query_str) + ) + assert len(result_nodes) == 3 + assert result_nodes[0].node.get_content() == "Test7" + assert result_nodes[1].node.get_content() == "Test5" + assert result_nodes[2].node.get_content() == "Test3"