|  | 
| 511 | 511 |     "        # Format results\n", | 
| 512 | 512 |     "        formatted_results = []\n", | 
| 513 | 513 |     "        for hit in results[0]:\n", | 
|  | 514 | +    "            # For COSINE metric, Milvus returns similarity scores (higher = more similar)\n", | 
|  | 515 | +    "            # No need to convert - use the score directly\n", | 
|  | 516 | +    "            similarity_score = hit[\"distance\"]  # This is actually similarity for COSINE\n", | 
|  | 517 | +    "            \n", | 
| 514 | 518 |     "            formatted_results.append({\n", | 
| 515 | 519 |     "                \"text\": hit[\"text\"],\n", | 
| 516 | 520 |     "                \"metadata\": json.loads(hit[\"metadata\"]),\n", | 
| 517 |  | -    "                \"score\": 1.0 - hit[\"distance\"],  # Convert distance to similarity score for COSINE\n", | 
|  | 521 | +    "                \"score\": similarity_score,  # Use direct similarity score\n", | 
| 518 | 522 |     "                \"id\": hit[\"id\"]\n", | 
| 519 | 523 |     "            })\n", | 
| 520 | 524 |     "        \n", | 
|  | 
| 851 | 855 |    "metadata": {}, | 
| 852 | 856 |    "outputs": [], | 
| 853 | 857 |    "source": [ | 
| 854 |  | -    "# Test Sample Queries\n", | 
| 855 |  | -    "queries = [\n", | 
| 856 |  | -    "    \"What is artificial intelligence?\",\n", | 
| 857 |  | -    "    \"How does Milvus work as a vector database?\",\n", | 
| 858 |  | -    "    \"Explain the RAG pipeline steps\",\n", | 
| 859 |  | -    "    \"What are the differences between machine learning and deep learning?\"\n", | 
| 860 |  | -    "]\n", | 
|  | 858 | +    "# Debug Sample Queries - Test one specific query with detailed logging\n", | 
|  | 859 | +    "print(\"🔍 Debug: Testing specific query with detailed logging\\n\")\n", | 
| 861 | 860 |     "\n", | 
| 862 |  | -    "print(\"Testing RAG Pipeline with sample queries...\\n\")\n", | 
|  | 861 | +    "# Test the Milvus query that should definitely work\n", | 
|  | 862 | +    "test_query = \"How does Milvus work as a vector database?\"\n", | 
|  | 863 | +    "print(f\"Query: {test_query}\")\n", | 
|  | 864 | +    "print(\"-\" * 50)\n", | 
| 863 | 865 |     "\n", | 
| 864 |  | -    "for i, query in enumerate(queries, 1):\n", | 
| 865 |  | -    "    print(f\"Query {i}: {query}\")\n", | 
| 866 |  | -    "    print(\"-\" * 50)\n", | 
| 867 |  | -    "    \n", | 
| 868 |  | -    "    result = rag.query(query, top_k=3)\n", | 
| 869 |  | -    "    \n", | 
| 870 |  | -    "    print(f\"Answer: {result['answer']}\")\n", | 
| 871 |  | -    "    print(f\"\\nRetrieved {result['num_retrieved']} documents:\")\n", | 
| 872 |  | -    "    \n", | 
| 873 |  | -    "    for j, doc in enumerate(result['retrieved_documents'], 1):\n", | 
| 874 |  | -    "        print(f\"  {j}. Score: {doc['score']:.4f}\")\n", | 
| 875 |  | -    "        print(f\"     Text: {doc['text'][:100]}...\")\n", | 
| 876 |  | -    "        print(f\"     Source: {doc['metadata'].get('source', 'Unknown')}\")\n", | 
|  | 866 | +    "# Get query embedding\n", | 
|  | 867 | +    "query_embedding = rag.embedding_generator.embed_text(test_query)\n", | 
|  | 868 | +    "print(f\"✅ Generated query embedding: shape {query_embedding.shape}\")\n", | 
|  | 869 | +    "\n", | 
|  | 870 | +    "# Test direct search on vector store\n", | 
|  | 871 | +    "print(\"🔍 Testing direct vector store search...\")\n", | 
|  | 872 | +    "try:\n", | 
|  | 873 | +    "    search_results = rag.vector_store.search(query_embedding, top_k=5)\n", | 
|  | 874 | +    "    print(f\"✅ Direct search returned {len(search_results)} results\")\n", | 
| 877 | 875 |     "    \n", | 
| 878 |  | -    "    print(\"\\n\" + \"=\" * 80 + \"\\n\")" | 
|  | 876 | +    "    if search_results:\n", | 
|  | 877 | +    "        for i, result in enumerate(search_results):\n", | 
|  | 878 | +    "            print(f\"  Result {i+1}:\")\n", | 
|  | 879 | +    "            print(f\"    Score: {result.get('score', 'N/A')}\")\n", | 
|  | 880 | +    "            print(f\"    Text preview: {str(result.get('text', 'N/A'))[:100]}...\")\n", | 
|  | 881 | +    "            print(f\"    ID: {result.get('id', 'N/A')}\")\n", | 
|  | 882 | +    "    else:\n", | 
|  | 883 | +    "        print(\"  ❌ No results from direct search\")\n", | 
|  | 884 | +    "        \n", | 
|  | 885 | +    "except Exception as e:\n", | 
|  | 886 | +    "    print(f\"❌ Direct search failed: {e}\")\n", | 
|  | 887 | +    "\n", | 
|  | 888 | +    "print(\"\\n\" + \"=\" * 50)\n", | 
|  | 889 | +    "\n", | 
|  | 890 | +    "# Now test full RAG pipeline\n", | 
|  | 891 | +    "print(\"🔍 Testing full RAG pipeline...\")\n", | 
|  | 892 | +    "result = rag.query(test_query, top_k=5)\n", | 
|  | 893 | +    "print(f\"Full pipeline returned {result['num_retrieved']} documents\")\n", | 
|  | 894 | +    "print(f\"Answer: {result['answer'][:200]}...\")\n", | 
|  | 895 | +    "\n", | 
|  | 896 | +    "print(\"\\n🔍 Let's also test a simple keyword match:\")\n", | 
|  | 897 | +    "keyword_result = rag.query(\"Milvus vector database\", top_k=5)  \n", | 
|  | 898 | +    "print(f\"Keyword query returned {keyword_result['num_retrieved']} documents\")" | 
| 879 | 899 |    ] | 
| 880 | 900 |   }, | 
| 881 | 901 |   { | 
|  | 
0 commit comments