@@ -474,7 +474,8 @@ def get_topn_similar_docs_pinecone(
474474 only_urls (bool, optional): Whether to return only URLs. Defaults to False.
475475
476476 Returns:
477- List[Tuple]: List of tuples containing document content and similarity scores.
477+ List[Tuple]: One tuple per match: (url,) if only_urls is True, (page_content, url, parent_section)
478+ if include_metadata is True, otherwise (page_content,).
478479 """
479480 # Convert numpy array to list if needed
480481 if isinstance (query_embedding , np .ndarray ):
@@ -488,16 +489,20 @@ def get_topn_similar_docs_pinecone(
488489 # Process results
489490 similar_docs = []
490491 for match in results .matches :
491- score = match .score
492492 metadata = match .metadata
493493
494494 if only_urls :
495- similar_docs .append ((metadata ["url" ], score ))
495+ similar_docs .append ((metadata ["url" ],))
496+ elif include_metadata :
497+ similar_docs .append (
498+ (
499+ metadata ["page_content" ],
500+ metadata ["url" ],
501+ metadata ["parent_section" ],
502+ )
503+ )
496504 else :
497- content = metadata ["page_content" ]
498- if include_metadata :
499- content = f"{ metadata ['filename' ]} - { metadata ['parent_section' ]} : { content } "
500- similar_docs .append ((content , score ))
505+ similar_docs .append ((metadata ["page_content" ],))
501506
502507 return similar_docs
503508
@@ -523,7 +528,8 @@ def get_topn_similar_docs(
523528 only_urls (bool, optional): Whether to return only URLs. Defaults to False.
524529
525530 Returns:
526- List[Tuple]: List of tuples containing document content and similarity scores.
531+ List[Tuple]: List of tuples containing the content and metadata (if include_metadata is True)
532+ of the top n most similar documents.
527533
528534 Raises:
529535 ValueError: If no valid vector store client is provided.
0 commit comments