revert(cli): Revert the default reranker to NaiveReranker. (#277)

Davidyz · web-flow · commit 84d6d1adbbab · 2025-08-28T03:26:27.000+01:00
* revert(cli): default to `NaiveReranker`

* Auto generate docs
diff --git a/doc/VectorCode-cli.txt b/doc/VectorCode-cli.txt
@@ -358,18 +358,13 @@ most `n` documents. A larger value of `query_multiplier` guarantees the return
 of `n` documents, but with the risk of including too many less-relevant chunks
 that may affect the document selection. Default: `-1` (any negative value means
 selecting documents based on all indexed chunks); - `reranker`string, the
-reranking method to use. Currently supports `CrossEncoderReranker` (default,
-using sentence-transformers cross-encoder
-<https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html> )
-and `NaiveReranker` (sort chunks by the "distance" between the embedding
-vectors). Note: If you’re using a good embedding model (eg. a hosted service
-from OpenAI, or a LLM-based embedding model like Qwen3-Embedding-0.6B
-<https://huggingface.co/Qwen/Qwen3-Embedding-0.6B>), you may get better results
-if you use `NaiveReranker` here because a good embedding model may understand
-texts better than a mediocre reranking model. - `reranker_params`dictionary,
-similar to `embedding_params`. The options passed to the reranker class
-constructor. For `CrossEncoderReranker`, these are the options passed to the
-`CrossEncoder`
+reranking method to use. Currently supports `NaiveReranker` (default, sort
+chunks by the "distance" between the embedding vectors) and
+`CrossEncoderReranker` (using sentence-transformers cross-encoder
+<https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html> ).
+- `reranker_params`dictionary, similar to `embedding_params`. The options
+passed to the reranker class constructor. For `CrossEncoderReranker`, these are
+the options passed to the `CrossEncoder`
 <https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html#id1>
 class. For example, if you want to use a non-default model, you can use the
 following: `json { "reranker_params": { "model_name_or_path": "your_model_here"
diff --git a/docs/cli.md b/docs/cli.md
@@ -311,16 +311,11 @@ The JSON configuration file may hold the following values:
   guarantees the return of `n` documents, but with the risk of including too
   many less-relevant chunks that may affect the document selection. Default: 
   `-1` (any negative value means selecting documents based on all indexed chunks);
-- `reranker`: string, the reranking method to use. Currently supports
-  `CrossEncoderReranker` (default, using 
+- `reranker`: string, the reranking method to use. Currently supports `NaiveReranker` 
+  (default, sort chunks by the "distance" between the embedding vectors) and
+  `CrossEncoderReranker` (using 
   [sentence-transformers cross-encoder](https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html)
-  ) and `NaiveReranker` (sort chunks by the "distance" between the embedding
-  vectors).
-  Note: If you're using a good embedding model (eg. a hosted service from OpenAI, or 
-  a LLM-based embedding model like 
-  [Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B)), you
-  may get better results if you use `NaiveReranker` here because a good embedding
-  model may understand texts better than a mediocre reranking model.
+  ).
 - `reranker_params`: dictionary, similar to `embedding_params`. The options
   passed to the reranker class constructor. For `CrossEncoderReranker`, these
   are the options passed to the 
diff --git a/src/vectorcode/cli_utils.py b/src/vectorcode/cli_utils.py
@@ -100,7 +100,7 @@ class Config:
     overlap_ratio: float = 0.2
     query_multiplier: int = -1
     query_exclude: list[Union[str, os.PathLike]] = field(default_factory=list)
-    reranker: Optional[str] = "CrossEncoderReranker"
+    reranker: Optional[str] = "NaiveReranker"
     reranker_params: dict[str, Any] = field(default_factory=lambda: {})
     check_item: Optional[str] = None
     use_absolute_path: bool = False
diff --git a/tests/test_cli_utils.py b/tests/test_cli_utils.py
@@ -113,7 +113,7 @@ async def test_config_import_from_missing_keys():
     assert config.chunk_size == 2500
     assert config.overlap_ratio == 0.2
     assert config.query_multiplier == -1
-    assert config.reranker == "CrossEncoderReranker"
+    assert config.reranker == "NaiveReranker"
     assert config.reranker_params == {}
     assert config.db_settings is None