Commit 5bce968

Update the example to use better reference models and allocate less memory for sequence ids
1 parent: 12a3f8b


embeddings/src/main.rs: 3 additions, 3 deletions
@@ -52,9 +52,9 @@ enum Model {
     /// Download a model from huggingface (or use a cached version)
     #[clap(name = "hf-model")]
     HuggingFace {
-        /// the repo containing the model. e.g. `TheBloke/Llama-2-7B-Chat-GGUF`
+        /// the repo containing the model. e.g. `BAAI/bge-small-en-v1.5`
         repo: String,
-        /// the model name. e.g. `llama-2-7b-chat.Q4_K_M.gguf`
+        /// the model name. e.g. `BAAI-bge-small-v1.5.Q4_K_M.gguf`
         model: String,
     },
 }
@@ -147,7 +147,7 @@ fn main() -> Result<()> {
 
     // create a llama_batch with the size of the context
    // we use this object to submit token data for decoding
-    let mut batch = LlamaBatch::new(n_ctx, tokens_lines_list.len() as i32);
+    let mut batch = LlamaBatch::new(n_ctx, 1);
 
     // Amount of tokens in the current batch
     let mut s_batch = 0;
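
The second argument to LlamaBatch::new controls how many sequence ids can be attached to each token slot, which is why shrinking it from the number of input lines to 1 "allocates less memory for sequence ids": every token in this example is tagged with exactly one sequence id. Below is a minimal sketch of how such a batch might be sized and filled. It assumes the llama-cpp-2 crate's LlamaBatch::new(n_tokens, n_seq_max) and LlamaBatch::add(token, pos, &[seq_id], logits) signatures (verify against the crate version in use); fill_batch is a hypothetical helper, not code from this example.

// Sketch only: assumes llama-cpp-2's LlamaBatch/LlamaToken API as described above.
use llama_cpp_2::llama_batch::LlamaBatch;
use llama_cpp_2::token::LlamaToken;

fn fill_batch(
    tokens: &[LlamaToken],
    n_ctx: usize,
) -> Result<LlamaBatch, Box<dyn std::error::Error>> {
    // Second argument bounds how many sequence ids each token slot can carry.
    // Each token here belongs to a single sequence, so 1 is enough; sizing it
    // by the number of input lines only over-allocated that buffer.
    let mut batch = LlamaBatch::new(n_ctx, 1);

    for (pos, &token) in tokens.iter().enumerate() {
        // Use sequence id 0 for every token; request logits only for the last one.
        let is_last = pos + 1 == tokens.len();
        batch.add(token, pos as i32, &[0], is_last)?;
    }
    Ok(batch)
}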
