We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 65ef1c8 commit a217382 (Copy full SHA for a217382)
examples/server/server.cpp
@@ -1958,7 +1958,6 @@ struct server_context {
1958
size_t n_probs = slot.params.sampling.n_probs;
1959
int n_vocab = llama_n_vocab(llama_get_model(ctx));
1960
if (post_sampling) {
1961
- // TODO: optimize this with min-p optimization
1962
const auto * cur_p = common_sampler_get_candidates(slot.smpl);
1963
const size_t max_probs = cur_p->size;
1964
@@ -1982,6 +1981,7 @@ struct server_context {
1982
1981
}
1983
1984
} else {
+ // TODO: optimize this with min-p optimization
1985
std::vector<llama_token_data> cur = get_token_probabilities(ctx, idx);
1986
1987
bool found_sampled_tok = false;
0 commit comments