Skip to content

Commit 0638c44

Browse files
committed
llama: updated comments
1 parent ee599f9 commit 0638c44

File tree

1 file changed

+19
-8
lines changed

1 file changed

+19
-8
lines changed

src/llama.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3502,13 +3502,10 @@ static bool llama_kv_cache_init(
35023502
return true;
35033503
}
35043504

3505-
// find an empty slot of size "n_tokens" in the cache
3506-
// updates the cache head
3507-
// Note: On success, it's important that cache.head points
3508-
// to the first cell of the slot.
3505+
// a structure that holds information about the slot found by llama_kv_cache_find_slot
35093506
struct llama_kv_cache_slot_info {
3510-
std::pair<uint32_t, uint32_t> boundaries;
3511-
bool found = false;
3507+
std::pair<uint32_t, uint32_t> boundaries; // slot boundaries [begin, end)
3508+
bool found = false; // whether the slot was found
35123509

35133510
explicit llama_kv_cache_slot_info(bool found_) : found{found_} {}
35143511
llama_kv_cache_slot_info(uint32_t begin, uint32_t end) : boundaries{begin, end}, found{true} {}
@@ -3517,6 +3514,11 @@ struct llama_kv_cache_slot_info {
35173514
};
35183515
static const llama_kv_cache_slot_info llama_kv_cache_slot_info_failed{false};
35193516

3517+
// find an empty slot of size "n_tokens" in the cache
3518+
// updates the cache head
3519+
// returns a structure holding information about the slot found
3520+
// Note: On success, it's important that cache.head points
3521+
// to the first cell of the slot.
35203522
static struct llama_kv_cache_slot_info llama_kv_cache_find_slot(
35213523
struct llama_kv_cache & cache,
35223524
const struct llama_ubatch & batch) {
@@ -4019,7 +4021,9 @@ struct llama_kv_slot_restorer {
40194021
uint32_t n = 0;
40204022
} old_state;
40214023

4022-
std::vector<std::pair<uint32_t, uint32_t>> slot_boundaries; // for non-recurrent models only
4024+
// for non-recurrent models only
4025+
// list of slots to restore
4026+
std::vector<std::pair<uint32_t, uint32_t>> slot_boundaries;
40234027

40244028
bool do_restore = false;
40254029

@@ -4028,7 +4032,8 @@ struct llama_kv_slot_restorer {
40284032
old_state.n = cache.n;
40294033
}
40304034

4031-
void save(const struct llama_kv_cache_slot_info& slot) {
4035+
// saves slot information for future restoration
4036+
void save(const struct llama_kv_cache_slot_info & slot) {
40324037
if (slot) {
40334038
do_restore = true;
40344039
if (slot.boundaries.first != slot.boundaries.second) {
@@ -4037,6 +4042,8 @@ struct llama_kv_slot_restorer {
40374042
}
40384043
}
40394044

4045+
// must be explicitly called to restore the kv_cache state
4046+
// and rollback changes from all llama_kv_cache_find_slot calls
40404047
void restore(struct llama_kv_cache & cache) {
40414048
if (do_restore) {
40424049
cache.head = old_state.head;
@@ -17236,6 +17243,7 @@ static void llama_output_reorder(struct llama_context * ctx) {
1723617243
}
1723717244
}
1723817245

17246+
// returns the result of ggml_backend_sched_graph_compute_async execution
1723917247
static enum ggml_status llama_graph_compute(
1724017248
llama_context & lctx,
1724117249
ggml_cgraph * gf,
@@ -17262,6 +17270,9 @@ static enum ggml_status llama_graph_compute(
1726217270
}
1726317271

1726417272
// decode a batch of tokens by evaluating the transformer
17273+
// in case of unsuccessful decoding (error or warning),
17274+
// the kv_cache state will be returned to its original state
17275+
// (for non-recurrent models) or cleared (for recurrent models)
1726517276
//
1726617277
// - lctx: llama context
1726717278
// - batch: batch to evaluate

0 commit comments

Comments
 (0)