
Commit 239d2dd

Author: Lorenzo Toniazzi (committed)
Add printing to check weights match torch version
1 parent db20f50 commit 239d2dd

File tree

1 file changed: +44 -0 lines changed


src/llama.cpp

Lines changed: 44 additions & 0 deletions
@@ -7812,13 +7812,53 @@ static void llm_build_kv_store(
     ggml_build_forward_expand(graph, ggml_cpy(ctx, v_cur, v_cache_view));
 }
 
+#include <iostream>
+// Function to print the tensor data.
+// Check the printing in this example and see if it makes sense here:
+// https://github.com/ggerganov/ggml/blob/master/examples/simple/simple-backend.cpp
+// but that example seems to print in column-major order, blocking by columns:
+// https://github.com/ggerganov/ggml/blob/master/examples/simple/simple-backend.cpp#L208
+void print_tensor_values(const ggml_tensor *tensor) {
+    // Assumes the tensor stores floats and is contiguous in memory.
+    // float *data = reinterpret_cast<float*>(tensor->data);
+    std::vector<float> data(ggml_nelements(tensor));
+    // bring the data over from the backend memory
+    ggml_backend_tensor_get(tensor, data.data(), 0, ggml_nbytes(tensor));
+
+
+    // Print the shape of the tensor
+    std::cout << std::endl;
+
+    std::cout << "Dimensions of " << tensor->name << ": ";
+    printf("(i=%d x j=%d) (transposed tensor):\n[", (int) tensor->ne[0], (int) tensor->ne[1]);
+    for (int j = 0; j < tensor->ne[1] /* rows */; j++) {
+        if (j > 0) {
+            printf("\n");
+        }
+
+        for (int i = 0; i < tensor->ne[0] /* cols */; i++) {
+            printf(" %.6f", data[j * tensor->ne[0] + i]); // ne[0] (the dimension stored first) indexes the columns of the original matrix
+            // printf(" %.6f", data[i * tensor->ne[1] + j]); // wrong for both base and lora layers
+            if (i > 5) {
+                break; // only print the first few values per row
+            }
+        }
+        if (j > 6) {
+            break; // only print the first few rows
+        }
+    }
+    printf(" ]\n");
+
+}
+
 // do mat_mul, while optionally apply lora
 static struct ggml_tensor * llm_build_lora_mm(
         struct llama_context & lctx,
         struct ggml_context * ctx0,
         struct ggml_tensor * w,
         struct ggml_tensor * cur) {
     struct ggml_tensor * res = ggml_mul_mat(ctx0, w, cur);
+
     for (auto & it : lctx.lora_adapters) {
         struct llama_lora_weight * lora = it.first->get_weight(w);
         if (lora == nullptr) {
@@ -7827,6 +7867,10 @@ static struct ggml_tensor * llm_build_lora_mm(
         const float alpha = it.first->alpha;
         const float rank = (float) lora->b->ne[0];
         const float scale = alpha ? it.second * alpha / rank : it.second;
+
+        print_tensor_values(w);       // base layer
+        print_tensor_values(lora->a); // lora a
+        print_tensor_values(lora->b); // lora b
         struct ggml_tensor * ab_cur = ggml_mul_mat(
             ctx0, lora->b,
             ggml_mul_mat(ctx0, lora->a, cur)
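
Note on the indexing question raised in the comments above: for a contiguous F32 ggml tensor, ne[0] is the fastest-varying dimension (the length of one row in memory), so the element printed at row j, column i sits at flat index j * ne[0] + i, which is what the uncommented printf uses. Below is a minimal standalone sketch of that layout assumption; the 3x4 toy matrix and its values are illustrative only, not part of the commit.

#include <cstdio>
#include <vector>

int main() {
    const int ne0 = 4; // elements per row (what the commit treats as columns)
    const int ne1 = 3; // number of rows
    std::vector<float> data(ne0 * ne1);

    // fill so that each value encodes its logical position: row * 100 + col
    for (int j = 0; j < ne1; j++) {
        for (int i = 0; i < ne0; i++) {
            data[j * ne0 + i] = 100.0f * j + i;
        }
    }

    // printing with the same indexing as print_tensor_values recovers the rows:
    // 0 1 2 3 / 100 101 102 103 / 200 201 202 203
    for (int j = 0; j < ne1; j++) {
        for (int i = 0; i < ne0; i++) {
            printf(" %.0f", data[j * ne0 + i]);
        }
        printf("\n");
    }
    return 0;
}

Running it prints each row on its own line, matching what print_tensor_values shows for the base and LoRA matrices.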

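The printed values only allow an eyeball comparison with the torch weights. To check them numerically, one could instead dump each tensor to a raw float32 file and compare it offline with an export from the Python side (for example via numpy's ndarray.tofile). The helper below is a hypothetical sketch, not part of this commit: it is meant to sit next to print_tensor_values in llama.cpp, reuses the same ggml_backend_tensor_get call, and makes the same assumption that the tensor holds contiguous F32 data.

#include <fstream>
#include <string>
#include <vector>

// Hypothetical helper (not in the commit): dump a tensor to a raw float32 file.
// Assumes the tensor holds contiguous F32 data, like print_tensor_values does.
static void dump_tensor_values(const ggml_tensor * tensor, const std::string & path) {
    std::vector<float> data(ggml_nelements(tensor));
    // pull the data out of backend memory, exactly as the committed function does
    ggml_backend_tensor_get(tensor, data.data(), 0, ggml_nbytes(tensor));

    std::ofstream out(path, std::ios::binary);
    out.write(reinterpret_cast<const char *>(data.data()),
              (std::streamsize) (data.size() * sizeof(float)));
}

// hypothetical usage inside llm_build_lora_mm, next to the print calls:
//   dump_tensor_values(w, std::string(w->name) + ".bin");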