Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,17 +333,17 @@ static void print_params(struct my_llama_hparams * params) {
}

static void print_tensor_info(const struct ggml_context * ctx) {
for (auto t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
for (auto * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
LOG_INF("%s: Allocating ", __func__);
int64_t total = 1;
int i = 0;
for (; i < ggml_n_dims(t); ++i) {
if (i > 0) LOG("x ");
LOG("[%" PRId64 "] ", t->ne[i]);
if (i > 0) { LOG_INF("x "); }
LOG_INF("[%" PRId64 "] ", t->ne[i]);
total *= t->ne[i];
}
if (i > 1) LOG("= [%" PRId64 "] ", total);
LOG("float space for %s\n", ggml_get_name(t));
if (i > 1) { LOG_INF("= [%" PRId64 "] ", total); }
LOG_INF("float space for %s\n", ggml_get_name(t));
}
}

Expand Down
129 changes: 102 additions & 27 deletions ggml/src/gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1166,50 +1166,51 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
}

struct gguf_writer {
std::vector<int8_t> & buf;
struct gguf_writer_base {
size_t written_bytes {0u};

// This struct declares pure virtual functions, so it is a polymorphic base:
// make the destructor virtual so that destroying a derived writer through a
// base pointer or reference is well-defined.
virtual ~gguf_writer_base() = default;

gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}
// these are virtual calls; we bet on the compiler devirtualizing them
virtual void write(int8_t val) = 0;
virtual void write(const std::vector<int8_t> & val) = 0;
virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;

template <typename T>
void write(const T & val) const {
void write(const T & val) {
for (size_t i = 0; i < sizeof(val); ++i) {
buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
write(reinterpret_cast<const int8_t *>(&val)[i]);
}
}

void write(const std::vector<int8_t> & val) const {
buf.insert(buf.end(), val.begin(), val.end());
}

void write(const bool & val) const {
void write(const bool & val) {
const int8_t val8 = val ? 1 : 0;
write(val8);
}

void write(const std::string & val) const {
void write(const std::string & val) {
{
const uint64_t n = val.length();
write(n);
}
for (size_t i = 0; i < val.length(); ++i) {
buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
write((val.data())[i]);
}
}

void write(const char * val) const {
void write(const char * val) {
write(std::string(val));
}

void write(const enum ggml_type & val) const {
void write(const enum ggml_type & val) {
write(int32_t(val));
}

void write(const enum gguf_type & val) const {
void write(const enum gguf_type & val) {
write(int32_t(val));
}

void write(const struct gguf_kv & kv) const {
void write(const struct gguf_kv & kv) {
const uint64_t ne = kv.get_ne();

write(kv.get_key());
Expand Down Expand Up @@ -1250,7 +1251,7 @@ struct gguf_writer {
}
}

void write_tensor_meta(const struct gguf_tensor_info & info) const {
void write_tensor_meta(const struct gguf_tensor_info & info) {
write(info.t.name);

const uint32_t n_dims = ggml_n_dims(&info.t);
Expand All @@ -1263,14 +1264,33 @@ struct gguf_writer {
write(info.offset);
}

void pad(const size_t alignment) const {
while (buf.size() % alignment != 0) {
void pad(const size_t alignment) {
while (written_bytes % alignment != 0) {
const int8_t zero = 0;
write(zero);
}
}
};

// vector buffer based writer
struct gguf_writer_buf final : public gguf_writer_base {
std::vector<int8_t> & buf;

gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}

using gguf_writer_base::write;

// Appends a single byte to the backing vector and counts it in written_bytes.
void write(const int8_t val) override {
    buf.emplace_back(val);
    written_bytes += 1;
}

void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
// Appends all bytes of val to the end of the backing vector and adds their
// count to written_bytes.
void write(const std::vector<int8_t> & val) override {
    buf.insert(buf.end(), val.cbegin(), val.cend());
    written_bytes += val.size();
}

void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
GGML_ASSERT(buf.size() - offset_data == info.offset);

GGML_ASSERT(ggml_is_contiguous(&info.t));
Expand All @@ -1284,14 +1304,58 @@ struct gguf_writer {
GGML_ASSERT(info.t.data);
memcpy(buf.data() + offset, info.t.data, nbytes);
}
written_bytes += nbytes;

pad(alignment);
}
};

void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
const struct gguf_writer gw(buf);
// file based writer
struct gguf_writer_file final : public gguf_writer_base {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make it so that gguf_writer_file stops trying to write to the file once ok is false. Also please move it upwards so that the writer structs are all next to each other.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make it so that gguf_writer_file stops trying to write to the file once ok is false.

This is not a perfect way of breaking out of writing the file, but it effectively turns the remaining writes into no-ops.

Also please move it upwards so that the writer structs are all next to each other.

done.

FILE * file;

gguf_writer_file(FILE* file) : file(file) {}

using gguf_writer_base::write;

// Writes one byte to the file with fputc and counts it in written_bytes.
// Throws std::runtime_error when fputc does not return the byte value.
void write(const int8_t val) override {
    // fputc reports the written character as an unsigned char converted to int,
    // so the comparison has to be done against the unsigned byte value.
    const uint8_t byte = static_cast<uint8_t>(val);
    const int     rc   = fputc(byte, file);
    ++written_bytes;
    if (rc == byte) {
        return;
    }
    throw std::runtime_error("unexpected fputc result '" + std::to_string(rc) + "' instead of '" + std::to_string(static_cast<int>(byte)) + "'");
}

// Writes the whole vector to the file with one fwrite call and adds its size
// to written_bytes. Throws std::runtime_error when fwrite reports a different
// number of bytes than requested.
void write(const std::vector<int8_t> & val) override {
    const size_t n_expected = val.size();
    const size_t n_done     = fwrite(val.data(), 1, n_expected, file);
    written_bytes += n_expected;
    if (n_done == n_expected) {
        return;
    }
    throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(n_done) + "' instead of '" + std::to_string(n_expected) + "'");
}

// Writes the raw bytes of one tensor to the file, then zero-pads to alignment.
// Asserts that the bytes written since offset_data equal the tensor's recorded
// offset and that the tensor is contiguous.
void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
    GGML_ASSERT(written_bytes - offset_data == info.offset);
    GGML_ASSERT(ggml_is_contiguous(&info.t));

    // stage the tensor bytes in a temporary vector so they can go out via write()
    const size_t n_tensor_bytes = ggml_nbytes(&info.t);
    std::vector<int8_t> staging(n_tensor_bytes);

    if (!info.t.buffer) {
        // no backend buffer attached: copy straight from the data pointer
        GGML_ASSERT(info.t.data);
        memcpy(staging.data(), info.t.data, n_tensor_bytes);
    } else {
        ggml_backend_tensor_get(&info.t, staging.data(), 0, n_tensor_bytes);
    }

    write(staging);
    pad(alignment);
}
};

template <typename writer_t>
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
const int64_t n_kv = gguf_get_n_kv(ctx);
const int64_t n_tensors = gguf_get_n_tensors(ctx);

Expand Down Expand Up @@ -1321,14 +1385,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
return;
}

const size_t offset_data = gw.buf.size();
const size_t offset_data = gw.written_bytes;

// write tensor data
for (int64_t i = 0; i < n_tensors; ++i) {
gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
}
}

void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
gguf_writer_buf gw(buf);
gguf_write_out(ctx, gw, only_meta);
}

bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = ggml_fopen(fname, "wb");

Expand All @@ -1337,11 +1406,17 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
return false;
}

std::vector<int8_t> buf;
gguf_write_to_buf(ctx, buf, only_meta);
const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size();
try {
gguf_writer_file gw(file);
gguf_write_out(ctx, gw, only_meta);
} catch (const std::runtime_error& ex) {
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
fclose(file);
return false;
}

fclose(file);
return ok;
return true;
}

size_t gguf_get_meta_size(const struct gguf_context * ctx) {
Expand Down
Loading