Skip to content

Commit 7b33d03

Browse files
author
lexasub
committed
tool: fix conversion of text/parquet to custom format
1 parent bff2db8 commit 7b33d03

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

tools/dataset-converter/convert-to-train-gguf.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,10 @@ int main(int argc, char ** argv) {
100100
llama_gguf_converter converter;
101101
bool success = converter.llama_gguf_converter_convert(params, model);
102102

103-
// Clean up llama model
104-
llama_model_free(model);
105-
llama_backend_free();
106-
107103
if (!success) {
108104
fprintf(stderr, "error: GGUF conversion failed.\n");
105+
llama_model_free(model); // Free model on conversion failure
106+
llama_backend_free();
109107
return 1;
110108
}
111109

@@ -120,6 +118,8 @@ int main(int argc, char ** argv) {
120118

121119
if (!reader.llama_gguf_reader_is_initialized()) {
122120
fprintf(stderr, "error: llama_gguf_reader failed to initialize for preview.\n");
121+
llama_model_free(model); // Free model before exiting
122+
llama_backend_free();
123123
return 1;
124124
}
125125

@@ -153,14 +153,19 @@ int main(int argc, char ** argv) {
153153
std::string detokenized_text = "";
154154
// Buffer for a single token
155155
std::array<char, 256> piece_buf; // Large enough buffer for a single token
156-
for (llama_token token : sequence_tokens) {
157-
int n_chars = llama_token_to_piece(llama_model_get_vocab(model), token,
158-
piece_buf.data(), piece_buf.size(), 1, false);
159-
if (n_chars > 0) {
160-
detokenized_text.append(piece_buf.data(), n_chars);
156+
// Ensure model is valid before calling llama_model_get_vocab
157+
if (model != nullptr) {
158+
for (llama_token token : sequence_tokens) {
159+
int n_chars = llama_token_to_piece(llama_model_get_vocab(model), token,
160+
piece_buf.data(), piece_buf.size(), 1, false);
161+
if (n_chars > 0) {
162+
detokenized_text.append(piece_buf.data(), n_chars);
163+
}
161164
}
165+
printf(" Detokenized: \"%s\"\n", detokenized_text.c_str());
166+
} else {
167+
fprintf(stderr, " Warning: Cannot detokenize preview, model is null.\n");
162168
}
163-
printf(" Detokenized: \"%s\"\n", detokenized_text.c_str());
164169
}
165170

166171
} else {
@@ -173,10 +178,16 @@ int main(int argc, char ** argv) {
173178

174179
} catch (const std::runtime_error & e) {
175180
fprintf(stderr, "error: GGUF preview failed: %s\n", e.what());
181+
llama_model_free(model); // Free model before exiting
182+
llama_backend_free();
176183
return 1;
177184
}
178185
printf("--- End of GGUF file preview ---\n");
179186
}
180187

188+
// Clean up llama model and backend after all usage
189+
llama_model_free(model);
190+
llama_backend_free();
191+
181192
return 0;
182193
}

0 commit comments

Comments (0)