Skip to content

Commit 4661940

Browse files
committed
Fix blk sequence bug and incorporate CISC recommendations
1 parent a36e0e1 commit 4661940

File tree

2 files changed

+15
-17
lines changed

2 files changed

+15
-17
lines changed

src/llama-quant.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static void zeros(std::ofstream & file, size_t n) {
2626
}
2727
}
2828

29-
static std::string remap_layer(const std::string & orig_name, const std::vector<int>& prune, std::map<int, std::string>& mapped, int& next_id) {
29+
static std::string remap_layer(const std::string & orig_name, const std::vector<int> & prune, std::map<int, std::string> & mapped, int & next_id) {
3030
if (prune.empty()) {
3131
return orig_name;
3232
}
@@ -39,7 +39,7 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
3939
if (mapped.count(blk)) {
4040
// Already mapped, do nothing
4141
} else if (std::find(prune.begin(), prune.end(), blk) != prune.end()) {
42-
mapped[blk] = "X";
42+
mapped[blk] = "";
4343
} else if (blk < prune.front()) {
4444
mapped[blk] = std::to_string(blk);
4545
next_id = blk + 1;
@@ -48,13 +48,13 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
4848
++next_id;
4949
}
5050

51-
return mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
51+
return mapped[blk].empty() ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
5252
}
5353

5454
return orig_name;
5555
}
5656

57-
static std::string remap_imatrix (const std::string & orig_name, const std::map<int, std::string>& mapped) {
57+
static std::string remap_imatrix (const std::string & orig_name, const std::map<int, std::string> & mapped) {
5858
if (mapped.empty()) {
5959
return orig_name;
6060
}
@@ -628,7 +628,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
628628
gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
629629

630630
if (!prune_list.empty()) {
631-
gguf_set_val_bool(ctx_out.get(), "general.pruned", true);
632631
uint32_t block_count = 0;
633632
ml.get_key(LLM_KV_BLOCK_COUNT, block_count);
634633
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count - prune_list.size());
@@ -667,10 +666,11 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
667666
tensors.reserve(ml.weights_map.size());
668667
for (const auto & it : ml.weights_map) {
669668
const std::string remapped_name(remap_layer(it.first, prune_list, mapped, next_blk_id));
670-
if (remapped_name == "X") {
671-
if (it.first.find("attn_v.weight") != std::string::npos ||
672-
it.first.find("attn_qkv.weight") != std::string::npos ||
673-
it.first.find("attn_kv_b.weight")!= std::string::npos) {
669+
if (remapped_name.empty()) {
670+
if (false
671+
|| it.first.find("attn_v.weight") != std::string::npos
672+
|| it.first.find("attn_qkv.weight") != std::string::npos
673+
|| it.first.find("attn_kv_b.weight")!= std::string::npos) {
674674
pruned_attention_w++;
675675
}
676676
LLAMA_LOG_DEBUG("%s: prunning tensor %s\n", __func__, it.first.c_str());

tools/quantize/quantize.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -293,24 +293,22 @@ static bool parse_layer_prune(const char * data, std::vector<int> & prune_layers
293293
}
294294

295295
const auto block_ids = string_split<std::string>(data, ',');
296-
297-
for ( const auto & block_id : block_ids) {
298-
296+
for (const auto & block_id : block_ids) {
297+
int id;
299298
try {
300-
std::stoi(block_id);
299+
id = std::stoi(block_id);
301300
} catch (...) {
302-
printf("%s: invalid layer id '%s'\n\n", __func__, block_id.c_str());
303-
return false;
301+
id = -1;
304302
}
305-
306-
int id = std::stoi(block_id);
307303
if (id < 0) {
308304
printf("\n%s: invalid layer id '%s'\n\n", __func__, block_id.c_str());
309305
return false;
310306
}
311307
prune_layers.emplace_back(id);
312308
}
313309

310+
sort(prune_layers.begin(), prune_layers.end());
311+
prune_layers.erase(std::unique(prune_layers.begin(), prune_layers.end()), prune_layers.end());
314312
return true;
315313
}
316314

0 commit comments

Comments
 (0)