
Commit 3efbb74

code review cleanup
1 parent 0d9725c commit 3efbb74

2 files changed: +3, -12 lines

src/llama-graph.cpp

Lines changed: 0 additions & 4 deletions

@@ -238,10 +238,6 @@ void llm_graph_input_rs::set_input(const llama_ubatch * ubatch) {
     const int64_t n_rs = mctx->get_n_rs();
 
     if (s_copy) {
-        // Check if buffer was allocated - skip if not
-        if (s_copy->buffer == nullptr) {
-            return;
-        }
         GGML_ASSERT(ggml_backend_buffer_is_host(s_copy->buffer));
         int32_t * data = (int32_t *) s_copy->data;
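
For context, the removal above relies on the assertion that immediately follows it: if the buffer were still unallocated when set_input() runs, GGML_ASSERT fails loudly rather than the input being silently skipped. A self-contained sketch of that pattern (host_tensor and the free-standing set_input() here are hypothetical stand-ins, not the llama.cpp types):

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for a graph-input tensor whose data is written on the host.
struct host_tensor {
    void *    buffer = nullptr; // expected to be allocated before set_input() is called
    int32_t * data   = nullptr;
};

// The pattern the commit settles on: assert that allocation already happened instead of
// returning early, so a scheduling bug surfaces immediately rather than being hidden.
void set_input(host_tensor * s_copy, const std::vector<int32_t> & copies) {
    if (s_copy) {
        assert(s_copy->buffer != nullptr && "buffer must be allocated before set_input");
        for (size_t i = 0; i < copies.size(); ++i) {
            s_copy->data[i] = copies[i];
        }
    }
}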

src/llama-model.cpp

Lines changed: 3 additions & 8 deletions

@@ -3751,14 +3751,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_NEMOTRON_H:
             {
                 const int64_t d_conv = hparams.ssm_d_conv;
-                // Nemotron-H uses 12288 for conv1d tensors, not the standard 15680
-                const int64_t d_inner = 12288; // Override: actual conv1d tensor dimension
                 const int64_t d_state = hparams.ssm_d_state;
-                const int64_t n_head = hparams.ssm_dt_rank;
                 const int64_t n_group = hparams.ssm_n_group;
                 // Use actual dimension from model: 22656 instead of calculated 22608
-                const int64_t d_in_proj = 22656; // 2*d_inner + 2*n_group*d_state + n_head + 48;
-                const int64_t d_x_part = d_inner + 2*n_group*d_state; // x1 + B + C
+                const int64_t d_in_proj = 22656;
 
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
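
For context on the hardcoded 22656: the comments removed in this hunk derived the input-projection width from the SSM hyperparameters, and per the old comment that derivation came out 48 short of the tensor actually shipped in the converted model (22608 vs. 22656). A minimal sketch of the formula the old comment used (helper name and parameters are illustrative only, not llama.cpp API):

#include <cstdint>

// Formula from the removed comment: the Mamba-2 style in_proj width is
//   2*d_inner + 2*n_group*d_state + n_head,
// which the old comment says evaluates to 22608 here, while the tensor in the model
// file is 22656, hence the hardcoded constant this commit keeps.
// (The also-removed d_x_part = d_inner + 2*n_group*d_state covered the x1 + B + C slice
// and was unused.)
static int64_t derive_d_in_proj(int64_t d_inner, int64_t d_state,
                                int64_t n_group, int64_t n_head) {
    return 2*d_inner + 2*n_group*d_state + n_head;
}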

@@ -11688,8 +11684,7 @@ struct llm_build_nemotron_h : public llm_graph_context_mamba {
             ggml_tensor * cur,
             const llama_model & model,
             const llama_ubatch & ubatch,
-            int il,
-            ggml_cgraph * gf) const {
+            int il) const {
         // Reuse the Mamba-2 implementation which handles FP32 conv + SSM states
         return build_mamba2_layer(inp, cur, model, ubatch, il);
     }

@@ -11712,7 +11707,7 @@ struct llm_build_nemotron_h : public llm_graph_context_mamba {
         // Nemotron-H hybrid layer logic based on schedule
         if (hparams.is_recurrent(il)) {
             // SSM/Mamba layer - use Nemotron-H specific implementation
-            cur = build_nemotron_h_ssm_layer(inp_hybrid->get_recr(), cur, model, ubatch, il, gf);
+            cur = build_nemotron_h_ssm_layer(inp_hybrid->get_recr(), cur, model, ubatch, il);
         } else {
             // Attention layer if KV heads are present (per schedule)
             const bool is_attention_layer = hparams.n_head_kv(il) > 0;
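
The call-site change above also shows the hybrid per-layer dispatch: hparams.is_recurrent(il) selects the SSM/Mamba path, and a non-recurrent layer counts as attention only when it has KV heads. A self-contained sketch of that dispatch (types are hypothetical; the 'other' case, e.g. an FFN-only layer, is an assumption):

#include <vector>

// Hypothetical per-layer schedule for a Nemotron-H style hybrid model.
enum class layer_kind { recurrent, attention, other };

struct hybrid_schedule {
    std::vector<bool> recurrent; // true -> SSM/Mamba-2 layer
    std::vector<int>  n_head_kv; // > 0  -> attention layer

    layer_kind kind(int il) const {
        if (recurrent[il]) {
            return layer_kind::recurrent;                // mirrors hparams.is_recurrent(il)
        }
        return n_head_kv[il] > 0 ? layer_kind::attention // mirrors n_head_kv(il) > 0
                                 : layer_kind::other;    // assumption: e.g. FFN-only layer
    }
};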
