Commit 152e903

llama : add classification head (wip) [no ci]

1 parent 00f40ae

2 files changed: 14 additions, 2 deletions

common/arg.cpp

Lines changed: 1 addition & 1 deletion
@@ -391,7 +391,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         [](gpt_params & params) {
             params.verbose_prompt = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    ));
     add_opt(llama_arg(
         {"--no-display-prompt"},
         format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"),

src/llama.cpp

Lines changed: 13 additions & 1 deletion
@@ -11291,8 +11291,20 @@ struct llm_build_context {
             inpL = cur;
         }
 
-        // final output
         cur = inpL;
+
+        // classification head
+        // https://github.com/huggingface/transformers/blob/5af7d41e49bbfc8319f462eb45253dcb3863dfb7/src/transformers/models/roberta/modeling_roberta.py#L1566
+        // TODO: become pooling layer?
+        if (model.cls) {
+            cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.cls, cur), model.cls_b);
+
+            cur = ggml_tanh(ctx0, cur);
+
+            cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.cls_out, cur), model.cls_out_b);
+            // TODO: cur is now a scalar - what to do?
+        }
+
         cb(cur, "result_embd", -1);
 
         ggml_build_forward_expand(gf, cur);
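
For reference, the arithmetic the new ggml nodes express, written out on plain host buffers: a dense layer with tanh activation followed by a projection to a single score, mirroring the referenced RobertaClassificationHead. This is a self-contained sketch with made-up toy dimensions and weights; the names cls, cls_b, cls_out and cls_out_b only stand in for the tensors the graph reads from model.cls etc., and nothing here is part of the commit.

    // minimal_cls_head.cpp - hypothetical illustration, not part of the commit.
    // Computes: score = cls_out . tanh(cls * h + cls_b) + cls_out_b
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const int n_embd = 4;                                     // toy embedding size
        std::vector<float> h       = {0.1f, -0.2f, 0.3f, 0.4f};   // pooled embedding (cur)
        std::vector<float> cls     (n_embd * n_embd, 0.05f);      // dense weight  (model.cls)
        std::vector<float> cls_b   (n_embd, 0.0f);                // dense bias    (model.cls_b)
        std::vector<float> cls_out (n_embd, 0.1f);                // output weight (model.cls_out)
        const float        cls_out_b = 0.0f;                      // output bias   (model.cls_out_b)

        // dense layer + tanh activation
        std::vector<float> t(n_embd, 0.0f);
        for (int i = 0; i < n_embd; ++i) {
            float acc = cls_b[i];
            for (int j = 0; j < n_embd; ++j) {
                acc += cls[i*n_embd + j] * h[j];
            }
            t[i] = std::tanh(acc);
        }

        // projection down to a single value per sequence - this is the
        // scalar that the trailing TODO in the hunk refers to
        float score = cls_out_b;
        for (int i = 0; i < n_embd; ++i) {
            score += cls_out[i] * t[i];
        }

        printf("classification score: %f\n", score);
        return 0;
    }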
