From 68cc4002b8a6f7a704225e79b6321fd8c610af06 Mon Sep 17 00:00:00 2001 From: Pawel Date: Fri, 8 Aug 2025 14:44:37 +0200 Subject: [PATCH 1/2] added jinja2 extension class to enable Phi4-reasoning loading --- src/llm/servable_initializer.cpp | 50 +++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index 431e5a54f8..d2d9b0814b 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -61,13 +61,61 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr jinja2.nodes.CallBlock: + lineno = next(parser.stream).lineno + body = parser.parse_statements(["name:endgeneration"], drop_needle=True) + return jinja2.nodes.CallBlock(self.call_method("_generation_support"), [], [], body).set_lineno(lineno) + + @jinja2.pass_eval_context + def _generation_support(self, context: jinja2.nodes.EvalContext, caller: jinja2.runtime.Macro) -> str: + rv = caller() + if self.is_active(): + # Only track generation indices if the tracker is active + start_index = len("".join(self._rendered_blocks)) + end_index = start_index + len(rv) + self._generation_indices.append((start_index, end_index)) + return rv + + def is_active(self) -> bool: + return self._rendered_blocks or self._generation_indices + + @contextmanager + def activate_tracker(self, rendered_blocks: list[int], generation_indices: list[int]): + try: + if self.is_active(): + raise ValueError("AssistantTracker should not be reused before closed") + self._rendered_blocks = rendered_blocks + self._generation_indices = generation_indices + + yield + finally: + self._rendered_blocks = None + self._generation_indices = None + + # Default chat template accepts only single message and outputs only it's 'content' # effectively turning it into a regular prompt. default_chat_template = "{% if messages|length != 1 %} {{ raise_exception('This servable accepts only single message requests') }}{% endif %}{{ messages[0]['content'] }}" @@ -83,7 +131,7 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr Date: Mon, 11 Aug 2025 06:47:34 +0100 Subject: [PATCH 2/2] Added comment about the source of the new class --- src/llm/servable_initializer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index d2d9b0814b..edcbcb3a67 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -72,7 +72,9 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr