Skip to content

Commit e090ff1

Browse files
authored
Accuracy improvements in phi4 response parser (openvinotoolkit#3448)
1 parent a321bda commit e090ff1

File tree

3 files changed

+89
-30
lines changed

3 files changed

+89
-30
lines changed

src/llm/response_parsers/phi4_response_parser.cpp

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,17 @@ ParsedResponse Phi4ResponseParser::parse(const std::vector<int64_t>& generatedTo
3939
// Phi4 with vLLM template produces tool calls in the format:
4040
// functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
4141
std::string decoded = tokenizer.decode(generatedTokens);
42-
std::regex toolRegex(R"(functools\[(.*?)\])");
43-
std::sregex_iterator begin(decoded.begin(), decoded.end(), toolRegex);
44-
std::sregex_iterator end;
45-
size_t matchCount = std::distance(begin, end);
46-
47-
if (matchCount == 0) {
48-
parsedResponse.content = decoded;
49-
} else if (matchCount == 1) {
50-
std::smatch match = *begin;
51-
// Put everything, but functools[...] part into the response content
52-
parsedResponse.content = decoded.substr(0, match.position()) +
53-
decoded.substr(match.position() + match.length());
54-
55-
std::string toolsStr = match[1].str();
56-
std::string toolsJson = "{\"functools\": [" + toolsStr + "]}"; // Wrap in JSON array
57-
42+
std::string toolsStartString = "functools";
43+
size_t toolsStartPos = decoded.find(toolsStartString);
44+
if (toolsStartPos != std::string::npos) {
45+
// Extract the content before the tools part
46+
parsedResponse.content = decoded.substr(0, toolsStartPos);
47+
// Extract the tools part, assuming it's all the remaining content after "functools"
48+
std::string toolsString = decoded.substr(toolsStartPos + toolsStartString.length());
5849
rapidjson::Document toolsDoc;
59-
toolsDoc.Parse(toolsJson.c_str());
60-
if (!toolsDoc.HasParseError() && toolsDoc.IsObject() && toolsDoc.HasMember("functools") && toolsDoc["functools"].IsArray()) {
61-
const rapidjson::Value& toolsArray = toolsDoc["functools"];
62-
for (auto& toolVal : toolsArray.GetArray()) {
50+
toolsDoc.Parse(toolsString.c_str());
51+
if (!toolsDoc.HasParseError() && toolsDoc.IsArray()) {
52+
for (auto& toolVal : toolsDoc.GetArray()) {
6353
if (!toolVal.IsObject()) {
6454
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool call is not a valid JSON object");
6555
continue;
@@ -81,10 +71,11 @@ ParsedResponse Phi4ResponseParser::parse(const std::vector<int64_t>& generatedTo
8171
parsedResponse.toolCalls.push_back(toolCall);
8272
}
8373
} else {
84-
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse toolsJson or extract tools array");
74+
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Failed to parse functools content or extract tools array");
75+
parsedResponse.content = decoded; // If parsing fails, return the whole decoded content
8576
}
8677
} else {
87-
throw std::runtime_error("Multiple 'functools[...]' matches found in the response.");
78+
parsedResponse.content = decoded;
8879
}
8980
return parsedResponse;
9081
}

src/test/llm/response_parsers/phi4_response_parser_test.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,29 @@ TEST_F(Phi4ResponseParserTest, ParseToolCallOutputWithContentAndSingleToolCall)
116116
EXPECT_EQ(parsedResponse.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
117117
EXPECT_EQ(parsedResponse.toolCalls[0].id.empty(), false); // ID should be generated
118118
}
119-
TEST_F(Phi4ResponseParserTest, ParseToolCallOutputWithMultipleFunctoolsThrows) {
119+
TEST_F(Phi4ResponseParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsContentOnly) {
120120
std::string input = "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]";
121121
auto generatedTensor = tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
122122
std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
123-
EXPECT_THROW({
124-
responseParser->parse(generatedTokens);
125-
},
126-
std::runtime_error);
123+
ParsedResponse parsedResponse = responseParser->parse(generatedTokens);
124+
// Content after 'functools' cannot be parsed as array of JSON objects, so it is treated as content
125+
EXPECT_EQ(parsedResponse.content, "functools[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}]\n\nThis is some content\n\nfunctools[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]");
126+
EXPECT_EQ(parsedResponse.reasoning, "");
127+
EXPECT_EQ(parsedResponse.reasoningTokenCount, 0);
128+
ASSERT_EQ(parsedResponse.toolCalls.size(), 0); // No valid tool calls parsed
129+
}
130+
131+
TEST_F(Phi4ResponseParserTest, ParseToolCallOutputWithArrayArguments) {
132+
std::string input = "functools[{\"name\": \"extractLastTransactionId\", \"arguments\": { \"filepath\": \"/var/log/db.log\", \"status\": [\"completed\", \"failed\"], \"encoding\": \"utf-8\", \"processFunction\": \"processFunction\"}}]";
133+
auto generatedTensor = tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
134+
std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
135+
ParsedResponse parsedResponse = responseParser->parse(generatedTokens);
136+
EXPECT_EQ(parsedResponse.content, "");
137+
EXPECT_EQ(parsedResponse.reasoning, "");
138+
EXPECT_EQ(parsedResponse.reasoningTokenCount, 0);
139+
ASSERT_EQ(parsedResponse.toolCalls.size(), 1);
140+
EXPECT_EQ(parsedResponse.toolCalls[0].name, "extractLastTransactionId");
141+
// Parser removes whitespaces, so we expect arguments value to be without spaces
142+
EXPECT_EQ(parsedResponse.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}");
143+
EXPECT_EQ(parsedResponse.toolCalls[0].id.empty(), false); // ID should be generated
127144
}

windows_prepare_llm_models.bat

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,23 @@ set "RERANK_MODEL=BAAI/bge-reranker-base"
3333
set "TEXT_GENERATION_MODEL=facebook/opt-125m"
3434
set "VLM_MODEL=OpenGVLab/InternVL2-1B"
3535

36-
if exist "%~1\%TEXT_GENERATION_MODEL%" if exist "%~1\%EMBEDDING_MODEL%" if exist "%~1\%EMBEDDING_MODEL%\ov" if exist "%~1\%RERANK_MODEL%" if exist "%~1\%VLM_MODEL%" (
37-
echo Models directory %~1 exists. Skipping downloading models.
36+
:: Models for tools testing. Only tokenizers are downloaded.
37+
set "QWEN3_MODEL=Qwen/Qwen3-8B"
38+
set "LLAMA3_MODEL=meta-llama/Llama-3.1-8B-Instruct"
39+
set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B"
40+
set "PHI4_MODEL=microsoft/Phi-4-mini-instruct"
41+
42+
set MODELS_LIST=%TEXT_GENERATION_MODEL% %EMBEDDING_MODEL% %EMBEDDING_MODEL%\ov %RERANK_MODEL% %VLM_MODEL% %QWEN3_MODEL% %LLAMA3_MODEL% %HERMES3_MODEL% %PHI4_MODEL%
43+
44+
set "ALL_EXIST=1"
45+
for %%M in ("%MODELS_LIST%") do (
46+
if not exist "%~1\%%~M" (
47+
set "ALL_EXIST=0"
48+
)
49+
)
50+
51+
if "!ALL_EXIST!"=="1" (
52+
echo All required models exist in %~1. Skipping downloading models.
3853
exit /b 0
3954
)
4055

@@ -97,7 +112,43 @@ if exist "%~1\%VLM_MODEL%" (
97112
echo Models directory %~1\%VLM_MODEL% exists. Skipping downloading models.
98113
) else (
99114
echo Downloading visual language model to %~1\%VLM_MODEL% directory.
100-
python demos\common\export_models\export_model.py text_generation --pipeline_type VISUAL_LANGUAGE_MODEL --source_model "%VLM_MODEL%" --weight-format int4 --kv_cache_precision u8 --model_repository_path %~1
115+
python demos\common\export_models\export_model.py text_generation --source_model "%VLM_MODEL%" --weight-format int4 --kv_cache_precision u8 --model_repository_path %~1
116+
if !errorlevel! neq 0 exit /b !errorlevel!
117+
)
118+
119+
if exist "%~1\%QWEN3_MODEL%" (
120+
echo Models directory %~1\%QWEN3_MODEL% exists. Skipping downloading models.
121+
) else (
122+
echo Downloading tokenizer and detokenizer for Qwen3 model to %~1\%QWEN3_MODEL% directory.
123+
mkdir "%~1\%QWEN3_MODEL%"
124+
convert_tokenizer "%QWEN3_MODEL%" --with_detokenizer -o "%~1\%QWEN3_MODEL%"
125+
if !errorlevel! neq 0 exit /b !errorlevel!
126+
)
127+
128+
if exist "%~1\%LLAMA3_MODEL%" (
129+
echo Models directory %~1\%LLAMA3_MODEL% exists. Skipping downloading models.
130+
) else (
131+
echo Downloading tokenizer and detokenizer for Llama3.1 model to %~1\%LLAMA3_MODEL% directory.
132+
mkdir "%~1\%LLAMA3_MODEL%"
133+
convert_tokenizer "%LLAMA3_MODEL%" --with_detokenizer -o "%~1\%LLAMA3_MODEL%"
134+
if !errorlevel! neq 0 exit /b !errorlevel!
135+
)
136+
137+
if exist "%~1\%HERMES3_MODEL%" (
138+
echo Models directory %~1\%HERMES3_MODEL% exists. Skipping downloading models.
139+
) else (
140+
echo Downloading tokenizer and detokenizer for Hermes3 model to %~1\%HERMES3_MODEL% directory.
141+
mkdir "%~1\%HERMES3_MODEL%"
142+
convert_tokenizer "%HERMES3_MODEL%" --with_detokenizer -o "%~1\%HERMES3_MODEL%"
143+
if !errorlevel! neq 0 exit /b !errorlevel!
144+
)
145+
146+
if exist "%~1\%PHI4_MODEL%" (
147+
echo Models directory %~1\%PHI4_MODEL% exists. Skipping downloading models.
148+
) else (
149+
echo Downloading tokenizer and detokenizer for Phi-4 model to %~1\%PHI4_MODEL% directory.
150+
mkdir "%~1\%PHI4_MODEL%"
151+
convert_tokenizer "%PHI4_MODEL%" --with_detokenizer -o "%~1\%PHI4_MODEL%"
101152
if !errorlevel! neq 0 exit /b !errorlevel!
102153
)
103154

0 commit comments

Comments
 (0)