@@ -63,6 +63,7 @@ def run_tool_calling_llm(llm, request_params):
63
63
accumulated_deltas = {}
64
64
language = None
65
65
code = ""
66
+ function_call_detected = False
66
67
67
68
for chunk in llm .completions (** request_params ):
68
69
if "choices" not in chunk or len (chunk ["choices" ]) == 0 :
@@ -73,6 +74,8 @@ def run_tool_calling_llm(llm, request_params):
73
74
74
75
# Convert tool call into function call, which we have great parsing logic for below
75
76
if "tool_calls" in delta and delta ["tool_calls" ]:
77
+ function_call_detected = True
78
+
76
79
# import pdb; pdb.set_trace()
77
80
if len (delta ["tool_calls" ]) > 0 and delta ["tool_calls" ][0 ].function :
78
81
delta = {
@@ -87,7 +90,40 @@ def run_tool_calling_llm(llm, request_params):
87
90
accumulated_deltas = merge_deltas (accumulated_deltas , delta )
88
91
89
92
if "content" in delta and delta ["content" ]:
90
- yield {"type" : "message" , "content" : delta ["content" ]}
93
+ if function_call_detected :
94
+ # Content that arrives after a tool call was detected is treated as the judge layer's code-safety review, not as a normal message.
95
+
96
+ # print("Code safety review:", delta["content"])
97
+
98
+ if review_category == None :
99
+ accumulated_review += delta ["content" ]
100
+
101
+ if "<unsafe>" in accumulated_review :
102
+ review_category = "unsafe"
103
+ if "<warning>" in accumulated_review :
104
+ review_category = "warning"
105
+ if "<safe>" in accumulated_review :
106
+ review_category = "safe"
107
+
108
+ if review_category != None :
109
+ for tag in [
110
+ "<safe>" ,
111
+ "</safe>" ,
112
+ "<warning>" ,
113
+ "</warning>" ,
114
+ "<unsafe>" ,
115
+ "</unsafe>" ,
116
+ ]:
117
+ delta ["content" ] = delta ["content" ].replace (tag , "" )
118
+
119
+ yield {
120
+ "type" : "review" ,
121
+ "format" : review_category ,
122
+ "content" : delta ["content" ],
123
+ }
124
+
125
+ else :
126
+ yield {"type" : "message" , "content" : delta ["content" ]}
91
127
92
128
if (
93
129
accumulated_deltas .get ("function_call" )
0 commit comments