|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 2, |
| 5 | + "execution_count": 1, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
11 | 11 | },
|
12 | 12 | {
|
13 | 13 | "cell_type": "code",
|
14 |
| - "execution_count": 4, |
| 14 | + "execution_count": 2, |
15 | 15 | "metadata": {},
|
16 | 16 | "outputs": [
|
17 | 17 | {
|
18 | 18 | "data": {
|
19 | 19 | "text/plain": [
|
20 |
| - "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness'])" |
| 20 | + "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness', 'educational_content_template', 'code_review_template', 'customer_service_template', 'writing_quality_template', 'product_review_template', 'medical_info_template', 'api_docs_template'])" |
21 | 21 | ]
|
22 | 22 | },
|
23 |
| - "execution_count": 4, |
| 23 | + "execution_count": 2, |
24 | 24 | "metadata": {},
|
25 | 25 | "output_type": "execute_result"
|
26 | 26 | }
|
|
31 | 31 | },
|
32 | 32 | {
|
33 | 33 | "cell_type": "code",
|
34 |
| - "execution_count": 5, |
| 34 | + "execution_count": 3, |
35 | 35 | "metadata": {},
|
36 | 36 | "outputs": [],
|
37 | 37 | "source": [
|
38 |
| - "judge = Judge.from_url(base_url=\"http://localhost:8080\", model=\"qwen2\")" |
| 38 | + "judge = Judge.from_url(base_url=\"http://localhost:8080\")" |
39 | 39 | ]
|
40 | 40 | },
|
41 | 41 | {
|
42 | 42 | "cell_type": "code",
|
43 |
| - "execution_count": null, |
| 43 | + "execution_count": 4, |
44 | 44 | "metadata": {},
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
|
50 | 50 | },
|
51 | 51 | {
|
52 | 52 | "cell_type": "code",
|
53 |
| - "execution_count": 11, |
| 53 | + "execution_count": 5, |
54 | 54 | "metadata": {},
|
55 | 55 | "outputs": [
|
56 | 56 | {
|
|
63 | 63 | " 'raw_response': '{\\n \"decision\": false,\\n \"reasoning\": \"The response lacks a professional tone and is informal. It uses casual language and lacks context or formal structure.\",\\n \"score\": null\\n}'}}"
|
64 | 64 | ]
|
65 | 65 | },
|
66 |
| - "execution_count": 11, |
| 66 | + "execution_count": 5, |
67 | 67 | "metadata": {},
|
68 | 68 | "output_type": "execute_result"
|
69 | 69 | }
|
|
74 | 74 | },
|
75 | 75 | {
|
76 | 76 | "cell_type": "code",
|
77 |
| - "execution_count": 12, |
| 77 | + "execution_count": 6, |
78 | 78 | "metadata": {},
|
79 | 79 | "outputs": [
|
80 | 80 | {
|
|
87 | 87 | " 'raw_response': '{\\n \"decision\": 5,\\n \"reasoning\": \"The response lacks a formal and professional tone. It uses informal language and an interrogative form which is not typical in professional communication.\",\\n \"score\": 5\\n}'}}"
|
88 | 88 | ]
|
89 | 89 | },
|
90 |
| - "execution_count": 12, |
| 90 | + "execution_count": 6, |
91 | 91 | "metadata": {},
|
92 | 92 | "output_type": "execute_result"
|
93 | 93 | }
|
|
101 | 101 | },
|
102 | 102 | {
|
103 | 103 | "cell_type": "code",
|
104 |
| - "execution_count": 13, |
| 104 | + "execution_count": 7, |
105 | 105 | "metadata": {},
|
106 | 106 | "outputs": [
|
107 | 107 | {
|
|
114 | 114 | " 'raw_response': '{\\n \"decision\": 5,\\n \"reasoning\": \"The response is somewhat direct and to the point, but lacks formality and context typically expected in a professional setting.\",\\n \"score\": 5\\n}'}}"
|
115 | 115 | ]
|
116 | 116 | },
|
117 |
| - "execution_count": 13, |
| 117 | + "execution_count": 7, |
118 | 118 | "metadata": {},
|
119 | 119 | "output_type": "execute_result"
|
120 | 120 | }
|
|
132 | 132 | },
|
133 | 133 | {
|
134 | 134 | "cell_type": "code",
|
135 |
| - "execution_count": 14, |
| 135 | + "execution_count": 8, |
136 | 136 | "metadata": {},
|
137 | 137 | "outputs": [],
|
138 | 138 | "source": [
|
|
146 | 146 | },
|
147 | 147 | {
|
148 | 148 | "cell_type": "code",
|
149 |
| - "execution_count": 15, |
| 149 | + "execution_count": 9, |
150 | 150 | "metadata": {},
|
151 | 151 | "outputs": [
|
152 | 152 | {
|
|
159 | 159 | " 'raw_response': '{\\n \"decision\": \"moderate\",\\n \"reasoning\": \"The email lacks formal language and a clear request, which is more appropriate for a professional setting. It is direct but informal.\",\\n \"score\": 5\\n}'}}"
|
160 | 160 | ]
|
161 | 161 | },
|
162 |
| - "execution_count": 15, |
| 162 | + "execution_count": 9, |
163 | 163 | "metadata": {},
|
164 | 164 | "output_type": "execute_result"
|
165 | 165 | }
|
|
172 | 172 | },
|
173 | 173 | {
|
174 | 174 | "cell_type": "code",
|
175 |
| - "execution_count": null, |
| 175 | + "execution_count": 10, |
176 | 176 | "metadata": {},
|
177 | 177 | "outputs": [
|
178 | 178 | {
|
179 | 179 | "data": {
|
180 | 180 | "text/plain": [
|
181 | 181 | "{'decision': 'non-professional',\n",
|
182 |
| - " 'reasoning': 'The response lacks clarity and formality, which are key elements of a professional tone.',\n", |
183 |
| - " 'score': 4.0,\n", |
| 182 | + " 'reasoning': 'The response uses informal and expletive language, which is not appropriate for a professional context.',\n", |
| 183 | + " 'score': 1.0,\n", |
184 | 184 | " 'metadata': {'model': 'qwen2',\n",
|
185 |
| - " 'raw_response': '{\\n \"decision\": \"non-professional\",\\n \"reasoning\": \"The response lacks clarity and formality, which are key elements of a professional tone.\",\\n \"score\": 4\\n}'}}" |
| 185 | + " 'raw_response': '{\\n \"decision\": \"non-professional\",\\n \"reasoning\": \"The response uses informal and expletive language, which is not appropriate for a professional context.\",\\n \"score\": 1\\n}'}}" |
186 | 186 | ]
|
187 | 187 | },
|
188 |
| - "execution_count": 17, |
| 188 | + "execution_count": 10, |
189 | 189 | "metadata": {},
|
190 | 190 | "output_type": "execute_result"
|
191 | 191 | }
|
|
0 commit comments