trustyai-explainability
diff --git a/‎examples/basic_test.ipynb
Lines changed: 21 additions & 21 deletions b/‎examples/basic_test.ipynb
Lines changed: 21 additions & 21 deletions
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -11,16 +11,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness'])"
+       "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness', 'educational_content_template', 'code_review_template', 'customer_service_template', 'writing_quality_template', 'product_review_template', 'medical_info_template', 'api_docs_template'])"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -31,16 +31,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "judge = Judge.from_url(base_url=\"http://localhost:8080\", model=\"qwen2\")"
+    "judge = Judge.from_url(base_url=\"http://localhost:8080\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -63,7 +63,7 @@
        "  'raw_response': '{\\n    \"decision\": false,\\n    \"reasoning\": \"The response lacks a professional tone and is informal. It uses casual language and lacks context or formal structure.\",\\n    \"score\": null\\n}'}}"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -74,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -87,7 +87,7 @@
        "  'raw_response': '{\\n    \"decision\": 5,\\n    \"reasoning\": \"The response lacks a formal and professional tone. It uses informal language and an interrogative form which is not typical in professional communication.\",\\n    \"score\": 5\\n}'}}"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -101,7 +101,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -114,7 +114,7 @@
        "  'raw_response': '{\\n    \"decision\": 5,\\n    \"reasoning\": \"The response is somewhat direct and to the point, but lacks formality and context typically expected in a professional setting.\",\\n    \"score\": 5\\n}'}}"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -132,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -146,7 +146,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -159,7 +159,7 @@
        "  'raw_response': '{\\n    \"decision\": \"moderate\",\\n    \"reasoning\": \"The email lacks formal language and a clear request, which is more appropriate for a professional setting. It is direct but informal.\",\\n    \"score\": 5\\n}'}}"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -172,20 +172,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "{'decision': 'non-professional',\n",
-       " 'reasoning': 'The response lacks clarity and formality, which are key elements of a professional tone.',\n",
-       " 'score': 4.0,\n",
+       " 'reasoning': 'The response uses informal and expletive language, which is not appropriate for a professional context.',\n",
+       " 'score': 1.0,\n",
        " 'metadata': {'model': 'qwen2',\n",
-       "  'raw_response': '{\\n    \"decision\": \"non-professional\",\\n    \"reasoning\": \"The response lacks clarity and formality, which are key elements of a professional tone.\",\\n    \"score\": 4\\n}'}}"
+       "  'raw_response': '{\\n    \"decision\": \"non-professional\",\\n    \"reasoning\": \"The response uses informal and expletive language, which is not appropriate for a professional context.\",\\n    \"score\": 1\\n}'}}"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`"cells": [`
`3`	`3`	`{`
`4`	`4`	`"cell_type": "code",`
`5`		`- "execution_count": 2,`
	`5`	`+ "execution_count": 1,`
`6`	`6`	`"metadata": {},`
`7`	`7`	`"outputs": [],`
`8`	`8`	`"source": [`
`@@ -11,16 +11,16 @@`
`11`	`11`	`},`
`12`	`12`	`{`
`13`	`13`	`"cell_type": "code",`
`14`		`- "execution_count": 4,`
	`14`	`+ "execution_count": 2,`
`15`	`15`	`"metadata": {},`
`16`	`16`	`"outputs": [`
`17`	`17`	`{`
`18`	`18`	`"data": {`
`19`	`19`	`"text/plain": [`
`20`		`- "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness'])"`
	`20`	`+ "dict_keys(['helpfulness', 'accuracy', 'clarity', 'conciseness', 'relevance', 'safety', 'toxicity', 'code_quality', 'code_security', 'creativity', 'professionalism', 'educational_value', 'preference', 'appropriate', 'factual', 'medical_accuracy', 'legal_appropriateness', 'educational_content_template', 'code_review_template', 'customer_service_template', 'writing_quality_template', 'product_review_template', 'medical_info_template', 'api_docs_template'])"`
`21`	`21`	`]`
`22`	`22`	`},`
`23`		`- "execution_count": 4,`
	`23`	`+ "execution_count": 2,`
`24`	`24`	`"metadata": {},`
`25`	`25`	`"output_type": "execute_result"`
`26`	`26`	`}`
`@@ -31,16 +31,16 @@`
`31`	`31`	`},`
`32`	`32`	`{`
`33`	`33`	`"cell_type": "code",`
`34`		`- "execution_count": 5,`
	`34`	`+ "execution_count": 3,`
`35`	`35`	`"metadata": {},`
`36`	`36`	`"outputs": [],`
`37`	`37`	`"source": [`
`38`		`- "judge = Judge.from_url(base_url=\"http://localhost:8080\", model=\"qwen2\")"`
	`38`	`+ "judge = Judge.from_url(base_url=\"http://localhost:8080\")"`
`39`	`39`	`]`
`40`	`40`	`},`
`41`	`41`	`{`
`42`	`42`	`"cell_type": "code",`
`43`		`- "execution_count": null,`
	`43`	`+ "execution_count": 4,`
`44`	`44`	`"metadata": {},`
`45`	`45`	`"outputs": [],`
`46`	`46`	`"source": [`
`@@ -50,7 +50,7 @@`
`50`	`50`	`},`
`51`	`51`	`{`
`52`	`52`	`"cell_type": "code",`
`53`		`- "execution_count": 11,`
	`53`	`+ "execution_count": 5,`
`54`	`54`	`"metadata": {},`
`55`	`55`	`"outputs": [`
`56`	`56`	`{`
`@@ -63,7 +63,7 @@`
`63`	`63`	`" 'raw_response': '{\\n \"decision\": false,\\n \"reasoning\": \"The response lacks a professional tone and is informal. It uses casual language and lacks context or formal structure.\",\\n \"score\": null\\n}'}}"`
`64`	`64`	`]`
`65`	`65`	`},`
`66`		`- "execution_count": 11,`
	`66`	`+ "execution_count": 5,`
`67`	`67`	`"metadata": {},`
`68`	`68`	`"output_type": "execute_result"`
`69`	`69`	`}`
`@@ -74,7 +74,7 @@`
`74`	`74`	`},`
`75`	`75`	`{`
`76`	`76`	`"cell_type": "code",`
`77`		`- "execution_count": 12,`
	`77`	`+ "execution_count": 6,`
`78`	`78`	`"metadata": {},`
`79`	`79`	`"outputs": [`
`80`	`80`	`{`
`@@ -87,7 +87,7 @@`
`87`	`87`	`" 'raw_response': '{\\n \"decision\": 5,\\n \"reasoning\": \"The response lacks a formal and professional tone. It uses informal language and an interrogative form which is not typical in professional communication.\",\\n \"score\": 5\\n}'}}"`
`88`	`88`	`]`
`89`	`89`	`},`
`90`		`- "execution_count": 12,`
	`90`	`+ "execution_count": 6,`
`91`	`91`	`"metadata": {},`
`92`	`92`	`"output_type": "execute_result"`
`93`	`93`	`}`
`@@ -101,7 +101,7 @@`
`101`	`101`	`},`
`102`	`102`	`{`
`103`	`103`	`"cell_type": "code",`
`104`		`- "execution_count": 13,`
	`104`	`+ "execution_count": 7,`
`105`	`105`	`"metadata": {},`
`106`	`106`	`"outputs": [`
`107`	`107`	`{`
`@@ -114,7 +114,7 @@`
`114`	`114`	`" 'raw_response': '{\\n \"decision\": 5,\\n \"reasoning\": \"The response is somewhat direct and to the point, but lacks formality and context typically expected in a professional setting.\",\\n \"score\": 5\\n}'}}"`
`115`	`115`	`]`
`116`	`116`	`},`
`117`		`- "execution_count": 13,`
	`117`	`+ "execution_count": 7,`
`118`	`118`	`"metadata": {},`
`119`	`119`	`"output_type": "execute_result"`
`120`	`120`	`}`
`@@ -132,7 +132,7 @@`
`132`	`132`	`},`
`133`	`133`	`{`
`134`	`134`	`"cell_type": "code",`
`135`		`- "execution_count": 14,`
	`135`	`+ "execution_count": 8,`
`136`	`136`	`"metadata": {},`
`137`	`137`	`"outputs": [],`
`138`	`138`	`"source": [`
`@@ -146,7 +146,7 @@`
`146`	`146`	`},`
`147`	`147`	`{`
`148`	`148`	`"cell_type": "code",`
`149`		`- "execution_count": 15,`
	`149`	`+ "execution_count": 9,`
`150`	`150`	`"metadata": {},`
`151`	`151`	`"outputs": [`
`152`	`152`	`{`
`@@ -159,7 +159,7 @@`
`159`	`159`	`" 'raw_response': '{\\n \"decision\": \"moderate\",\\n \"reasoning\": \"The email lacks formal language and a clear request, which is more appropriate for a professional setting. It is direct but informal.\",\\n \"score\": 5\\n}'}}"`
`160`	`160`	`]`
`161`	`161`	`},`
`162`		`- "execution_count": 15,`
	`162`	`+ "execution_count": 9,`
`163`	`163`	`"metadata": {},`
`164`	`164`	`"output_type": "execute_result"`
`165`	`165`	`}`
`@@ -172,20 +172,20 @@`
`172`	`172`	`},`
`173`	`173`	`{`
`174`	`174`	`"cell_type": "code",`
`175`		`- "execution_count": null,`
	`175`	`+ "execution_count": 10,`
`176`	`176`	`"metadata": {},`
`177`	`177`	`"outputs": [`
`178`	`178`	`{`
`179`	`179`	`"data": {`
`180`	`180`	`"text/plain": [`
`181`	`181`	`"{'decision': 'non-professional',\n",`
`182`		`- " 'reasoning': 'The response lacks clarity and formality, which are key elements of a professional tone.',\n",`
`183`		`- " 'score': 4.0,\n",`
	`182`	`+ " 'reasoning': 'The response uses informal and expletive language, which is not appropriate for a professional context.',\n",`
	`183`	`+ " 'score': 1.0,\n",`
`184`	`184`	`" 'metadata': {'model': 'qwen2',\n",`
`185`		`- " 'raw_response': '{\\n \"decision\": \"non-professional\",\\n \"reasoning\": \"The response lacks clarity and formality, which are key elements of a professional tone.\",\\n \"score\": 4\\n}'}}"`
	`185`	`+ " 'raw_response': '{\\n \"decision\": \"non-professional\",\\n \"reasoning\": \"The response uses informal and expletive language, which is not appropriate for a professional context.\",\\n \"score\": 1\\n}'}}"`
`186`	`186`	`]`
`187`	`187`	`},`
`188`		`- "execution_count": 17,`
	`188`	`+ "execution_count": 10,`
`189`	`189`	`"metadata": {},`
`190`	`190`	`"output_type": "execute_result"`
`191`	`191`	`}`