Skip to content

Commit 9bf84d2

Browse files
[python/knowpro] Add --color={auto,never,always} to vizcmp.py; show TypeChat errors as N/A (#1435)
* a35b42a vizcmp.py: Add --color={never,auto,always} flag
* 16c0aaa Percolate TypeChat errors up specially, so we can avoid printing their score

---------

Co-authored-by: Guido van Rossum <[email protected]>
1 parent 5909ca1 commit 9bf84d2

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

python/ta/tools/utool.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,10 @@ async def process_query(context: ProcessingContext, query_text: str) -> float |
392392
case "Answered":
393393
actual4 = (combined_answer.answer or "", True)
394394
score = await compare_answers(context, expected4, actual4)
395-
print(f"Score: {score:.3f}; Question: {query_text}")
395+
if actual4[0].startswith("TypeChat failure:"):
396+
print(Fore.YELLOW + "No answer received" + Fore.RESET)
397+
else:
398+
print(f"Score: {score:.3f}; Question: {query_text}")
396399
return score
397400
else:
398401
print("Stage 4 diff unavailable; nice answer:")

python/ta/tools/vizcmp.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,44 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
33

4-
import os
4+
import argparse
55
import glob
6+
import os
67
import re
78
import statistics
89
import sys
910

10-
from colorama import Back, Fore, Style
11+
from colorama import init as colorama_init, Back, Fore, Style
1112

1213

1314
def main():
14-
files = sys.argv[1:] or sorted(glob.glob("evals/eval-*.txt"))
15+
parser = argparse.ArgumentParser(
16+
description="Compare evaluation results from multiple files."
17+
)
18+
parser.add_argument(
19+
"--color",
20+
choices=["auto", "always", "never"],
21+
default="auto",
22+
help="Control color output. Default 'auto' uses colors if stdout is a terminal.",
23+
)
24+
parser.add_argument(
25+
"files",
26+
nargs="*",
27+
)
28+
args = parser.parse_args()
29+
30+
# Initialize colorama according to --color.
31+
match args.color:
32+
case "auto":
33+
colorama_init(strip=not sys.stdout.isatty())
34+
case "always":
35+
colorama_init(strip=False)
36+
case "never":
37+
colorama_init(strip=True)
38+
case _:
39+
raise ValueError(f"Invalid color option: {args.color}")
40+
41+
files = args.files or sorted(glob.glob("evals/eval-*.txt"))
1542
table = {} # {file: {counter: score, ...}, ...}
1643
questions = {} # {counter: question, ...}
1744

python/ta/typeagent/knowpro/answer_response_schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
AnswerType = Literal[
99
"NoAnswer", # If question cannot be accurately answered from [ANSWER CONTEXT]
1010
"Answered", # Fully answer question
11+
# TODO: Add a category for outright errors, e.g. network errors
1112
]
1213

1314

python/ta/typeagent/knowpro/answers.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ async def generate_answers(
6565
case "NoAnswer":
6666
pass
6767
case _:
68-
raise ValueError(f"Unexpected answer type: {answer.type}")
68+
assert False, f"Unexpected answer type: {answer.type}"
6969
if len(all_answers) == 1:
7070
return all_answers, all_answers[0]
7171
combined_answer: AnswerResponse | None = None
@@ -96,7 +96,11 @@ async def generate_answer[TMessage: IMessage, TIndex: ITermToSemanticRefIndex](
9696
# print("+" * 80)
9797
result = await translator.translate(request)
9898
if isinstance(result, typechat.Failure):
99-
return AnswerResponse(type="NoAnswer", answer=None, whyNoAnswer=result.message)
99+
return AnswerResponse(
100+
type="NoAnswer",
101+
answer=None,
102+
whyNoAnswer=f"TypeChat failure: {result.message}",
103+
)
100104
else:
101105
return result.value
102106

0 commit comments

Comments
 (0)