[python/knowpro] Add --color={auto,never,always} to vizcmp.py; show TypeChat errors as N/A (#1435)

gvanrossum · gvanrossum-ms · web-flow · commit 9bf84d24646b · 2025-08-08T19:24:31.000Z
* a35b42a vizcmp.py: Add --color={never,auto,always} flag
* 16c0aaa Percolate TypeChat errors up specially, so we can avoid printing their score

---------

Co-authored-by: Guido van Rossum <gvanrossum@microsoft.com>
diff --git a/python/ta/tools/utool.py b/python/ta/tools/utool.py
@@ -392,7 +392,10 @@ async def process_query(context: ProcessingContext, query_text: str) -> float |
                 case "Answered":
                     actual4 = (combined_answer.answer or "", True)
             score = await compare_answers(context, expected4, actual4)
-            print(f"Score: {score:.3f}; Question: {query_text}")
+            if actual4[0].startswith("TypeChat failure:"):
+                print(Fore.YELLOW + "No answer received" + Fore.RESET)
+            else:
+                print(f"Score: {score:.3f}; Question: {query_text}")
             return score
         else:
             print("Stage 4 diff unavailable; nice answer:")
diff --git a/python/ta/tools/vizcmp.py b/python/ta/tools/vizcmp.py
@@ -1,17 +1,44 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-import os
+import argparse
 import glob
+import os
 import re
 import statistics
 import sys
 
-from colorama import Back, Fore, Style
+from colorama import init as colorama_init, Back, Fore, Style
 
 
 def main():
-    files = sys.argv[1:] or sorted(glob.glob("evals/eval-*.txt"))
+    parser = argparse.ArgumentParser(
+        description="Compare evaluation results from multiple files."
+    )
+    parser.add_argument(
+        "--color",
+        choices=["auto", "always", "never"],
+        default="auto",
+        help="Control color output. Default 'auto' uses colors if stdout is a terminal.",
+    )
+    parser.add_argument(
+        "files",
+        nargs="*",
+    )
+    args = parser.parse_args()
+
+    # Initialize colorama according to --color.
+    match args.color:
+        case "auto":
+            colorama_init(strip=not sys.stdout.isatty())
+        case "always":
+            colorama_init(strip=False)
+        case "never":
+            colorama_init(strip=True)
+        case _:
+            raise ValueError(f"Invalid color option: {args.color}")
+
+    files = args.files or sorted(glob.glob("evals/eval-*.txt"))
     table = {}  # {file: {counter: score, ...}, ...}
     questions = {}  # {counter: question, ...}
 
diff --git a/python/ta/typeagent/knowpro/answer_response_schema.py b/python/ta/typeagent/knowpro/answer_response_schema.py
@@ -8,6 +8,7 @@
 AnswerType = Literal[
     "NoAnswer",  # If question cannot be accurately answered from [ANSWER CONTEXT]
     "Answered",  # Fully answer question
+    # TODO: Add a category for outright errors, e.g. network errors
 ]
 
 
diff --git a/python/ta/typeagent/knowpro/answers.py b/python/ta/typeagent/knowpro/answers.py
@@ -65,7 +65,7 @@ async def generate_answers(
                 case "NoAnswer":
                     pass
                 case _:
-                    raise ValueError(f"Unexpected answer type: {answer.type}")
+                    assert False, f"Unexpected answer type: {answer.type}"
     if len(all_answers) == 1:
         return all_answers, all_answers[0]
     combined_answer: AnswerResponse | None = None
@@ -96,7 +96,11 @@ async def generate_answer[TMessage: IMessage, TIndex: ITermToSemanticRefIndex](
     # print("+" * 80)
     result = await translator.translate(request)
     if isinstance(result, typechat.Failure):
-        return AnswerResponse(type="NoAnswer", answer=None, whyNoAnswer=result.message)
+        return AnswerResponse(
+            type="NoAnswer",
+            answer=None,
+            whyNoAnswer=f"TypeChat failure: {result.message}",
+        )
     else:
         return result.value
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -8,6 +8,7 @@` |
| `8` | `8` | `AnswerType = Literal[` |
| `9` | `9` | `"NoAnswer", # If question cannot be accurately answered from [ANSWER CONTEXT]` |
| `10` | `10` | `"Answered", # Fully answer question` |
| | `11` | `+ # TODO: Add a category for outright errors, e.g. network errors` |
| `11` | `12` | `]` |
| `12` | `13` | |
| `13` | `14` | |