warnings - typer - inference-fix

gabriben · gabriben · commit ca9679246910 · 2025-06-12T16:16:50.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,4 @@ src/wraval/custom_prompts/*
 src/wraval/testing.py
 src/wraval/model_artifacts/*
 !src/wraval/model_artifacts/code/
+build/*
diff --git a/config/settings.toml b/config/settings.toml
@@ -1,10 +1,12 @@
 [default]
 region = 'us-east-1'
-data_dir = 's3://llm-finetune-us-east-1-{aws_account}/eval/tones/'
-#  "./data" 
+data_dir = "./data"
+# 's3://llm-finetune-us-east-1-{aws_account}/eval/tones/'
 deploy_bucket_name = 'llm-finetune-us-east-1-{aws_account}'
 deploy_bucket_prefix = 'models'
 sagemaker_execution_role_arn = 'arn:aws:iam::{aws_account}:role/sagemaker-execution-role-us-east-1'
+endpoint_type = 'bedrock'
+model = 'anthropic.claude-3-haiku-20240307-v1:0'
 
 [haiku-3]
 model = 'anthropic.claude-3-haiku-20240307-v1:0'
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,7 +28,8 @@ dependencies = [
     "numpy",
     "requests",
     "accelerate",
-    "torchvision"
+    "torchvision",
+    "typer"
 ]
 
 [project.scripts]
diff --git a/src/wraval/actions/action_examples.py b/src/wraval/actions/action_examples.py
@@ -8,7 +8,7 @@
 from typing import Optional
 
 
-def show_examples(settings: Dynaconf, tone: Optional[str] = None, n_examples: int = 3) -> None:
+def get_examples(settings: Dynaconf, tone: Optional[str] = None, n_examples: int = 3) -> None:
     """
     Load the latest dataset and display examples grouped by tone and model.
     
diff --git a/src/wraval/actions/action_inference.py b/src/wraval/actions/action_inference.py
@@ -17,10 +17,13 @@ def run_inference(
     """Run inference on sentences using the specified model"""
     results = load_latest_dataset(data_dir)
 
+    no_rewrite = False
+
     if "rewrite" not in results.columns:
-        results["rewrite"] = None
-    if "inference_model" not in results.columns:
-        results["inference_model"] = None
+        if "inference_model" not in results.columns:
+            no_rewrite = True
+            results["rewrite"] = None
+            results["inference_model"] = None
 
     tones = results["tone"].unique()
     print(f"Found tones: {tones}")
@@ -46,10 +49,14 @@ def run_inference(
         outputs = route_completion(settings, queries, tone_prompt)
 
         cleaned_output = [o.strip().strip('"') for o in outputs]
-        new_results = pd.DataFrame({"synthetic_data" : queries, "tone" : tone})
-        new_results["rewrite"] = cleaned_output
-        new_results["inference_model"] = model_name
-
-        results = pd.concat([results, new_results], ignore_index=True)
+        if no_rewrite:
+            mask = results["tone"] == tone
+            results.loc[mask, "rewrite"] = cleaned_output
+            results.loc[mask, "inference_model"] = model_name
+        else:
+            new_results = results[results["tone"] == tone]
+            new_results["rewrite"] = cleaned_output
+            new_results["inference_model"] = model_name
+            results = pd.concat([results, new_results], ignore_index=True)
 
     write_dataset(results, data_dir, "all", "csv")
diff --git a/src/wraval/actions/action_llm_judge.py b/src/wraval/actions/action_llm_judge.py
@@ -3,16 +3,31 @@
 # // SPDX-License-Identifier: Apache-2.0
 #
 import pandas as pd
-from typing import List, Dict, Optional
+from typing import List, Dict, Any, Optional
 from itertools import product
 from dynaconf import Dynaconf
 from .data_utils import write_dataset, load_latest_dataset
-from .prompts_judge import generate_input_prompt, generate_system_prompt, get_rubric, rewrite_prompt
-
 from .completion import batch_get_bedrock_completions
 import re
 import boto3
 
+# Select the judge prompt helpers (custom or built-in) according to settings.custom_prompts
+def get_prompt_functions(settings: Dynaconf):
+    """Get the appropriate prompt functions based on settings."""
+    if settings.custom_prompts:
+        from wraval.custom_prompts.prompts_judge import (
+            generate_input_prompt,
+            generate_system_prompt,
+            get_rubric
+        )
+    else:
+        from .prompts_judge import (
+            generate_input_prompt,
+            generate_system_prompt,
+            get_rubric
+        )
+    return generate_input_prompt, generate_system_prompt, get_rubric
+
 def extract_score(text: str) -> Optional[int]:
     """Extract score from text using regex pattern.
     
@@ -60,9 +75,8 @@ def process_tone_data(
     Returns:
         Processed DataFrame with scores
     """
-
-    if settings.custom_prompts == True:
-        from wraval.custom_prompts.prompts_judge import generate_input_prompt, generate_system_prompt
+    # Get the appropriate prompt functions
+    generate_input_prompt, generate_system_prompt, _ = get_prompt_functions(settings)
 
     temp_results = results.copy()
     rubrics = list(tone_rubrics.keys())
@@ -118,9 +132,6 @@ def judge(
         endpoint_type: Type of endpoint to use
     """
 
-    if settings.custom_prompts == True:
-        from wraval.custom_prompts.prompts_judge import get_rubric
-
     try:
         results = load_latest_dataset(settings.data_dir)
         print(f"Loaded dataset with {len(results)} rows")
@@ -139,11 +150,15 @@ def judge(
     if settings.type != "all":
         tones = [settings.type]
     
+    # Get the appropriate prompt functions
+    _, _, get_rubric = get_prompt_functions(settings)
+    
     # Process each tone-model combination that needs scoring
     for tone, inf_model in product(tones, inf_models):
         mask = (results.inference_model == inf_model) & (results.tone == tone)
         # check if any score is missing for this inference model and this tone
-        # If yes, run the eval below
+        if 'overall_score' not in results.columns:
+            results['overall_score'] = None
         if not results[mask].overall_score.isna().any():
             continue
             
diff --git a/src/wraval/actions/action_results.py b/src/wraval/actions/action_results.py
@@ -21,7 +21,7 @@ def normalize_scores(d: pd.DataFrame) -> pd.DataFrame:
     return 100 * (d - 1) / 2
 
 
-def show_results(settings: Dynaconf, tone: Optional[str] = None) -> None:
+def get_results(settings: Dynaconf, tone: Optional[str] = None) -> None:
     """
     Load the latest dataset and display normalized results table grouped by tone.
     
diff --git a/src/wraval/actions/data_utils.py b/src/wraval/actions/data_utils.py
@@ -122,4 +122,5 @@ def load_latest_dataset(data_dir: str) -> pd.DataFrame:
             raise FileNotFoundError(f"No CSV files found in {data_dir}")
         
         file_path = sorted(files, reverse=True)[0]
+        print(f'Loading {file_path}')
         return pd.read_csv(os.path.join(data_dir, file_path))
diff --git a/src/wraval/actions/format.py b/src/wraval/actions/format.py
@@ -39,7 +39,9 @@ def format_prompt(usr_prompt, prompt=None, tokenizer=None, type = 'bedrock'):
             messages = []
             if prompt.examples:
                 for k,v in prompt.examples[0].items():
-                    messages.extend([{"role": k, "content": v}])
+                    # Format each message content as a list of text blocks
+                    messages.extend([{"role": k, "content": [{"text": v}]}])
+            # Format user prompt as a list of text blocks
             usr_prompt = [{"role": "user", "content": [{"text": usr_prompt}]}]
             p = messages + usr_prompt
         else:    
diff --git a/src/wraval/aws_config.py b/src/wraval/aws_config.py
@@ -0,0 +1,25 @@
+#
+# // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# // SPDX-License-Identifier: Apache-2.0
+#
+import os
+import logging
+import warnings
+
+# Suppress the Pydantic warning about the "json" field of MonitoringDatasetFormat shadowing a parent attribute
+warnings.filterwarnings("ignore", message="Field name \"json\" in \"MonitoringDatasetFormat\" shadows an attribute in parent \"Base\"")
+
+# Configure logging before any AWS imports
+logging.getLogger('sagemaker').setLevel(logging.ERROR)
+logging.getLogger('sagemaker.config').setLevel(logging.ERROR)  # Specifically target the config module
+logging.getLogger('boto3').setLevel(logging.ERROR)
+logging.getLogger('botocore').setLevel(logging.ERROR)
+logging.getLogger('urllib3').setLevel(logging.ERROR)
+
+# Suppress AWS credential messages
+os.environ['SAGEMAKER_SUPPRESS_DEFAULTS'] = 'true'
+os.environ['AWS_SDK_LOAD_CONFIG'] = '0'  # Suppress AWS SDK config loading messages
+
+# Now import AWS modules
+import boto3
+import sagemaker 
diff --git a/src/wraval/main.py b/src/wraval/main.py

Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,8 @@ dependencies = [`
`28`	`28`	`"numpy",`
`29`	`29`	`"requests",`
`30`	`30`	`"accelerate",`
`31`		`- "torchvision"`
	`31`	`+ "torchvision",`
	`32`	`+ "typer"`
`32`	`33`	`]`
`33`	`34`
`34`	`35`	`[project.scripts]`