-
Notifications
You must be signed in to change notification settings - Fork 287
Expand file tree
/
Copy pathmodels_router.py
More file actions
127 lines (105 loc) · 3.86 KB
/
models_router.py
File metadata and controls
127 lines (105 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
"""Session 6 Sample: Intent-based model routing using Foundry Local Manager.
Demonstrates intelligent routing of prompts to specialized models based on
detected intent patterns.
Environment Variables:
FOUNDRY_LOCAL_ENDPOINT=<url> # Override endpoint
SDK Reference:
https://github.com/microsoft/Foundry-Local/tree/main/sdk/python/foundry_local
"""
from __future__ import annotations
import os
import sys
import re
from typing import Dict, Any
from utils.workshop_utils import chat_once
# Optional endpoint override; not read anywhere in this file — presumably
# consumed by utils.workshop_utils when issuing requests. TODO confirm.
ENDPOINT = os.getenv("FOUNDRY_LOCAL_ENDPOINT")
print("[INFO] Initializing intent-based model router")
# Static model catalog: alias -> supported capabilities and a priority rank
# (lower number = preferred when more than one model supports an intent).
CATALOG = {
    "phi-4-mini": {"capabilities": ["general", "summarize"], "priority": 2},
    "deepseek-coder-1.3b": {"capabilities": ["code", "refactor"], "priority": 1},
    "qwen2.5-0.5b": {"capabilities": ["classification", "fast"], "priority": 3},
}
# Ordered intent-detection rules: the first case-insensitive pattern that
# matches the prompt wins; detect_intent() falls back to 'general'.
RULES = [
    (re.compile(r"code|refactor|function", re.I), "code"),
    (re.compile(r"summari|abstract|tl;dr", re.I), "summarize"),
    (re.compile(r"classif|category|label", re.I), "classification"),
]
def detect_intent(prompt: str) -> str:
    """Classify a user prompt into an intent via ordered regex rules.

    Args:
        prompt: Raw user prompt text.

    Returns:
        The intent of the first matching rule in ``RULES`` (e.g. 'code',
        'summarize', 'classification'), or 'general' when nothing matches.
    """
    # First-match-wins over the ordered rule table; 'general' is the fallback.
    return next(
        (label for pattern, label in RULES if pattern.search(prompt)),
        "general",
    )
def pick_model(intent: str) -> str:
    """Select the best model alias for a given intent.

    Args:
        intent: Detected intent string.

    Returns:
        The catalog alias best suited for the intent.

    Note:
        Candidates are ordered first by capability match (models that
        support the intent come first), then by priority where a lower
        number means a higher-priority model.
    """
    def preference(entry):
        # (capability mismatch, priority): False sorts before True, so a
        # capability match beats any priority difference.
        alias, meta = entry
        return (intent not in meta["capabilities"], meta["priority"])

    best_alias, _ = min(CATALOG.items(), key=preference)
    return best_alias
def route(prompt: str) -> Dict[str, Any]:
    """Route a prompt to the appropriate model based on detected intent.

    Args:
        prompt: User prompt to route.

    Returns:
        Dictionary with the detected intent, selected model alias, model
        output text, and total token usage (``None`` when unavailable).

    Raises:
        Exception: Re-raised unchanged if model execution fails.
    """
    intent = detect_intent(prompt)
    alias = pick_model(intent)
    preview = prompt[:50]
    print(f"[INFO] Routing: '{preview}...' -> intent='{intent}', model='{alias}'")

    try:
        text, usage = chat_once(
            alias,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200,
            temperature=0.5,
        )
    except Exception as e:
        # Log for visibility, then propagate so the caller decides recovery.
        print(f"[ERROR] Routing failed for prompt: {e}")
        raise

    token_total = getattr(usage, 'total_tokens', None) if usage else None
    return {
        "intent": intent,
        "alias": alias,
        "output": text,
        "tokens": token_total,
    }
if __name__ == "__main__":
    # One demo prompt per specialized intent (code, summarize, classification).
    tests = [
        "Refactor this Python snippet for readability",
        "Summarize the importance of small language models",
        "Classify this feedback: 'The UI is slow but pretty'"
    ]
    print(f"\n[INFO] Testing router with {len(tests)} prompts\n")

    results = []
    for idx, sample in enumerate(tests, 1):
        try:
            outcome = route(sample)
        except Exception as e:
            # A single failed prompt should not abort the remaining tests.
            print(f"\n[ERROR] Test {idx} failed: {e}\n")
            continue

        results.append(outcome)
        print(f"\n[TEST {idx}/{len(tests)}]")
        print(f"Prompt: {sample}")
        print(f"Intent: {outcome['intent']}")
        print(f"Model: {outcome['alias']}")
        reply = outcome['output']
        # Truncate long model replies to keep console output readable.
        print(f"Output: {reply[:100]}..." if len(reply) > 100 else f"Output: {reply}")
        print(f"Tokens: {outcome['tokens']}\n")

    # Non-zero exit only when every routing attempt failed.
    if not results:
        print("[ERROR] All routing tests failed")
        sys.exit(1)