|
48 | 48 | _byok_config: dict = {} |
49 | 49 |
|
50 | 50 |
|
51 | | -def create_byok_workflow(openrouter_key: str) -> HedAnnotationWorkflow: |
| 51 | +def create_byok_workflow( |
| 52 | + openrouter_key: str, |
| 53 | + model: str | None = None, |
| 54 | + provider: str | None = None, |
| 55 | + temperature: float | None = None, |
| 56 | +) -> HedAnnotationWorkflow: |
52 | 57 | """Create a workflow instance using the user's OpenRouter key (BYOK mode). |
53 | 58 |
|
54 | 59 | Args: |
55 | 60 | openrouter_key: User's OpenRouter API key |
| 61 | + model: Override model for all agents (uses server default if None) |
| 62 | + provider: Override provider preference (uses server default if None) |
| 63 | + temperature: Override LLM temperature (uses server default if None) |
56 | 64 |
|
57 | 65 | Returns: |
58 | | - Configured HedAnnotationWorkflow using the user's key |
| 66 | + Configured HedAnnotationWorkflow using the user's key and model settings |
59 | 67 | """ |
60 | 68 | global _byok_config |
61 | 69 |
|
62 | 70 | # Get configuration (cached from server startup) |
63 | | - llm_temperature = _byok_config.get("temperature", 0.1) |
64 | | - provider_preference = _byok_config.get("provider_preference") |
65 | 71 | schema_dir = _byok_config.get("schema_dir") |
66 | 72 | validator_path = _byok_config.get("validator_path") |
67 | 73 | use_js_validator = _byok_config.get("use_js_validator", True) |
68 | 74 |
|
69 | | - # Get model configuration from headers or use defaults |
70 | | - annotation_model = get_model_name(os.getenv("ANNOTATION_MODEL", "openai/gpt-oss-120b")) |
71 | | - evaluation_model = get_model_name(os.getenv("EVALUATION_MODEL", "qwen/qwen3-235b-a22b-2507")) |
72 | | - assessment_model = get_model_name(os.getenv("ASSESSMENT_MODEL", "openai/gpt-oss-120b")) |
| 75 | + # Use user-provided settings or fall back to server defaults |
| 76 | + llm_temperature = ( |
| 77 | + temperature if temperature is not None else _byok_config.get("temperature", 0.1) |
| 78 | + ) |
| 79 | + |
| 80 | + # Provider logic: |
| 81 | + # - If user specifies a custom model, clear provider (Cerebras only works with default models) |
| 82 | + # - Unless user also explicitly specifies a provider |
| 83 | + if provider is not None: |
| 84 | + # User explicitly set provider (could be empty string to clear it) |
| 85 | + provider_preference = provider if provider else None |
| 86 | + elif model is not None: |
| 87 | + # User specified custom model but no provider → clear provider |
| 88 | + # (Cerebras only works with default models) |
| 89 | + provider_preference = None |
| 90 | + else: |
| 91 | + # No custom model or provider → use server defaults |
| 92 | + provider_preference = _byok_config.get("provider_preference") |
| 93 | + |
| 94 | + # Get model configuration: user override > server env var > default |
| 95 | + default_annotation_model = os.getenv("ANNOTATION_MODEL", "openai/gpt-oss-120b") |
| 96 | + default_evaluation_model = os.getenv("EVALUATION_MODEL", "qwen/qwen3-235b-a22b-2507") |
| 97 | + default_assessment_model = os.getenv("ASSESSMENT_MODEL", "openai/gpt-oss-120b") |
73 | 98 |
|
74 | | - # Create LLMs with user's key |
| 99 | + # If user provides a model, use it for all agents (default override) |
| 100 | + annotation_model = get_model_name(model if model else default_annotation_model) |
| 101 | + evaluation_model = get_model_name(model if model else default_evaluation_model) |
| 102 | + assessment_model = get_model_name(model if model else default_assessment_model) |
| 103 | + |
| 104 | + # Create LLMs with user's key and settings |
75 | 105 | annotation_llm = create_openrouter_llm( |
76 | 106 | model=annotation_model, |
77 | 107 | api_key=openrouter_key, |
@@ -102,23 +132,46 @@ def create_byok_workflow(openrouter_key: str) -> HedAnnotationWorkflow: |
102 | 132 | ) |
103 | 133 |
|
104 | 134 |
|
def create_byok_vision_agent(
    openrouter_key: str,
    vision_model: str | None = None,
    provider: str | None = None,
    temperature: float | None = None,
) -> VisionAgent:
    """Create a vision agent instance using the user's OpenRouter key (BYOK mode).

    Args:
        openrouter_key: User's OpenRouter API key
        vision_model: Override vision model. ``None`` or an empty string
            selects the server default (``VISION_MODEL`` env var).
        provider: Override provider preference. An empty string explicitly
            clears the provider. When ``None``, the server default
            (``VISION_PROVIDER`` env var) is applied only if the default
            vision model is in effect; a custom vision model with no explicit
            provider is created without a provider preference.
        temperature: Override temperature (uses 0.3 default if None)

    Returns:
        Configured VisionAgent using the user's key and model settings
    """
    # Server defaults: env var first, hard-coded fallback second.
    default_vision_model = os.getenv("VISION_MODEL", "qwen/qwen3-vl-30b-a3b-instruct")
    default_vision_provider = os.getenv("VISION_PROVIDER", "deepinfra/fp8")

    # A single truthiness check drives both the model choice and the provider
    # fallback, so an empty-string model is treated as "not provided" in both
    # places (previously it picked the default model but dropped its provider).
    has_custom_model = bool(vision_model)
    actual_model = vision_model if has_custom_model else default_vision_model

    # `is not None` (not truthiness) so an explicit temperature of 0.0 is kept.
    actual_temperature = temperature if temperature is not None else 0.3

    # Provider resolution:
    # - An explicit provider always wins; an empty string means "no provider".
    # - A custom model without an explicit provider gets no provider, since the
    #   default provider is configured for the default model only.
    # - Otherwise fall back to the server default provider.
    if provider is not None:
        actual_provider = provider or None
    elif has_custom_model:
        actual_provider = None
    else:
        actual_provider = default_vision_provider

    vision_llm = create_openrouter_llm(
        model=actual_model,
        api_key=openrouter_key,
        temperature=actual_temperature,
        provider=actual_provider,
    )

    return VisionAgent(llm=vision_llm)
@@ -365,6 +418,10 @@ def get_default_path(docker_path: str, local_path: str) -> str: |
365 | 418 | "X-Requested-With", |
366 | 419 | "X-API-Key", |
367 | 420 | "X-OpenRouter-Key", # BYOK mode |
| 421 | + "X-OpenRouter-Model", # BYOK model override |
| 422 | + "X-OpenRouter-Vision-Model", # BYOK vision model override |
| 423 | + "X-OpenRouter-Provider", # BYOK provider preference |
| 424 | + "X-OpenRouter-Temperature", # BYOK temperature override |
368 | 425 | ], |
369 | 426 | max_age=3600, # Cache preflight requests for 1 hour |
370 | 427 | ) |
@@ -447,12 +504,29 @@ async def annotate( |
447 | 504 | """ |
448 | 505 | # Determine which workflow to use |
449 | 506 | if api_key == "byok": |
450 | | - # BYOK mode: Create workflow with user's key |
| 507 | + # BYOK mode: Create workflow with user's key and model settings |
451 | 508 | openrouter_key = req.headers.get("x-openrouter-key") |
452 | 509 | if not openrouter_key: |
453 | 510 | raise HTTPException(status_code=401, detail="Missing X-OpenRouter-Key header") |
| 511 | + |
| 512 | + # Get model config: request body > headers > server defaults |
| 513 | + model = request.model or req.headers.get("x-openrouter-model") |
| 514 | + provider = request.provider or req.headers.get("x-openrouter-provider") |
| 515 | + temp_header = req.headers.get("x-openrouter-temperature") |
| 516 | + temperature = request.temperature |
| 517 | + if temperature is None and temp_header: |
| 518 | + try: |
| 519 | + temperature = float(temp_header) |
| 520 | + except ValueError: |
| 521 | + pass # Invalid header value, use default |
| 522 | + |
454 | 523 | try: |
455 | | - active_workflow = create_byok_workflow(openrouter_key) |
| 524 | + active_workflow = create_byok_workflow( |
| 525 | + openrouter_key, |
| 526 | + model=model, |
| 527 | + provider=provider, |
| 528 | + temperature=temperature, |
| 529 | + ) |
456 | 530 | except Exception as e: |
457 | 531 | raise HTTPException( |
458 | 532 | status_code=500, detail=f"Failed to initialize BYOK workflow: {str(e)}" |
@@ -530,13 +604,36 @@ async def annotate_from_image( |
530 | 604 | """ |
531 | 605 | # Determine which workflow and vision agent to use |
532 | 606 | if api_key == "byok": |
533 | | - # BYOK mode: Create workflow and vision agent with user's key |
| 607 | + # BYOK mode: Create workflow and vision agent with user's key and model settings |
534 | 608 | openrouter_key = req.headers.get("x-openrouter-key") |
535 | 609 | if not openrouter_key: |
536 | 610 | raise HTTPException(status_code=401, detail="Missing X-OpenRouter-Key header") |
| 611 | + |
| 612 | + # Get model config: request body > headers > server defaults |
| 613 | + model = request.model or req.headers.get("x-openrouter-model") |
| 614 | + vision_model = request.vision_model or req.headers.get("x-openrouter-vision-model") |
| 615 | + provider = request.provider or req.headers.get("x-openrouter-provider") |
| 616 | + temp_header = req.headers.get("x-openrouter-temperature") |
| 617 | + temperature = request.temperature |
| 618 | + if temperature is None and temp_header: |
| 619 | + try: |
| 620 | + temperature = float(temp_header) |
| 621 | + except ValueError: |
| 622 | + pass # Invalid header value, use default |
| 623 | + |
537 | 624 | try: |
538 | | - active_workflow = create_byok_workflow(openrouter_key) |
539 | | - active_vision_agent = create_byok_vision_agent(openrouter_key) |
| 625 | + active_workflow = create_byok_workflow( |
| 626 | + openrouter_key, |
| 627 | + model=model, |
| 628 | + provider=provider, |
| 629 | + temperature=temperature, |
| 630 | + ) |
| 631 | + active_vision_agent = create_byok_vision_agent( |
| 632 | + openrouter_key, |
| 633 | + vision_model=vision_model, |
| 634 | + provider=provider, |
| 635 | + temperature=temperature, |
| 636 | + ) |
540 | 637 | except Exception as e: |
541 | 638 | raise HTTPException( |
542 | 639 | status_code=500, detail=f"Failed to initialize BYOK agents: {str(e)}" |
|
0 commit comments