q2api-v2/app.py at main · bhabgs/q2api-v2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import json
import traceback
import uuid
import time
import asyncio
import importlib.util
import random
import secrets
import re
import hashlib
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Optional, List, Any, AsyncGenerator, Tuple

from fastapi import FastAPI, Depends, HTTPException, Header
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse, FileResponse
from pydantic import BaseModel
from dotenv import load_dotenv
import httpx
import tiktoken

from db import init_db, close_db, row_to_dict
from license_client import verify_license_or_exit, periodic_license_check

# ------------------------------------------------------------------------------
# Tokenizer
# ------------------------------------------------------------------------------

# Load the tokenizer once at import time. If tiktoken cannot provide the
# encoding, ENCODING is left as None and count_tokens() returns 0.
try:
    # cl100k_base is used by gpt-4, gpt-3.5-turbo, text-embedding-ada-002
    ENCODING = tiktoken.get_encoding("cl100k_base")
except Exception:
    # Any failure is swallowed so the module keeps importing without a tokenizer.
    ENCODING = None

def count_tokens(text: str, apply_multiplier: bool = False) -> int:
    """Return the tiktoken token count of ``text``.

    Empty text, or a tokenizer that failed to load, yields 0. When
    ``apply_multiplier`` is true the raw count is scaled by
    ``TOKEN_COUNT_MULTIPLIER`` and truncated to an int.
    """
    if not text or not ENCODING:
        return 0
    raw_count = len(ENCODING.encode(text))
    if not apply_multiplier:
        return raw_count
    return int(raw_count * TOKEN_COUNT_MULTIPLIER)

# ------------------------------------------------------------------------------
# Bootstrap
# ------------------------------------------------------------------------------

# Directory containing this file; .env and the dynamically loaded sibling
# modules are resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent

# override=True lets values from .env replace variables already present in
# the process environment.
load_dotenv(BASE_DIR / ".env", override=True)

app = FastAPI(title="v2 OpenAI-compatible Server (Amazon Q Backend)")

# CORS for simple testing in browser
# NOTE(review): every origin, method and header is allowed; confirm this is
# intended outside of local testing.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ------------------------------------------------------------------------------
# Dynamic import of replicate.py to avoid package __init__ needs
# ------------------------------------------------------------------------------

def _load_replicate_module():
    """Load ``replicate.py`` from BASE_DIR as the module ``v2_replicate``.

    The file is loaded by path so the directory does not have to be an
    importable package.

    Returns:
        The executed module object.

    Raises:
        ImportError: if no import spec or loader can be built for the file.
    """
    mod_path = BASE_DIR / "replicate.py"
    spec = importlib.util.spec_from_file_location("v2_replicate", str(mod_path))
    # Validate before use: module_from_spec(None) would fail with an opaque
    # AttributeError, and a bare assert disappears under ``python -O``.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module from {mod_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# Executed at import time; there is no fallback here, so any exception raised
# while loading replicate.py propagates out of this module's import.
_replicate = _load_replicate_module()
send_chat_request = _replicate.send_chat_request

# ------------------------------------------------------------------------------
# Dynamic import of Claude modules
# ------------------------------------------------------------------------------

def _load_claude_modules():
    """Load the three Claude helper files that live next to this script.

    Returns:
        A ``(claude_types, claude_converter, claude_stream)`` tuple of
        executed module objects.
    """
    import sys

    def _prepare(module_name, file_name):
        # Build the import spec and an empty module object for a sibling file.
        spec = importlib.util.spec_from_file_location(module_name, str(BASE_DIR / file_name))
        return spec, importlib.util.module_from_spec(spec)

    # claude_types
    types_spec, types_module = _prepare("v2_claude_types", "claude_types.py")
    types_spec.loader.exec_module(types_module)

    # claude_converter
    # Per the original author's note, claude_converter.py does
    # `from .claude_types import ...`. To avoid relative-import problems the
    # already-loaded types module is registered in sys.modules as
    # "v2.claude_types" before the converter is executed.
    # NOTE(review): whether this registration is sufficient depends on how the
    # converter resolves its package context; confirm against claude_converter.py.
    converter_spec, converter_module = _prepare("v2_claude_converter", "claude_converter.py")
    sys.modules["v2.claude_types"] = types_module
    converter_spec.loader.exec_module(converter_module)

    # claude_stream
    stream_spec, stream_module = _prepare("v2_claude_stream", "claude_stream.py")
    stream_spec.loader.exec_module(stream_module)

    return types_module, converter_module, stream_module

# Bind the Claude integration names at import time. On any failure the error
# and traceback are printed and placeholders are bound instead, so the rest of
# this module can still be imported.
try:
    _claude_types, _claude_converter, _claude_stream = _load_claude_modules()
    ClaudeRequest = _claude_types.ClaudeRequest
    convert_claude_to_amazonq_request = _claude_converter.convert_claude_to_amazonq_request
    map_model_name = _claude_converter.map_model_name
    ClaudeStreamHandler = _claude_stream.ClaudeStreamHandler
except Exception as e:
    print(f"Failed to load Claude modules: {e}")
    traceback.print_exc()
    # Define dummy classes to avoid NameError on startup if loading fails
    class ClaudeRequest(BaseModel):
        pass
    # NOTE(review): the two None placeholders below are not callable; code
    # that uses them presumably has to check for None first.
    convert_claude_to_amazonq_request = None
    map_model_name = lambda m: m  # Pass through if module fails to load
    ClaudeStreamHandler = None

# ------------------------------------------------------------------------------
# Global HTTP Client
# ------------------------------------------------------------------------------

# Shared httpx client: created by _init_global_client(), released and reset to
# None by _close_global_client().
GLOBAL_CLIENT: Optional[httpx.AsyncClient] = None

def _get_proxies() -> Optional[Dict[str, str]]:
    """Return the proxy mapping built from ``HTTP_PROXY``.

    The same URL is used for both the "http" and "https" entries. None is
    returned when the variable is unset or contains only whitespace.
    """
    proxy_url = os.getenv("HTTP_PROXY", "").strip()
    if not proxy_url:
        return None
    return dict(http=proxy_url, https=proxy_url)

async def _init_global_client():
    """Create the shared ``httpx.AsyncClient`` and store it in ``GLOBAL_CLIENT``.

    When ``_get_proxies()`` yields a proxy URL, both ``http://`` and
    ``https://`` traffic is mounted on transports that use that proxy.
    """
    global GLOBAL_CLIENT

    proxy_settings = _get_proxies()
    proxy_url = None
    if proxy_settings:
        proxy_url = proxy_settings.get("https") or proxy_settings.get("http")

    if proxy_url:
        transports = {
            "https://": httpx.AsyncHTTPTransport(proxy=proxy_url),
            "http://": httpx.AsyncHTTPTransport(proxy=proxy_url),
        }
    else:
        transports = None

    # Connection-pool sizing for concurrent streaming requests.
    pool_limits = httpx.Limits(
        max_keepalive_connections=60,  # idle connections kept for reuse
        max_connections=60,  # upper bound on the total number of connections
        keepalive_expiry=30.0,  # idle connections are released after 30 seconds
    )
    # Streamed responses need a long read timeout; the other phases are short.
    request_timeout = httpx.Timeout(
        connect=1.0,  # establishing the connection
        read=300.0,  # reading the (possibly streamed) response
        write=1.0,  # sending the request
        pool=1.0,  # waiting for a free connection from the pool
    )
    GLOBAL_CLIENT = httpx.AsyncClient(mounts=transports, timeout=request_timeout, limits=pool_limits)

async def _close_global_client():
    """Close the shared HTTP client, if one exists, and reset ``GLOBAL_CLIENT`` to None."""
    global GLOBAL_CLIENT
    if not GLOBAL_CLIENT:
        return
    await GLOBAL_CLIENT.aclose()
    GLOBAL_CLIENT = None

# ------------------------------------------------------------------------------
# Database helpers
# ------------------------------------------------------------------------------

# Database backend instance; None until _ensure_db() stores the result of
# init_db() here (initialized on startup).
_db = None

async def _ensure_db():
    """Create the database backend via ``init_db()`` and publish it as ``_db``."""
    global _db
    backend = await init_db()
    _db = backend

def _row_to_dict(r: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a database row to a dict by delegating to ``db.row_to_dict``."""
    converted = row_to_dict(r)
    return converted

# _ensure_db() will be called in startup event

# ------------------------------------------------------------------------------
# Background token refresh task (an asyncio coroutine, not an OS thread)
# ------------------------------------------------------------------------------

async def _refresh_stale_tokens():
    """Periodically refresh access tokens that are missing or getting stale.

    Runs forever. Every 5 minutes it loads the enabled accounts (only the
    first ``LAZY_ACCOUNT_POOL_SIZE + LAZY_ACCOUNT_POOL_REFRESH_OFFSET`` rows in
    pool order when the lazy pool is enabled) and refreshes each account whose
    last refresh is unknown, unparsable, or more than 25 minutes old.
    Failures are printed and never stop the loop.
    """
    import calendar  # function-scope import: not in the file's import block

    timestamp_format = "%Y-%m-%dT%H:%M:%S"

    while True:
        try:
            await asyncio.sleep(300)  # 5 minutes
            if _db is None:
                print("[Error] Database not initialized, skipping token refresh cycle.")
                continue
            now = time.time()

            if LAZY_ACCOUNT_POOL_ENABLED:
                limit = LAZY_ACCOUNT_POOL_SIZE + LAZY_ACCOUNT_POOL_REFRESH_OFFSET
                order_direction = "DESC" if LAZY_ACCOUNT_POOL_ORDER_DESC else "ASC"
                # The ORDER BY column is whitelisted at module level and the
                # limit is the sum of two ints, so interpolation is safe here.
                query = f"SELECT id, last_refresh_time FROM accounts WHERE enabled=1 ORDER BY {LAZY_ACCOUNT_POOL_ORDER_BY} {order_direction} LIMIT {limit}"
                rows = await _db.fetchall(query)
            else:
                rows = await _db.fetchall("SELECT id, last_refresh_time FROM accounts WHERE enabled=1")

            for row in rows:
                acc_id, last_refresh = row['id'], row['last_refresh_time']
                if not last_refresh or last_refresh == "never":
                    should_refresh = True
                else:
                    try:
                        # last_refresh_time is written with time.gmtime(), i.e.
                        # in UTC. timegm() converts it back as UTC; mktime()
                        # would treat it as local time and skew the age by the
                        # server's UTC offset.
                        last_time = calendar.timegm(time.strptime(last_refresh, timestamp_format))
                        should_refresh = now - last_time > 1500  # 25 minutes
                    except Exception:
                        # Malformed or unparsable timestamp; force refresh
                        should_refresh = True

                if should_refresh:
                    try:
                        await refresh_access_token_in_db(acc_id)
                    except Exception:
                        # Per-account failure must not stop the cycle; the
                        # timestamp/status are recorded inside the refresh call.
                        traceback.print_exc()
        except Exception:
            # Keep the background loop alive whatever a single cycle raises.
            traceback.print_exc()

# ------------------------------------------------------------------------------
# Env and API Key authorization (keys are independent of AWS accounts)
# ------------------------------------------------------------------------------
def _parse_allowed_keys_env() -> List[str]:
    """
    Read the API-key whitelist from the OPENAI_KEYS environment variable.

    OPENAI_KEYS is a comma-separated list, e.g. OPENAI_KEYS="key1,key2,key3".
    Whitespace around each key is stripped and empty entries are dropped.
    An unset or empty variable yields an empty list.
    """
    raw_value = os.getenv("OPENAI_KEYS", "") or ""
    return [piece.strip() for piece in raw_value.split(",") if piece.strip()]

# Whitelisted API keys, parsed once from OPENAI_KEYS at import time.
ALLOWED_API_KEYS: List[str] = _parse_allowed_keys_env()
# Error count at which _update_stats() disables an account.
MAX_ERROR_COUNT: int = int(os.getenv("MAX_ERROR_COUNT", "100"))
# Factor applied by count_tokens(..., apply_multiplier=True).
TOKEN_COUNT_MULTIPLIER: float = float(os.getenv("TOKEN_COUNT_MULTIPLIER", "1.0"))

def generate_api_key() -> str:
    """Return a fresh random API key: 32 random bytes as URL-safe base64 text."""
    key_bytes = 32  # encodes to 43 base64url characters
    return secrets.token_urlsafe(key_bytes)

# Pricing used for cost accounting, modelled on Claude API pricing and
# configurable through the environment.
# Defaults: $0.003 per 1K input tokens, $0.015 per 1K output tokens.
COST_PER_1K_INPUT_TOKENS: float = float(os.getenv("COST_PER_1K_INPUT_TOKENS", "0.003"))
COST_PER_1K_OUTPUT_TOKENS: float = float(os.getenv("COST_PER_1K_OUTPUT_TOKENS", "0.015"))

def calculate_cost(input_tokens: int, output_tokens: int) -> float:
    """Return the USD cost for the given token usage, rounded to 6 decimals."""
    total = (input_tokens / 1000) * COST_PER_1K_INPUT_TOKENS
    total += (output_tokens / 1000) * COST_PER_1K_OUTPUT_TOKENS
    return round(total, 6)

# Lazy Account Pool settings
# Boolean flags are true only for the case-insensitive values "true", "1", "yes".
LAZY_ACCOUNT_POOL_ENABLED: bool = os.getenv("LAZY_ACCOUNT_POOL_ENABLED", "false").lower() in ("true", "1", "yes")
# Number of enabled accounts (in pool order) that requests are drawn from.
LAZY_ACCOUNT_POOL_SIZE: int = int(os.getenv("LAZY_ACCOUNT_POOL_SIZE", "20"))
# Extra accounts beyond the pool size that the background refresher also covers.
LAZY_ACCOUNT_POOL_REFRESH_OFFSET: int = int(os.getenv("LAZY_ACCOUNT_POOL_REFRESH_OFFSET", "10"))
# Column name and direction interpolated into the pool's ORDER BY clause.
LAZY_ACCOUNT_POOL_ORDER_BY: str = os.getenv("LAZY_ACCOUNT_POOL_ORDER_BY", "created_at")
LAZY_ACCOUNT_POOL_ORDER_DESC: bool = os.getenv("LAZY_ACCOUNT_POOL_ORDER_DESC", "false").lower() in ("true", "1", "yes")

# Validate LAZY_ACCOUNT_POOL_ORDER_BY to prevent SQL injection
# (the value is interpolated into SQL text; unknown names fall back to created_at)
if LAZY_ACCOUNT_POOL_ORDER_BY not in ["created_at", "id", "success_count"]:
    LAZY_ACCOUNT_POOL_ORDER_BY = "created_at"

def _is_console_enabled() -> bool:
    """Tell whether the management console is enabled.

    Reads ENABLE_CONSOLE (default "true"). The console is off only for the
    case-insensitive, whitespace-trimmed values "false", "0", "no" and
    "disabled"; every other value leaves it on.
    """
    disabling_values = {"false", "0", "no", "disabled"}
    setting = os.getenv("ENABLE_CONSOLE", "true").strip().lower()
    return setting not in disabling_values

# Evaluated once at import time from ENABLE_CONSOLE.
CONSOLE_ENABLED: bool = _is_console_enabled()

# Admin authentication configuration
# NOTE(review): ADMIN_PASSWORD falls back to the well-known default "admin"
# when the environment variable is unset.
ADMIN_PASSWORD: str = os.getenv("ADMIN_PASSWORD", "admin")
# An empty VIEWER_PASSWORD never matches in _verify_viewer_password_internal().
VIEWER_PASSWORD: str = os.getenv("VIEWER_PASSWORD", "")

# Password hashing
def _hash_password(password: str) -> str:
    """Return the hex SHA-256 digest of a fixed application salt followed by ``password``.

    NOTE: a fixed salt with a single SHA-256 round is simple but weak; a
    dedicated password hash such as bcrypt would be stronger. Changing the
    scheme would invalidate hashes already stored in the config table.
    """
    salted = "q2api-admin-salt-v1" + password
    digest = hashlib.sha256(salted.encode())
    return digest.hexdigest()

def _verify_password(password: str, password_hash: str) -> bool:
    """Check ``password`` against a stored hash produced by ``_hash_password``.

    The comparison is constant-time so response timing does not reveal how
    many leading characters of the hash matched.

    Returns:
        True when the hash of ``password`` equals ``password_hash``; False
        otherwise, including when ``password_hash`` is not a string.
    """
    if not isinstance(password_hash, str):
        return False
    candidate = _hash_password(password)
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    return secrets.compare_digest(candidate.encode("utf-8"), password_hash.encode("utf-8"))

async def _get_admin_password_from_db() -> Optional[str]:
    """Fetch the stored admin password hash.

    Returns None when no ``admin_password`` config row exists or when the
    lookup raises for any reason.
    """
    try:
        record = await _db.fetchone("SELECT value FROM config WHERE key=?", ("admin_password",))
        if record:
            return record["value"]
        return None
    except Exception:
        return None

async def _set_admin_password_in_db(password: str) -> None:
    """Store the hash of ``password`` under the ``admin_password`` config key.

    An existing row is updated; otherwise a new row is inserted. The
    ``updated_at`` column receives the current UTC time.
    """
    config_key = "admin_password"
    hashed = _hash_password(password)
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

    # Look the key up first to decide between UPDATE and INSERT.
    already_present = await _db.fetchone("SELECT key FROM config WHERE key=?", (config_key,))
    if already_present:
        statement = "UPDATE config SET value=?, updated_at=? WHERE key=?"
        params = (hashed, timestamp, config_key)
    else:
        statement = "INSERT INTO config (key, value, updated_at) VALUES (?, ?, ?)"
        params = (config_key, hashed, timestamp)
    await _db.execute(statement, params)

async def _verify_admin_password_internal(password: str) -> bool:
    """Verify the admin password.

    A hash stored in the database takes precedence. Only when none is stored
    is the password compared with the ``ADMIN_PASSWORD`` environment value
    (backward compatibility).

    Returns:
        True when ``password`` is the admin password, else False.
    """
    db_hash = await _get_admin_password_from_db()
    if db_hash:
        return _verify_password(password, db_hash)
    # Constant-time comparison so timing does not leak how much of the
    # configured password matched; bytes are used because compare_digest()
    # rejects non-ASCII str arguments.
    return secrets.compare_digest(password.encode("utf-8"), ADMIN_PASSWORD.encode("utf-8"))

async def _get_viewer_password_from_db() -> Optional[str]:
    """Fetch the stored viewer password hash.

    Returns None when no ``viewer_password`` config row exists or when the
    lookup raises for any reason.
    """
    try:
        record = await _db.fetchone("SELECT value FROM config WHERE key=?", ("viewer_password",))
        if record:
            return record["value"]
        return None
    except Exception:
        return None

async def _set_viewer_password_in_db(password: str) -> None:
    """Store the hash of ``password`` under the ``viewer_password`` config key.

    An existing row is updated; otherwise a new row is inserted. The
    ``updated_at`` column receives the current UTC time.
    """
    config_key = "viewer_password"
    hashed = _hash_password(password)
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

    # Look the key up first to decide between UPDATE and INSERT.
    already_present = await _db.fetchone("SELECT key FROM config WHERE key=?", (config_key,))
    if already_present:
        statement = "UPDATE config SET value=?, updated_at=? WHERE key=?"
        params = (hashed, timestamp, config_key)
    else:
        statement = "INSERT INTO config (key, value, updated_at) VALUES (?, ?, ?)"
        params = (config_key, hashed, timestamp)
    await _db.execute(statement, params)

async def _verify_viewer_password_internal(password: str) -> bool:
    """Verify the viewer password.

    A hash stored in the database takes precedence. Otherwise the password
    is compared with the ``VIEWER_PASSWORD`` environment value; an empty
    ``VIEWER_PASSWORD`` never matches.

    Returns:
        A real bool. The previous ``VIEWER_PASSWORD and ...`` expression
        returned the empty string itself when no viewer password was set.
    """
    db_hash = await _get_viewer_password_from_db()
    if db_hash:
        return _verify_password(password, db_hash)
    if not VIEWER_PASSWORD:
        return False
    # Constant-time comparison; bytes are used because compare_digest()
    # rejects non-ASCII str arguments.
    return secrets.compare_digest(password.encode("utf-8"), VIEWER_PASSWORD.encode("utf-8"))

async def _get_user_role(password: str) -> Optional[str]:
    """Map a console password to its role.

    The admin check runs first, then the viewer check. Returns ``"admin"``,
    ``"viewer"``, or None when the password matches neither.
    """
    role_checks = (
        ("admin", _verify_admin_password_internal),
        ("viewer", _verify_viewer_password_internal),
    )
    for role, verifier in role_checks:
        if await verifier(password):
            return role
    return None

def _extract_bearer(token_header: Optional[str]) -> Optional[str]:
    """Return the credential carried by an Authorization header value.

    A leading ``"Bearer "`` prefix (case-sensitive) is removed; a value
    without it is used as-is. Surrounding whitespace is stripped. A missing
    or empty header yields None.
    """
    if not token_header:
        return None
    prefix = "Bearer "
    if token_header.startswith(prefix):
        credential = token_header[len(prefix):]
    else:
        credential = token_header
    return credential.strip()

async def _list_enabled_accounts(limit: Optional[int] = None) -> List[Dict[str, Any]]:
    """Return the enabled accounts as dicts.

    With the lazy pool enabled, rows are ordered by the configured
    (whitelisted) column and direction; otherwise newest ``created_at``
    first. A truthy ``limit`` caps the number of rows.
    """
    if LAZY_ACCOUNT_POOL_ENABLED:
        direction = "DESC" if LAZY_ACCOUNT_POOL_ORDER_DESC else "ASC"
        query = f"SELECT * FROM accounts WHERE enabled=1 ORDER BY {LAZY_ACCOUNT_POOL_ORDER_BY} {direction}"
    else:
        query = "SELECT * FROM accounts WHERE enabled=1 ORDER BY created_at DESC"
    if limit:
        query += f" LIMIT {limit}"
    records = await _db.fetchall(query)
    return list(map(_row_to_dict, records))

async def _list_disabled_accounts() -> List[Dict[str, Any]]:
    """Return every disabled account (enabled=0) as a dict, newest ``created_at`` first."""
    records = await _db.fetchall("SELECT * FROM accounts WHERE enabled=0 ORDER BY created_at DESC")
    return list(map(_row_to_dict, records))

async def verify_account(account: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
    """Check that an account can actually be used.

    The account's token is refreshed, then a minimal "hello" chat request is
    sent and at most one event is read from the response stream.

    Returns:
        ``(True, None)`` on success; ``(False, "AccessDenied")`` when the
        error text contains "AccessDenied" or "403"; ``(False, None)`` for
        any other failure. This function does not raise.
    """
    try:
        account = await refresh_access_token_in_db(account['id'])
        test_request = {
            "conversationState": {
                "currentMessage": {"userInputMessage": {"content": "hello"}},
                "chatTriggerType": "MANUAL"
            }
        }
        _, _, _, event_gen = await send_chat_request(
            access_token=account['accessToken'],
            messages=[],
            stream=True,
            raw_payload=test_request
        )
        if event_gen:
            try:
                async for _ in event_gen:
                    break
            finally:
                # Leaving the loop early does not finalize an async generator.
                # Close it explicitly so the stream is released right away
                # instead of whenever it is garbage-collected.
                close = getattr(event_gen, "aclose", None)
                if close is not None:
                    try:
                        await close()
                    except Exception:
                        # Best-effort cleanup: a close error must not change
                        # the verification result.
                        pass
        return True, None
    except Exception as e:
        error_text = str(e)
        if "AccessDenied" in error_text or "403" in error_text:
            return False, "AccessDenied"
        return False, None

def _check_account_limits(account: Dict[str, Any]) -> None:
    """Raise HTTP 429 when the account has used up one of its quotas.

    The request quota (``success_count`` vs ``max_requests``) is checked
    first, then the cost quota (``total_cost`` vs ``max_cost``). A limit of
    None means unlimited; missing or None usage counts as 0.
    """
    request_cap = account.get('max_requests')
    cost_cap = account.get('max_cost')
    requests_used = account.get('success_count') or 0
    cost_used = account.get('total_cost') or 0

    if request_cap is not None:
        if requests_used >= request_cap:
            raise HTTPException(
                status_code=429,
                detail=f"Account has reached request limit ({requests_used}/{request_cap})"
            )
    if cost_cap is not None:
        if cost_used >= cost_cap:
            raise HTTPException(
                status_code=429,
                detail=f"Account has reached cost limit (${cost_used:.4f}/${cost_cap:.4f})"
            )

async def resolve_account_for_key(bearer_key: Optional[str]) -> Dict[str, Any]:
    """
    Authorize a request by API key and resolve it to an account.

    Resolution order:
    1. No key: rejected with 401 when ALLOWED_API_KEYS is configured; otherwise
       (development mode) a random enabled account is used.
    2. Key equals an account's api_key: that account is used. 429 if its limits
       are exceeded, 403 if it is disabled.
    3. Key is listed in ALLOWED_API_KEYS: a random enabled account is used
       (backward compatibility).
    4. Anything else: 401.
    """

    async def _random_enabled_account() -> Dict[str, Any]:
        # Shared-pool selection: with the lazy pool enabled only the first
        # LAZY_ACCOUNT_POOL_SIZE accounts are candidates.
        pool_limit = LAZY_ACCOUNT_POOL_SIZE if LAZY_ACCOUNT_POOL_ENABLED else None
        pool = await _list_enabled_accounts(limit=pool_limit)
        if not pool:
            raise HTTPException(status_code=401, detail="No enabled account available")
        chosen = random.choice(pool)
        _check_account_limits(chosen)
        return chosen

    if not bearer_key:
        if ALLOWED_API_KEYS:
            raise HTTPException(status_code=401, detail="Invalid or missing API key")
        return await _random_enabled_account()

    # A key bound to a specific account wins over the legacy whitelist.
    row = await _db.fetchone("SELECT * FROM accounts WHERE api_key=?", (bearer_key,))
    if row:
        account = _row_to_dict(row)
        # Limits are checked for disabled accounts too, so an exhausted quota
        # is reported as 429 rather than the generic 403.
        _check_account_limits(account)
        if not account.get('enabled'):
            raise HTTPException(status_code=403, detail="Account is disabled")
        return account

    if ALLOWED_API_KEYS and bearer_key in ALLOWED_API_KEYS:
        return await _random_enabled_account()

    raise HTTPException(status_code=401, detail="Invalid or missing API key")

# ------------------------------------------------------------------------------
# Pydantic Schemas
# ------------------------------------------------------------------------------

class AccountCreate(BaseModel):
    """Request body for creating an account.

    Only ``clientId`` and ``clientSecret`` are required; every other field
    has a default.
    """
    label: Optional[str] = None
    clientId: str
    clientSecret: str
    refreshToken: Optional[str] = None
    accessToken: Optional[str] = None
    other: Optional[Dict[str, Any]] = None
    enabled: Optional[bool] = True
    api_key: Optional[str] = None  # If None, will be auto-generated
    max_requests: Optional[int] = None  # Max number of requests, None = unlimited
    max_cost: Optional[float] = None  # Max cost in USD, None = unlimited

class BatchAccountCreate(BaseModel):
    """Request body carrying a list of ``AccountCreate`` entries."""
    accounts: List[AccountCreate]

class AccountUpdate(BaseModel):
    """Request body for updating an account; every field is optional and defaults to None.

    NOTE(review): the handler that applies this model is outside this view;
    the per-field sentinel values below are taken from the existing comments.
    """
    label: Optional[str] = None
    clientId: Optional[str] = None
    clientSecret: Optional[str] = None
    refreshToken: Optional[str] = None
    accessToken: Optional[str] = None
    other: Optional[Dict[str, Any]] = None
    enabled: Optional[bool] = None
    api_key: Optional[str] = None  # Set to empty string to clear, or new key to update
    max_requests: Optional[int] = None  # Set to -1 to clear (unlimited)
    max_cost: Optional[float] = None  # Set to -1 to clear (unlimited)
    reset_stats: Optional[bool] = None  # Set to True to reset success_count, error_count, total_cost

class ChatMessage(BaseModel):
    """One chat message: a role name plus content of any JSON-compatible shape."""
    role: str
    content: Any

class ChatCompletionRequest(BaseModel):
    """Request body for the chat completion endpoint.

    ``messages`` is required; ``model`` is optional and ``stream`` defaults
    to False.
    """
    model: Optional[str] = None
    messages: List[ChatMessage]
    stream: Optional[bool] = False

# ------------------------------------------------------------------------------
# Token refresh (OIDC)
# ------------------------------------------------------------------------------

# OIDC endpoint (us-east-1); refresh_access_token_in_db() posts the
# refresh_token grant to TOKEN_URL.
OIDC_BASE = "https://oidc.us-east-1.amazonaws.com"
TOKEN_URL = f"{OIDC_BASE}/token"

def _oidc_headers() -> Dict[str, str]:
    """Build the HTTP headers for an OIDC token request.

    The static entries identify the client as the Amazon Q CLI SDK; every
    call also gets a freshly generated ``amz-sdk-invocation-id``.
    """
    invocation_id = str(uuid.uuid4())
    headers: Dict[str, str] = {
        "content-type": "application/json",
        "user-agent": "aws-sdk-rust/1.3.9 os/windows lang/rust/1.87.0",
        "x-amz-user-agent": "aws-sdk-rust/1.3.9 ua/2.1 api/ssooidc/1.88.0 os/windows lang/rust/1.87.0 m/E app/AmazonQ-For-CLI",
        "amz-sdk-request": "attempt=1; max=3",
    }
    headers["amz-sdk-invocation-id"] = invocation_id
    return headers

async def _record_refresh_failure(account_id: str) -> None:
    """Stamp a failed refresh attempt on the account row and count it as an error."""
    now = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
    await _db.execute(
        """
        UPDATE accounts
        SET last_refresh_time=?, last_refresh_status=?, updated_at=?
        WHERE id=?
        """,
        (now, "failed", now, account_id),
    )
    # Count the failed refresh in the account's error statistics
    await _update_stats(account_id, False)

async def refresh_access_token_in_db(account_id: str) -> Dict[str, Any]:
    """Exchange the account's refresh token for a new access token and persist it.

    On success the new access token, refresh token, expiry and refresh status
    are written to the account row and the updated row is returned. On any
    failure during the exchange, ``last_refresh_time``/``last_refresh_status``
    are still recorded and the failure is counted via ``_update_stats``.

    Args:
        account_id: id of the account row to refresh.

    Returns:
        The account row (as a dict) after the update.

    Raises:
        HTTPException: 404 when the account does not exist; 400 when it lacks
            clientId/clientSecret/refreshToken; 502 when the token request
            fails or the response carries no access token.
        Exception: any other unexpected error is re-raised after the failure
            has been recorded.
    """
    row = await _db.fetchone("SELECT * FROM accounts WHERE id=?", (account_id,))
    if not row:
        raise HTTPException(status_code=404, detail="Account not found")
    acc = _row_to_dict(row)

    if not acc.get("clientId") or not acc.get("clientSecret") or not acc.get("refreshToken"):
        raise HTTPException(status_code=400, detail="Account missing clientId/clientSecret/refreshToken for refresh")

    payload = {
        "grantType": "refresh_token",
        "clientId": acc["clientId"],
        "clientSecret": acc["clientSecret"],
        "refreshToken": acc["refreshToken"],
    }

    try:
        # Use global client if available, else fallback (though global should be ready)
        client = GLOBAL_CLIENT
        if not client:
            # Fallback for safety
            async with httpx.AsyncClient(timeout=60.0) as temp_client:
                r = await temp_client.post(TOKEN_URL, headers=_oidc_headers(), json=payload)
                r.raise_for_status()
                data = r.json()
        else:
            r = await client.post(TOKEN_URL, headers=_oidc_headers(), json=payload)
            r.raise_for_status()
            data = r.json()

        new_access = data.get("accessToken")
        if not new_access:
            # Never overwrite a stored token with NULL and call it a success.
            raise HTTPException(
                status_code=502,
                detail="Token refresh failed: response contained no accessToken",
            )
        # Keep the current refresh token when the response omits it or sends null.
        new_refresh = data.get("refreshToken") or acc.get("refreshToken")
        expires_in = data.get("expiresIn", 3600)  # Default 1 hour if not provided
        expires_at = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.time() + expires_in))
    except httpx.HTTPError as e:
        await _record_refresh_failure(account_id)
        raise HTTPException(status_code=502, detail=f"Token refresh failed: {str(e)}") from e
    except Exception:
        # Ensure last_refresh_time is recorded even on unexpected errors
        await _record_refresh_failure(account_id)
        raise

    now = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
    await _db.execute(
        """
        UPDATE accounts
        SET accessToken=?, refreshToken=?, expires_at=?, last_refresh_time=?, last_refresh_status=?, updated_at=?
        WHERE id=?
        """,
        (new_access, new_refresh, expires_at, now, "success", now, account_id),
    )

    row2 = await _db.fetchone("SELECT * FROM accounts WHERE id=?", (account_id,))
    return _row_to_dict(row2)

async def get_account(account_id: str) -> Dict[str, Any]:
    """Load one account by id as a dict; raises HTTP 404 when no such row exists."""
    record = await _db.fetchone("SELECT * FROM accounts WHERE id=?", (account_id,))
    if record:
        return _row_to_dict(record)
    raise HTTPException(status_code=404, detail="Account not found")

async def _update_stats(account_id: str, success: bool, input_tokens: int = 0, output_tokens: int = 0) -> None:
    """Record the outcome of one request against an account.

    Success: ``success_count`` is incremented, ``error_count`` is reset to 0
    and the request's cost is added to ``total_cost``. The account is then
    disabled if it has reached ``max_requests`` or ``max_cost``.

    Failure: ``error_count`` is incremented and the account is disabled once
    it reaches ``MAX_ERROR_COUNT``.

    Args:
        account_id: id of the account row to update.
        success: whether the request succeeded.
        input_tokens: prompt tokens, used for cost on success only.
        output_tokens: completion tokens, used for cost on success only.
    """
    now = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())

    if success:
        cost = calculate_cost(input_tokens, output_tokens)
        await _db.execute(
            "UPDATE accounts SET success_count=success_count+1, error_count=0, total_cost=COALESCE(total_cost,0)+?, updated_at=? WHERE id=?",
            (cost, now, account_id)
        )
        # Check if account exceeded limits after update
        row = await _db.fetchone("SELECT success_count, total_cost, max_requests, max_cost FROM accounts WHERE id=?", (account_id,))
        if not row:
            return
        over_requests = row.get('max_requests') is not None and (row.get('success_count') or 0) >= row['max_requests']
        over_cost = row.get('max_cost') is not None and (row.get('total_cost') or 0) >= row['max_cost']
        if over_requests or over_cost:
            await _db.execute("UPDATE accounts SET enabled=0, updated_at=? WHERE id=?", (now, account_id))
        return

    # Increment inside the database, as the success path does. Reading the
    # counter and writing it back across two awaits would let concurrent
    # failures overwrite each other's increment.
    await _db.execute(
        "UPDATE accounts SET error_count=COALESCE(error_count,0)+1, updated_at=? WHERE id=?",
        (now, account_id)
    )
    row = await _db.fetchone("SELECT error_count FROM accounts WHERE id=?", (account_id,))
    if row and (row['error_count'] or 0) >= MAX_ERROR_COUNT:
        await _db.execute("UPDATE accounts SET enabled=0, updated_at=? WHERE id=?", (now, account_id))

# ------------------------------------------------------------------------------
# Dependencies
# ------------------------------------------------------------------------------

async def require_account(
    authorization: Optional[str] = Header(default=None),
    x_api_key: Optional[str] = Header(default=None)
) -> Dict[str, Any]:
    """Resolve the calling account from the credentials on the request.

    A non-empty ``authorization`` value takes precedence and is passed
    through ``_extract_bearer``; otherwise ``x_api_key`` is used unchanged.
    The chosen key is handed to ``resolve_account_for_key``, whose result is
    returned as is.
    """
    if authorization:
        api_key = _extract_bearer(authorization)
    else:
        api_key = x_api_key
    account = await resolve_account_for_key(api_key)
    return account

async def verify_admin_password(authorization: Optional[str] = Header(None)) -> str:
    """Check the console bearer password and return the matching role.

    The ``authorization`` value must start with ``"Bearer "``; the remainder
    is looked up through ``_get_user_role``.

    Returns:
        The role reported by ``_get_user_role`` ('admin' or 'viewer').

    Raises:
        HTTPException: 401 with code ``UNAUTHORIZED`` when the header is
            missing or lacks the bearer prefix; 401 with code
            ``INVALID_PASSWORD`` when no role matches the password.
    """
    scheme = "Bearer "
    if not (authorization and authorization.startswith(scheme)):
        raise HTTPException(
            status_code=401,
            detail={"error": "Unauthorized access", "code": "UNAUTHORIZED"}
        )

    # Everything after the scheme prefix is treated as the password.
    supplied_password = authorization[len(scheme):]
    role = await _get_user_role(supplied_password)
    if role:
        return role

    raise HTTPException(
        status_code=401,
        detail={"error": "Invalid password", "code": "INVALID_PASSWORD"}
    )

async def verify_admin_only(authorization: Optional[str] = Header(None)) -> str:
    """Authenticate via ``verify_admin_password`` and accept only the admin role.

    Returns:
        The string ``"admin"``.

    Raises:
        HTTPException: 403 with code ``FORBIDDEN`` for any other role.
            Errors raised by ``verify_admin_password`` propagate unchanged.
    """
    granted_role = await verify_admin_password(authorization)
    if granted_role == "admin":
        return granted_role
    raise HTTPException(
        status_code=403,
        detail={"error": "Admin access required", "code": "FORBIDDEN"}
    )

# ------------------------------------------------------------------------------
# OpenAI-compatible Chat endpoint
# ------------------------------------------------------------------------------

def _openai_non_streaming_response(
    text: str,
    model: Optional[str],
    prompt_tokens: int = 0,
    completion_tokens: int = 0,
) -> Dict[str, Any]:
    created = int(time.time())
    return {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": created,
        "model": model or "unknown",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }

def _sse_format(obj: Dict[str, Any]) -> str:
    return f"data: {json.dumps(obj, ensure_ascii=False)}\n\n"

@app.post("/v1/messages")
async def claude_messages(
    req: ClaudeRequest,
    account: Dict[str, Any] = Depends(require_account),
    x_conversation_id: Optional[str] = Header(default=None, alias="x-conversation-id")
):
    """
    Claude-compatible messages endpoint.

    Converts the Claude-style request into an Amazon Q payload, always
    streams from upstream, and either relays the resulting SSE chunks
    (``req.stream`` truthy) or folds them into a single Claude message body.

    Args:
        req: Parsed Claude request body.
        account: Account record resolved by ``require_account``; its ``id``
            and ``accessToken`` entries are read here.
        x_conversation_id: Value of the ``x-conversation-id`` header. It is
            accepted but not used: conversion is always called with
            ``conversation_id=None``.

    Returns:
        A ``StreamingResponse`` of SSE chunks when ``req.stream`` is truthy,
        otherwise a ``JSONResponse`` holding the assembled message.

    Raises:
        HTTPException: 400 when request conversion fails; 502 when upstream
            yields no stream, an empty stream, or an unclassified error
            before the first event; the upstream status code when the error
            text has the form ``"Upstream error <code>: ..."``.
    """
    # 1. Convert request
    # Always generate a new conversation_id like amq2api does
    # Using the same conversation_id can cause Amazon Q to return cached/stale data
    try:
        aq_request = convert_claude_to_amazonq_request(req, conversation_id=None)
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=400, detail=f"Request conversion failed: {str(e)}") from e

    # Post-process history to fix message ordering (prevents infinite loops)
    from message_processor import process_claude_history_for_amazonq
    conversation_state = aq_request.get("conversationState", {})
    history = conversation_state.get("history", [])
    if history:
        processed_history = process_claude_history_for_amazonq(history)
        aq_request["conversationState"]["history"] = processed_history

    # Remove duplicate tail userInputMessage that matches currentMessage content
    # This prevents the model from repeatedly responding to the same user message
    conversation_state = aq_request.get("conversationState", {})
    current_msg = conversation_state.get("currentMessage", {}).get("userInputMessage", {})
    current_content = (current_msg.get("content") or "").strip()
    history = conversation_state.get("history", [])

    if history and current_content:
        last = history[-1]
        if "userInputMessage" in last:
            last_content = (last["userInputMessage"].get("content") or "").strip()
            if last_content and last_content == current_content:
                # Remove duplicate tail userInputMessage
                history = history[:-1]
                aq_request["conversationState"]["history"] = history
                import logging
                logging.getLogger(__name__).info("Removed duplicate tail userInputMessage to prevent repeated response")

    conversation_state = aq_request.get("conversationState", {})
    conversation_id = conversation_state.get("conversationId")
    response_headers: Dict[str, str] = {}
    if conversation_id:
        response_headers["x-conversation-id"] = conversation_id

    # Set by event_generator once it has written its own stats, so the outer
    # error handler does not count the same request a second time (the
    # non-streaming branch consumes the generator inside the outer try).
    stats_recorded = False

    # Always stream from upstream to get full event details
    event_iter = None
    try:
        access = account.get("accessToken")
        if not access:
            refreshed = await refresh_access_token_in_db(account["id"])
            access = refreshed.get("accessToken")

        # We call with stream=True to get the event iterator
        _, _, tracker, event_iter = await send_chat_request(
            access_token=access,
            messages=[],
            model=map_model_name(req.model),
            stream=True,
            client=GLOBAL_CLIENT,
            raw_payload=aq_request
        )

        if not event_iter:
            raise HTTPException(status_code=502, detail="No event stream returned")

        # Calculate input tokens from the system prompt and every text part
        # of the messages; non-text content items are not counted.
        text_parts = []
        if req.system:
            if isinstance(req.system, str):
                text_parts.append(req.system)
            elif isinstance(req.system, list):
                for item in req.system:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_parts.append(item.get("text", ""))

        for msg in req.messages:
            if isinstance(msg.content, str):
                text_parts.append(msg.content)
            elif isinstance(msg.content, list):
                for item in msg.content:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_parts.append(item.get("text", ""))

        input_tokens = count_tokens("".join(text_parts), apply_multiplier=True)
        handler = ClaudeStreamHandler(model=req.model, input_tokens=input_tokens, conversation_id=conversation_id)

        # Try to get the first event to ensure the connection is valid
        # This allows us to return proper HTTP error codes before starting the stream
        first_event = None
        try:
            first_event = await event_iter.__anext__()
        except StopAsyncIteration:
            raise HTTPException(status_code=502, detail="Empty response from upstream")
        except Exception as e:
            # If we get an error before the first event, we can still return proper status code
            err_msg = str(e)
            # Extract upstream status code from "Upstream error {code}: {message}"
            if err_msg.startswith("Upstream error "):
                match = re.match(r"Upstream error (\d+):", err_msg)
                if match:
                    raise HTTPException(status_code=int(match.group(1)), detail=err_msg) from e
            raise HTTPException(status_code=502, detail=f"Upstream error: {err_msg}") from e

        async def event_generator():
            nonlocal stats_recorded
            try:
                # Process the first event we already fetched
                if first_event:
                    event_type, payload = first_event
                    async for sse in handler.handle_event(event_type, payload):
                        yield sse

                # Process remaining events
                async for event_type, payload in event_iter:
                    async for sse in handler.handle_event(event_type, payload):
                        yield sse
                async for sse in handler.finish():
                    yield sse
                stats_recorded = True
                await _update_stats(account["id"], True)
            except GeneratorExit:
                # Client disconnected - update stats but don't re-raise
                stats_recorded = True
                await _update_stats(account["id"], tracker.has_content if tracker else False)
            except Exception:
                stats_recorded = True
                await _update_stats(account["id"], False)
                raise

        if req.stream:
            return StreamingResponse(
                event_generator(),
                media_type="text/event-stream",
                headers=response_headers or None
            )
        else:
            # Non-streaming: consume the same SSE generator and rebuild one
            # message object from the events it emits.
            final_content = []
            usage = {"input_tokens": 0, "output_tokens": 0}
            stop_reason = None

            async for sse_chunk in event_generator():
                data_str = None
                # Each chunk from the generator can have multiple lines ('event:', 'data:').
                # We need to find the 'data:' line.
                for line in sse_chunk.strip().split('\n'):
                    if line.startswith("data:"):
                        # "data:" is five characters; strip() removes the
                        # optional space that may follow the colon.
                        data_str = line[5:].strip()
                        break

                if not data_str or data_str == "[DONE]":
                    continue

                try:
                    data = json.loads(data_str)
                    dtype = data.get("type")

                    if dtype == "content_block_start":
                        idx = data.get("index", 0)
                        while len(final_content) <= idx:
                            final_content.append(None)
                        final_content[idx] = data.get("content_block")

                    elif dtype == "content_block_delta":
                        idx = data.get("index", 0)
                        delta = data.get("delta", {})
                        # A delta for a block that was never started is skipped.
                        block = final_content[idx] if idx < len(final_content) else None
                        if block:
                            delta_type = delta.get("type")
                            if delta_type == "text_delta":
                                block["text"] += delta.get("text", "")
                            elif delta_type == "thinking_delta":
                                block.setdefault("thinking", "")
                                block["thinking"] += delta.get("thinking", "")
                            elif delta_type == "input_json_delta":
                                # Tool input arrives as JSON fragments; buffer
                                # them until the block is closed.
                                block.setdefault("partial_json", "")
                                block["partial_json"] += delta.get("partial_json", "")

                    elif dtype == "content_block_stop":
                        idx = data.get("index", 0)
                        block = final_content[idx] if idx < len(final_content) else None
                        if block and block.get("type") == "tool_use" and "partial_json" in block:
                            raw_json = block.pop("partial_json")
                            try:
                                block["input"] = json.loads(raw_json)
                            except json.JSONDecodeError:
                                # Keep partial if invalid
                                block["input"] = {"error": "invalid json", "partial": raw_json}

                    elif dtype == "message_delta":
                        usage = data.get("usage", usage)
                        stop_reason = data.get("delta", {}).get("stop_reason")

                except json.JSONDecodeError:
                    # Ignore lines that are not valid JSON
                    pass
                except Exception:
                    # Broad exception to prevent accumulator from crashing on one bad event
                    traceback.print_exc()

            # Final assembly
            final_content_cleaned = []
            for c in final_content:
                if c is not None:
                    # Remove internal state like 'partial_json' before returning
                    c.pop("partial_json", None)
                    final_content_cleaned.append(c)

            response_body = {
                "id": f"msg_{uuid.uuid4()}",
                "type": "message",
                "role": "assistant",
                "model": req.model,
                "content": final_content_cleaned,
                "stop_reason": stop_reason,
                "stop_sequence": None,
                "usage": usage
            }
            if conversation_id:
                response_body["conversation_id"] = conversation_id
                response_body["conversationId"] = conversation_id
            return JSONResponse(content=response_body, headers=response_headers or None)

    except Exception as e:
        # Ensure event_iter (if created) is closed to release upstream connection
        try:
            if event_iter and hasattr(event_iter, "aclose"):
                await event_iter.aclose()
        except Exception:
            # Best-effort cleanup: a failure to close must not mask the original error.
            pass
        # Skip when event_generator already recorded this request's outcome.
        if not stats_recorded:
            await _update_stats(account["id"], False)

        # Extract upstream status code from "Upstream error {code}: {message}"
        err_msg = str(e)
        if err_msg.startswith("Upstream error "):
            match = re.match(r"Upstream error (\d+):", err_msg)
            if match:
                raise HTTPException(status_code=int(match.group(1)), detail=err_msg)
        raise