Skip to content

Commit db83feb

Browse files
Feature/fix 405 error (#66)
* bugfix: try to fix 405 error from user, safe read request, path normalizer with no redirect * feature: add version on initialize, bump version * chore: change version to 0.1.2.post1 * feature: add openAPI documentation * feature: add count tokens anthropic path * refactor(path_normalizer): add responses and messages * Update coverage badge for feature/fix_405_error * chore: add count_tokens example for anthropic --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 472a94c commit db83feb

File tree

13 files changed

+941
-173
lines changed

13 files changed

+941
-173
lines changed

badges/coverage.svg

Lines changed: 2 additions & 2 deletions
Loading

examples/anthropic/count_tokens.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
"""Token counting with the Anthropic Messages API.

Demonstrates ``client.messages.count_tokens`` for measuring input token
usage before a request is sent — useful for estimating costs and for
making sure a prompt fits within the model's context window.

Covers four cases: a plain message, a system prompt, tool definitions,
and a multi-turn conversation.
"""

from anthropic import Anthropic

# The local proxy accepts any API key; authentication happens downstream.
client = Anthropic(base_url="http://localhost:8090/v1", api_key="any-key")

# Case 1: a single plain user message.
simple_count = client.messages.count_tokens(
    model="GigaChat-2-Max",
    messages=[{"role": "user", "content": "Расскажи коротко о Python."}],
)
print(f"Простое сообщение: {simple_count.input_tokens} токенов")

# Case 2: the system prompt is counted as part of the input.
system_count = client.messages.count_tokens(
    model="GigaChat-2-Max",
    system="Ты — опытный программист. Отвечай кратко и по делу.",
    messages=[{"role": "user", "content": "Что такое декораторы в Python?"}],
)
print(f"С системным промптом: {system_count.input_tokens} токенов")

# Case 3: tool (function) definitions also contribute to the count.
weather_tool = {
    "name": "get_weather",
    "description": "Получить текущую погоду для указанного города.",
    "input_schema": {
        "type": "object",
        "properties": {
            "city": {
                "type": "string",
                "description": "Название города, например: Москва",
            },
        },
        "required": ["city"],
    },
}

tools_count = client.messages.count_tokens(
    model="GigaChat-2-Max",
    messages=[{"role": "user", "content": "Какая погода в Москве?"}],
    tools=[weather_tool],
)
print(f"С определениями инструментов: {tools_count.input_tokens} токенов")

# Case 4: every turn of a multi-turn conversation is counted.
conversation = [
    {"role": "user", "content": "Привет!"},
    {"role": "assistant", "content": "Здравствуйте! Чем могу помочь?"},
    {"role": "user", "content": "Расскажи о машинном обучении."},
]
multi_turn_count = client.messages.count_tokens(
    model="GigaChat-2-Max",
    messages=conversation,
)
# Show the raw response object first, then the formatted summary.
print(multi_turn_count)
print(f"Многоходовый диалог: {multi_turn_count.input_tokens} токенов")

gpt2giga/api_server.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from gpt2giga.protocol import AttachmentProcessor, RequestTransformer, ResponseProcessor
1616
from gpt2giga.routers import anthropic_router, api_router, logs_router
1717
from gpt2giga.routers import system_router
18+
from gpt2giga.utils import _get_app_version
1819

1920

2021
@asynccontextmanager
@@ -54,7 +55,11 @@ async def lifespan(app: FastAPI):
5455

5556

5657
def create_app(config=None) -> FastAPI:
57-
app = FastAPI(lifespan=lifespan, title="Gpt2Giga converter proxy")
58+
app = FastAPI(
59+
lifespan=lifespan,
60+
title="Gpt2Giga converter proxy",
61+
version=_get_app_version(),
62+
)
5863
if config is None:
5964
config = load_config()
6065

@@ -108,7 +113,7 @@ def run():
108113
app = create_app(config)
109114
app.state.logger = logger
110115

111-
logger.info("Starting Gpt2Giga proxy server...")
116+
logger.info(f"Starting Gpt2Giga proxy server, version: {_get_app_version()}")
112117
logger.info(f"Proxy settings: {proxy_settings}")
113118
logger.info(
114119
f"GigaChat settings: {config.gigachat_settings.model_dump(exclude={'password', 'credentials', 'access_token'})}"

gpt2giga/middlewares/path_normalizer.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from fastapi import Request
55
from starlette.middleware.base import BaseHTTPMiddleware
6-
from starlette.responses import RedirectResponse
76

87

98
class PathNormalizationMiddleware(BaseHTTPMiddleware):
@@ -15,7 +14,14 @@ class PathNormalizationMiddleware(BaseHTTPMiddleware):
1514
def __init__(self, app, valid_roots=None):
1615
super().__init__(app)
1716
# Valid entrypoints
18-
self.valid_roots = valid_roots or ["v1", "chat", "models", "embeddings"]
17+
self.valid_roots = valid_roots or [
18+
"v1",
19+
"chat",
20+
"models",
21+
"embeddings",
22+
"messages",
23+
"responses",
24+
]
1925
pattern = r".*/(" + "|".join(map(re.escape, self.valid_roots)) + r")(/.*|$)"
2026
self._pattern = re.compile(pattern)
2127

@@ -26,9 +32,11 @@ async def dispatch(self, request: Request, call_next: Callable):
2632

2733
if match and not path.startswith(f"/{match.group(1)}"):
2834
new_path = f"/{match.group(1)}{match.group(2)}"
29-
query = request.url.query
30-
if query:
31-
new_path += f"?{query}"
32-
return RedirectResponse(url=new_path)
35+
# IMPORTANT:
36+
# Do not redirect (307) here: some clients may re-issue the request
37+
# without the original body, which leads to JSONDecodeError in
38+
# downstream handlers. Instead, rewrite the ASGI scope path in-place.
39+
request.scope["path"] = new_path
40+
request.scope["raw_path"] = new_path.encode("utf-8")
3341

3442
return await call_next(request)

0 commit comments

Comments
 (0)