Skip to content

Commit a66e865

Browse files
authored
feature: add llm tokens limit (#327)
* fix: typo in available keys * docs: update parameters * chore: rename config to configloader * fix: add correct number parsing * feat: add context window parameter * chore: rename config to config_loader * build: bump project version
1 parent bd62994 commit a66e865

File tree

16 files changed

+122
-69
lines changed

16 files changed

+122
-69
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,10 @@ documentation, see the [Workflow Generator README](./osa_tool/workflow/README.md
196196
| `--api` | LLM API service provider | `itmo` |
197197
| `--base-url` | URL of the provider compatible with API OpenAI | `https://api.openai.com/v1` |
198198
| `--model` | Specific LLM model to use | `gpt-3.5-turbo` |
199-
| `--top_p` | Nucleus sampling probability | `None` |
200-
| `--temperature` | Sampling temperature to use for the LLM output (0 = deterministic, 1 = creative). | `None` |
201-
| `--max_tokens` | Maximum number of tokens the model can generate in a single response | `None` |
199+
| `--top_p` | Nucleus sampling probability | `0.95` |
200+
| `--temperature` | Sampling temperature to use for the LLM output (0 = deterministic, 1 = creative). | `0.05` |
201+
| `--max_tokens` | Maximum number of output tokens the model can generate in a single response | `4096` |
202+
| `--context_window` | Total number of tokens in the model's context window (input + output) | `16385` |
202203
| `--attachment` | Path to a local PDF or .docx file, or a URL to a PDF resource | `None` |
203204
| `-m`, `--mode` | Operation mode for repository processing: `basic`, `auto` (default), or `advanced`. | `auto` |
204205
| `--delete-dir` | Enable deleting the downloaded repository after processing | `disabled` |

docs/index.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,10 @@ documentation, see the [GitHub Action Workflow Generator README](../osa_tool/git
165165
| `--api` | LLM API service provider | `itmo` |
166166
| `--base-url` | URL of the provider compatible with API OpenAI | `https://api.openai.com/v1` |
167167
| `--model` | Specific LLM model to use | `gpt-3.5-turbo` |
168-
| `--top_p` | Nucleus sampling probability | `None` |
169-
| `--temperature` | Sampling temperature to use for the LLM output (0 = deterministic, 1 = creative). | `None` |
170-
| `--max_tokens` | Maximum number of tokens the model can generate in a single response | `None` |
168+
| `--top_p` | Nucleus sampling probability | `0.95` |
169+
| `--temperature` | Sampling temperature to use for the LLM output (0 = deterministic, 1 = creative). | `0.05` |
170+
| `--max_tokens` | Maximum number of output tokens the model can generate in a single response | `4096` |
171+
| `--context_window` | Total number of tokens in the model's context window (input + output) | `16385` |
171172
| `--attachment` | Path to a local PDF or .docx file, or a URL to a PDF resource | `None` |
172173
| `-m`, `--mode` | Operation mode for repository processing: `basic`, `auto` (default), or `advanced`. | `auto` |
173174
| `--delete-dir` | Enable deleting the downloaded repository after processing | `disabled` |

osa_tool/config/settings.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,20 @@
44

55
import os.path
66
from pathlib import Path
7-
from typing import Any, Literal, List
7+
from typing import Any, List, Literal
88

99
import tomli
1010
from pydantic import (
1111
AnyHttpUrl,
1212
BaseModel,
1313
ConfigDict,
1414
Field,
15-
model_validator,
1615
NonNegativeFloat,
1716
PositiveInt,
17+
model_validator,
1818
)
1919

20-
from osa_tool.utils.utils import parse_git_url, build_config_path
20+
from osa_tool.utils.utils import build_config_path, parse_git_url
2121

2222

2323
class GitSettings(BaseModel):
@@ -46,14 +46,14 @@ class ModelSettings(BaseModel):
4646
api: str
4747
rate_limit: PositiveInt
4848
base_url: str
49-
context_window: PositiveInt
5049
encoder: str
5150
host_name: AnyHttpUrl
5251
localhost: AnyHttpUrl
5352
model: str
5453
path: str
5554
temperature: NonNegativeFloat
56-
tokens: PositiveInt
55+
max_tokens: PositiveInt
56+
context_window: PositiveInt
5757
top_p: NonNegativeFloat
5858

5959

osa_tool/config/settings/arguments.yaml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,25 @@ model:
5656
5757
temperature:
5858
aliases: [ "--temperature" ]
59-
type: str
59+
type: float
6060
description: "Sampling temperature to use for the LLM output (0 = deterministic, 1 = creative)."
6161
example: 0.3, 0.9
6262

63-
tokens:
63+
max_tokens:
6464
aliases: [ "--max-tokens" ]
65-
type: str
66-
description: "Maximum number of tokens the model can generate in a single response."
65+
type: int
66+
description: "Maximum number of output tokens the model can generate in a single response."
6767
example: 256, 1024
6868

69+
context_window:
70+
aliases: [ "--context-window" ]
71+
type: int
72+
description: "Total number of tokens in the model's context window (input + output)."
73+
example: 16000, 200000
74+
6975
top_p:
7076
aliases: [ "--top-p" ]
71-
type: str
77+
type: float
7278
description: "Nucleus sampling probability (1.0 = all tokens considered)."
7379
example: 0.8, 0.95
7480

osa_tool/config/settings/config.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ no_pull_request = false
99
api = "itmo"
1010
rate_limit = 10
1111
base_url = "https://api.openai.com/v1"
12-
context_window = 4096
1312
encoder = "cl100k_base"
1413
host_name = "https://api.openai.com/v1"
1514
localhost = "http://localhost:11434/"
1615
model = "gpt-3.5-turbo"
1716
path = "generate"
1817
temperature = 0.05
19-
tokens = 4096
18+
max_tokens = 4096
19+
context_window = 16385
2020
top_p = 0.95
2121

2222
# General CLI-related defaults

osa_tool/models/models.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from uuid import uuid4
55

66
import dotenv
7+
import tiktoken
78
from langchain.schema import SystemMessage
89
from protollm.connectors import create_llm_connector
910

@@ -81,7 +82,7 @@ def __init__(self, config: Settings, prompt: str):
8182
"""
8283
self.job_id = str(uuid4())
8384
self.temperature = config.llm.temperature
84-
self.tokens_limit = config.llm.tokens
85+
self.tokens_limit = config.llm.max_tokens
8586
self.prompt = prompt
8687
self.roles = [
8788
SystemMessage(content="You are a helpful assistant for analyzing open-source repositories."),
@@ -160,7 +161,8 @@ def send_request(self, prompt: str) -> str:
160161
Returns:
161162
str: The response received from the request.
162163
"""
163-
self.initialize_payload(self.config, prompt)
164+
safe_prompt = self._limit_tokens(prompt)
165+
self.initialize_payload(self.config, safe_prompt)
164166
messages = self.payload["messages"]
165167
response = self.client.invoke(messages)
166168
return response.content
@@ -176,7 +178,8 @@ async def async_request(self, prompt: str) -> str:
176178
Returns:
177179
str: The response received from the request.
178180
"""
179-
self.initialize_payload(self.config, prompt)
181+
safe_prompt = self._limit_tokens(prompt)
182+
self.initialize_payload(self.config, safe_prompt)
180183
response = await self.client.ainvoke(self.payload["messages"])
181184
return response.content
182185

@@ -229,6 +232,40 @@ def _configure_api(self, api: str, model_name: str) -> None:
229232

230233
self.client = create_llm_connector(model_url=self._build_model_url(), **self._get_llm_params())
231234

235+
def _limit_tokens(self, text: str, safety_buffer: int = 100, mode: str = "middle-out") -> str:
236+
"""
237+
Limits text to fit within the model's context window.
238+
239+
Calculates: Available Input = Total Context - Max Output - Safety Buffer
240+
"""
241+
model_context_limit = getattr(self.config.llm, "context_window")
242+
max_output_tokens = self.config.llm.max_tokens
243+
encoding_name = self.config.llm.encoder
244+
245+
max_input_tokens = model_context_limit - max_output_tokens - safety_buffer
246+
247+
try:
248+
encoding = tiktoken.get_encoding(encoding_name)
249+
except ValueError:
250+
encoding = tiktoken.get_encoding("cl100k_base")
251+
252+
tokens = encoding.encode(text)
253+
254+
if len(tokens) <= max_input_tokens:
255+
return text
256+
257+
if mode == "start":
258+
truncated_tokens = tokens[:max_input_tokens]
259+
elif mode == "end":
260+
truncated_tokens = tokens[-max_input_tokens:]
261+
elif mode == "middle-out":
262+
half_limit = max_input_tokens // 2
263+
truncated_tokens = tokens[:half_limit] + tokens[-half_limit:]
264+
else:
265+
raise ValueError(f"Unknown mode: {mode}")
266+
267+
return encoding.decode(truncated_tokens)
268+
232269

233270
class ModelHandlerFactory:
234271
"""

osa_tool/run.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,14 @@
3131
from osa_tool.translation.dir_translator import DirectoryTranslator
3232
from osa_tool.translation.readme_translator import ReadmeTranslator
3333
from osa_tool.utils.arguments_parser import build_parser_from_yaml
34-
from osa_tool.utils.logger import setup_logging, logger
34+
from osa_tool.utils.logger import logger, setup_logging
3535
from osa_tool.utils.prompts_builder import PromptLoader
36-
from osa_tool.utils.utils import delete_repository, parse_folder_name, rich_section, osa_project_root
36+
from osa_tool.utils.utils import (
37+
delete_repository,
38+
osa_project_root,
39+
parse_folder_name,
40+
rich_section,
41+
)
3742
from osa_tool.validation.doc_validator import DocValidator
3843
from osa_tool.validation.paper_validator import PaperValidator
3944
from osa_tool.validation.report_generator import (
@@ -78,13 +83,14 @@ def main():
7883
logger.info(f"Output path changed to {output_path}")
7984

8085
# Load configurations and update
81-
config = load_configuration(
86+
config_loader = load_configuration(
8287
repo_url=args.repository,
8388
api=args.api,
8489
base_url=args.base_url,
8590
model_name=args.model,
8691
temperature=args.temperature,
8792
max_tokens=args.max_tokens,
93+
context_window=args.context_window,
8894
top_p=args.top_p,
8995
)
9096

@@ -107,8 +113,8 @@ def main():
107113
git_agent.clone_repository()
108114

109115
# Initialize ModeScheduler
110-
sourcerank = SourceRank(config)
111-
scheduler = ModeScheduler(config, sourcerank, prompts, args, workflow_manager, git_agent.metadata)
116+
sourcerank = SourceRank(config_loader)
117+
scheduler = ModeScheduler(config_loader, sourcerank, prompts, args, workflow_manager, git_agent.metadata)
112118
plan = scheduler.plan
113119

114120
if create_fork:
@@ -118,25 +124,25 @@ def main():
118124
# NOTE: Must run first - switches GitHub branches
119125
if plan.get("report"):
120126
rich_section("Report generation")
121-
analytics = ReportGenerator(config, sourcerank, prompts, git_agent.metadata)
127+
analytics = ReportGenerator(config_loader, sourcerank, prompts, git_agent.metadata)
122128
analytics.build_pdf()
123129
if create_fork:
124130
git_agent.upload_report(analytics.filename, analytics.output_path)
125131

126132
# NOTE: Must run first - switches GitHub branches
127133
if plan.get("validate_doc"):
128134
rich_section("Document validation")
129-
content = DocValidator(config, prompts).validate(plan.get("attachment"))
130-
va_re_gen = ValidationReportGenerator(config, git_agent.metadata, sourcerank)
135+
content = DocValidator(config_loader, prompts).validate(plan.get("attachment"))
136+
va_re_gen = ValidationReportGenerator(config_loader, git_agent.metadata, sourcerank)
131137
va_re_gen.build_pdf("Document", content)
132138
if create_fork:
133139
git_agent.upload_report(va_re_gen.filename, va_re_gen.output_path)
134140

135141
# NOTE: Must run first - switches GitHub branches
136142
if plan.get("validate_paper"):
137143
rich_section("Paper validation")
138-
content = PaperValidator(config, prompts).validate(plan.get("attachment"))
139-
va_re_gen = ValidationReportGenerator(config, git_agent.metadata, sourcerank)
144+
content = PaperValidator(config_loader, prompts).validate(plan.get("attachment"))
145+
va_re_gen = ValidationReportGenerator(config_loader, git_agent.metadata, sourcerank)
140146
va_re_gen.build_pdf("Paper", content)
141147
if create_fork:
142148
git_agent.upload_report(va_re_gen.filename, va_re_gen.output_path)
@@ -149,13 +155,13 @@ def main():
149155
# Auto translating names of directories
150156
if plan.get("translate_dirs"):
151157
rich_section("Directory and file translation")
152-
translation = DirectoryTranslator(config)
158+
translation = DirectoryTranslator(config_loader)
153159
translation.rename_directories_and_files()
154160

155161
# Docstring generation
156162
if plan.get("docstring"):
157163
rich_section("Docstrings generation")
158-
generate_docstrings(config, loop)
164+
generate_docstrings(config_loader, loop)
159165

160166
# License compiling
161167
if license_type := plan.get("ensure_license"):
@@ -165,7 +171,7 @@ def main():
165171
# Generate community documentation
166172
if plan.get("community_docs"):
167173
rich_section("Community docs generation")
168-
generate_documentation(config, git_agent.metadata)
174+
generate_documentation(config_loader, git_agent.metadata)
169175

170176
# Requirements generation
171177
if plan.get("requirements"):
@@ -176,21 +182,21 @@ def main():
176182
if plan.get("readme"):
177183
rich_section("README generation")
178184
readme_agent = ReadmeAgent(
179-
config, prompts, plan.get("attachment"), plan.get("refine_readme"), git_agent.metadata
185+
config_loader, prompts, plan.get("attachment"), plan.get("refine_readme"), git_agent.metadata
180186
)
181187
readme_agent.generate_readme()
182188

183189
# Readme translation
184190
translate_readme = plan.get("translate_readme")
185191
if translate_readme:
186192
rich_section("README translation")
187-
ReadmeTranslator(config, prompts, git_agent.metadata, translate_readme).translate_readme()
193+
ReadmeTranslator(config_loader, prompts, git_agent.metadata, translate_readme).translate_readme()
188194

189195
# About section generation
190196
about_gen = None
191197
if plan.get("about"):
192198
rich_section("About Section generation")
193-
about_gen = AboutGenerator(config, prompts, git_agent)
199+
about_gen = AboutGenerator(config_loader, prompts, git_agent)
194200
about_gen.generate_about_content()
195201
if create_fork:
196202
git_agent.update_about_section(about_gen.get_about_content())
@@ -200,8 +206,8 @@ def main():
200206
# Generate platform-specified CI/CD files
201207
if plan.get("generate_workflows"):
202208
rich_section("Workflows generation")
203-
workflow_manager.update_workflow_config(config, plan)
204-
workflow_manager.generate_workflow(config)
209+
workflow_manager.update_workflow_config(config_loader, plan)
210+
workflow_manager.generate_workflow(config_loader)
205211

206212
# Organize repository by adding 'tests' and 'examples' directories if they aren't exist
207213
if plan.get("organize"):
@@ -349,6 +355,7 @@ def load_configuration(
349355
model_name: str,
350356
temperature: Optional[str] = None,
351357
max_tokens: Optional[str] = None,
358+
context_window: Optional[str] = None,
352359
top_p: Optional[str] = None,
353360
) -> ConfigLoader:
354361
"""
@@ -360,7 +367,8 @@ def load_configuration(
360367
base_url: URL of the provider compatible with API OpenAI
361368
model_name: Specific LLM model to use.
362369
temperature: Sampling temperature for the model.
363-
max_tokens: Maximum number of tokens to generate.
370+
max_tokens: Maximum number of output tokens to generate.
371+
context_window: Total number of tokens in the model's context window (input + output).
364372
top_p: Nucleus sampling value.
365373
366374
Returns:
@@ -380,6 +388,7 @@ def load_configuration(
380388
"model": model_name,
381389
"temperature": temperature,
382390
"max_tokens": max_tokens,
391+
"context_window": context_window,
383392
"top_p": top_p,
384393
}
385394
)

0 commit comments

Comments
 (0)