Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Composite GitHub Action that runs the Open Source Advisor (OSA) tool in
# incremental docstring mode against the Python files changed by the triggering
# event, copies the regenerated files back into the workspace, and commits them.
#
# Security note: workflow expressions (${{ ... }}) are never spliced directly
# into `run:` scripts. Branch names, file names and user inputs are passed
# through `env:` and referenced as quoted shell variables, so their content
# cannot be interpreted as shell syntax.
name: 'Open Source Advisor Action'
description: 'Automatically generates missing docstrings and audits your repository using various LLM providers'
author: 'aimclub'

inputs:
  # Tokens (secrets)
  openai_api_key:
    description: 'API key for OpenAI, VseGPT or OpenRouter'
    required: false
  authorization_key:
    description: 'API key for GigaChat provider'
    required: false
  github_token:
    description: 'GitHub Token for repository access'
    required: true
    default: ${{ github.token }}

  # LLM settings
  api:
    description: 'LLM Provider (openai, itmo, ollama, gigachat)'
    required: false
  base_url:
    description: 'API Base URL'
    required: false
  model:
    description: 'LLM Model name'
    required: false
  mode:
    description: 'OSA processing mode (basic, auto, advanced)'
    required: false
    default: 'advanced'

runs:
  using: "composite"
  steps:
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.11'

    - name: Install dependencies
      shell: bash
      run: |
        # GITHUB_ACTION_PATH is the runner-provided location of this action's
        # files; quoting keeps paths containing spaces intact.
        python -m pip install --upgrade pip
        pip install -r "$GITHUB_ACTION_PATH/requirements.txt"
        pip install "$GITHUB_ACTION_PATH"

    - name: Get changed files
      id: changed-files
      uses: tj-actions/changed-files@v47
      with:
        files: '**.py'

    - name: Run OSA
      # Nothing to document when no Python file changed; this matches the
      # guard on the sync and commit steps below.
      if: steps.changed-files.outputs.any_changed == 'true'
      shell: bash
      env:
        # Provider credentials; unset inputs arrive as empty strings.
        OPENAI_API_KEY: ${{ inputs.openai_api_key }}
        AUTHORIZATION_KEY: ${{ inputs.authorization_key }}
        GIT_TOKEN: ${{ inputs.github_token }}
        # Untrusted / user-controlled values, handed over as data only.
        TARGET_BRANCH: ${{ github.head_ref || github.ref_name }}
        ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        INPUT_MODE: ${{ inputs.mode }}
        INPUT_API: ${{ inputs.api }}
        INPUT_BASE_URL: ${{ inputs.base_url }}
        INPUT_MODEL: ${{ inputs.model }}
      run: |
        # Remove any stale directory named after the repository so a leftover
        # from a previous run cannot be picked up.
        REPO_NAME="$(basename "$GITHUB_REPOSITORY")"
        rm -rf "$REPO_NAME"

        # The changed-files output is a space-separated list; split it into an
        # array without glob expansion.
        read -r -a TARGET_FILES <<< "$ALL_CHANGED_FILES"

        # Base command, built as an array so each argument stays one word.
        CMD=(
          python -m osa_tool.run
          -r "https://github.com/$GITHUB_REPOSITORY"
          -b "$TARGET_BRANCH"
          --web-mode --no-fork --no-pull-request
          --docstring --incremental
          --target-files "${TARGET_FILES[@]}"
        )

        # Optional arguments are appended only when the user supplied them.
        if [ -n "$INPUT_MODE" ]; then CMD+=(--mode "$INPUT_MODE"); fi
        if [ -n "$INPUT_API" ]; then CMD+=(--api "$INPUT_API"); fi
        if [ -n "$INPUT_BASE_URL" ]; then CMD+=(--base-url "$INPUT_BASE_URL"); fi
        if [ -n "$INPUT_MODEL" ]; then CMD+=(--model "$INPUT_MODEL"); fi

        echo "Executing: ${CMD[*]}"
        "${CMD[@]}"

    - name: Sync modified files
      shell: bash
      if: steps.changed-files.outputs.any_changed == 'true'
      env:
        ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
      run: |
        REPO_NAME="$(basename "$GITHUB_REPOSITORY")"
        if [ -d "$REPO_NAME" ]; then
          echo "Syncing ONLY target files from cloned directory..."
          read -r -a CHANGED_FILES <<< "$ALL_CHANGED_FILES"
          for file in "${CHANGED_FILES[@]}"; do
            # Copy back only files that exist in the cloned directory; files
            # deleted by the triggering change are skipped.
            if [ -f "./$REPO_NAME/$file" ]; then
              mkdir -p "$(dirname "./$file")"
              cp -a "./$REPO_NAME/$file" "./$file"
              echo "Synced: $file"
            fi
          done
          # Drop the cloned directory so it is not present for the commit step.
          rm -rf "./$REPO_NAME"
        fi

    - name: Commit and Push
      if: steps.changed-files.outputs.any_changed == 'true'
      uses: stefanzweifel/git-auto-commit-action@v7
      with:
        commit_message: "docs(osa): auto-generate missing docstrings"
        file_pattern: "*.py"
        commit_user_name: "osa-bot"
        commit_user_email: "osa-bot@users.noreply.github.com"
1 change: 1 addition & 0 deletions osa_tool/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class GitSettings(BaseModel):
host_domain: str | None = None
host: str | None = None
name: str = ""
osa_branch_name: str = "osa_tool"

@model_validator(mode="after")
def set_git_attributes(self):
Expand Down
11 changes: 11 additions & 0 deletions osa_tool/config/settings/arguments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ arguments:
If flag omitted only __init__.py will be ignored.
example: "tests moduleA/featureB __init__.py"

incremental:
aliases: [ "--incremental" ]
type: flag
description: "Enable incremental mode for tasks (e.g., generate docstrings only for missing ones and skip main idea update)."

target_files:
aliases: [ "--target-files" ]
type: list
description: "Space-separated list of specific files to analyze. If set, OSA will focus only on these files."
example: "path/to/file1.py path/to/file2.py"

report:
aliases: [ "--report" ]
type: flag
Expand Down
2 changes: 2 additions & 0 deletions osa_tool/config/settings/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ refine_readme = false
requirements = false
organize = false
about = false
incremental = false
target_files = []

#Workflow Settings
[workflows]
Expand Down
4 changes: 2 additions & 2 deletions osa_tool/core/git/git_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ def clone_repository(self) -> None:
logger.error(f"Directory {self.clone_dir} exists but is not a valid Git repository")
raise

elif self._check_branch_existence():
self._clone_chosen_branch()
elif self._check_branch_existence(None):
self._clone_chosen_branch(None)
else:
self._clone_default_branch()

Expand Down
19 changes: 9 additions & 10 deletions osa_tool/operations/codebase/docstring_generation/docgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,24 +456,23 @@ def insert_docstring_in_code(
using the method's body from method_details['source_code'] to locate the method.
Handles multi-line signatures, decorators, async definitions, and existing docstrings.
"""
method_body = DocGen.strip_docstring_from_body(method_details["source_code"].strip())
docstring_clean = DocGen.extract_pure_docstring(generated_docstring)
source_code = source_code.replace("\r\n", "\n")
method_source = method_details["source_code"].replace("\r\n", "\n")

docstring_clean = DocGen.extract_pure_docstring(generated_docstring.replace("\r\n", f"\n"))

# Locate the method within the source code
match = re.search(re.escape(method_details["source_code"]), source_code)
if not match:
return source_code
body_start = match.start()
body_start = source_code.find(method_source)

if not body_start:
if body_start == -1:
return source_code

start = body_start

while start > 0 and source_code[start - 1] in " \t\n":
start -= 1

end = body_start + len(method_body)
end = body_start + len(method_source)

method_block = source_code[start:end]
method_lines = method_block.splitlines(keepends=True)
Expand All @@ -483,7 +482,7 @@ def insert_docstring_in_code(
def indent_docstring(docstring: str) -> str:
lines = docstring.strip().splitlines()
if len(lines) == 1:
return f'{indent}"""{lines[0]}"""'
return f'{indent}"""{lines[0]}"""\n'
indented = [f"{indent}" + lines[0]]
for line in lines[1:]:
indented.append(f"{indent}{line}")
Expand All @@ -492,7 +491,7 @@ def indent_docstring(docstring: str) -> str:
# Check for existing docstring right after signature
signature_end_index = None
for i, line in enumerate(method_lines):
if line.strip().endswith(":"):
if line.split("#")[0].strip().endswith(":"):
signature_end_index = i
break

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ def __init__(
config_manager: ConfigManager,
ignore_list: list[str],
plan: Plan,
incremental: bool = False,
target_files: list[str] = None,
) -> None:
self.config_manager = config_manager
self.ignore_list = ignore_list
self.incremental = incremental
self.target_files = target_files

self.sem = asyncio.Semaphore(100)
self.workers = multiprocessing.cpu_count()
Expand All @@ -26,7 +30,11 @@ def __init__(
self.repo_path = parse_folder_name(self.repo_url)

self.dg = DocGen(self.config_manager)
self.ts = OSA_TreeSitter(self.repo_path, self.ignore_list)
self.ts = OSA_TreeSitter(
self.repo_path,
self.ignore_list,
target_files=self.target_files,
)
self.plan = plan

def run(self) -> None:
Expand Down Expand Up @@ -92,6 +100,11 @@ async def _run_async(self) -> None:

await self.dg._write_augmented_code(res, cl_augmented, self.sem)

if self.incremental:
logger.info("Incremental mode active. Skipping main idea generation and full codebase update.")
self.plan.mark_done("docstring")
return

# generate the main idea
await self.dg.generate_the_main_idea(res)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@ class OSA_TreeSitter(object):
cwd: A current working directory with source code files.
"""

def __init__(self, scripts_path: str, ignore_list: list[str] = None):
def __init__(self, scripts_path: str, ignore_list: list[str] = None, target_files: list[str] = None):
"""Initialization of the instance based on the provided path to the scripts.

Args:
scripts_path: provided by user path to the scripts.
ignore_list: files that will be ignored.
target_files: files that need to be checked.
"""
self.cwd = scripts_path
self.import_map = {}
if ignore_list:
self.ignore_list = ignore_list
else:
self.ignore_list = ["__init__.py"]
self.target_files = target_files

def files_list(self, path: str) -> tuple[list, 0] | tuple[list[str], 1]:
"""Method provides a list of files occurring in the provided path.
Expand All @@ -45,6 +48,15 @@ def files_list(self, path: str) -> tuple[list, 0] | tuple[list[str], 1]:
"""
script_files = []

print(self.target_files)

if self.target_files is not None:
for file_path in self.target_files:
p = Path(os.path.join(self.cwd, file_path)).resolve()
if p.exists() and str(p).endswith(".py") and not self._is_ignored(p) and p.name not in self.ignore_list:
script_files.append(str(p))
return script_files, 0

if os.path.isdir(path):
for root, _, files in os.walk(path):
for file in files:
Expand Down Expand Up @@ -632,7 +644,7 @@ def _extract_function_details(
arguments.append(param_node.text.decode("utf-8"))

source_bytes = source_code.encode("utf-8")
source = source_bytes[function_node.start_byte : node.end_byte].decode("utf-8")
source = source_bytes[function_node.start_byte : function_node.end_byte].decode("utf-8")

return_node = function_node.child_by_field_name("return_type")
return_type = None
Expand Down
7 changes: 5 additions & 2 deletions osa_tool/operations/codebase/docstring_generation/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ def _build_graph(self):
dependency_node = self._resolve_call(node_id, call)

if dependency_node and dependency_node in self.nodes:
self.graph[node_id].add(dependency_node)
self.reverse_graph[dependency_node].add(node_id)
if node_id != dependency_node: # Avoiding self-recursion
self.graph[node_id].add(dependency_node)
self.reverse_graph[dependency_node].add(node_id)
else:
logger.debug(f"Skipping self-recursion for node {node_id}")

def _resolve_call(self, caller_node_id: str, call_name: str) -> str:
"""
Expand Down
30 changes: 24 additions & 6 deletions osa_tool/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def main():
config_manager = ConfigManager(args)

# Initialize Git agent and Workflow Manager for used platform, perform operations
git_agent, workflow_manager = initialize_git_platform(args)
git_agent, workflow_manager = initialize_git_platform(args, config_manager)

if create_fork:
git_agent.star_repository()
Expand Down Expand Up @@ -144,7 +144,14 @@ def main():
# Docstring generation
if plan.get("docstring"):
rich_section("Docstrings generation")
DocstringsGenerator(config_manager, args.ignore_list, plan).run()

DocstringsGenerator(
config_manager=config_manager,
ignore_list=args.ignore_list,
plan=plan,
incremental=args.incremental,
target_files=args.target_files,
).run()

# License compiling
if plan.get("ensure_license"):
Expand Down Expand Up @@ -232,15 +239,26 @@ def main():
sys.exit(1)


def initialize_git_platform(args) -> tuple[GitAgent, WorkflowManager]:
def initialize_git_platform(args, config_manager: ConfigManager) -> tuple[GitAgent, WorkflowManager]:
if os.getenv("GITHUB_ACTIONS").lower() == "true":
target_branch = args.branch
else:
target_branch = config_manager.config.osa_branch_name

if "github.com" in args.repository:
git_agent = GitHubAgent(args.repository, args.branch, author=args.author)
git_agent = GitHubAgent(
args.repository, repo_branch_name=args.branch, branch_name=target_branch, author=args.author
)
workflow_manager = GitHubWorkflowManager(args.repository, git_agent.metadata, args)
elif "gitlab." in args.repository:
git_agent = GitLabAgent(args.repository, args.branch, author=args.author)
git_agent = GitLabAgent(
args.repository, repo_branch_name=args.branch, branch_name=target_branch, author=args.author
)
workflow_manager = GitLabWorkflowManager(args.repository, git_agent.metadata, args)
elif "gitverse.ru" in args.repository:
git_agent = GitverseAgent(args.repository, args.branch, author=args.author)
git_agent = GitverseAgent(
args.repository, repo_branch_name=args.branch, branch_name=target_branch, author=args.author
)
workflow_manager = GitverseWorkflowManager(args.repository, git_agent.metadata, args)
else:
raise ValueError(f"Cannot initialize Git Agent and Workflow Manager for this platform: {args.repository}")
Expand Down
Loading