Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 56 additions & 44 deletions src/codegen/sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections.abc import Iterable
from contextlib import contextmanager
from pathlib import Path
from typing import Literal
from xml.dom.minidom import parseString

import dicttoxml
Expand Down Expand Up @@ -242,54 +243,65 @@ def get_language_file_extensions(language: ProgrammingLanguage):
return set(TSFile.get_extensions())


def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage:
    """Determine the primary programming language of a project folder.

    Two strategies are supported:

    * ``"package_json"`` (default): returns TypeScript when ``package.json``
      exists directly inside ``folder_path`` and Python otherwise. Fast, but
      only a heuristic; the folder itself is not validated.
    * ``"most_common"``: walks the folder recursively, counts files whose
      suffix matches a supported language, and returns the language with the
      most matching files.

    Args:
        folder_path (str): Path to the folder to analyze.
        strategy: Which detection strategy to use.

    Returns:
        ProgrammingLanguage: The detected language. With ``"most_common"``,
        ``ProgrammingLanguage.UNSUPPORTED`` is returned when no file matches
        any supported extension.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values, or if
            ``strategy`` is ``"most_common"`` and ``folder_path`` is not an
            existing directory.
    """
    if strategy == "package_json":
        # TODO: Hacky implementation that checks for package.json.
        # Faster but less accurate than the most_common strategy.
        package_json_path = Path(folder_path) / "package.json"
        if package_json_path.exists():
            return ProgrammingLanguage.TYPESCRIPT
        return ProgrammingLanguage.PYTHON

    if strategy != "most_common":
        # Fail loudly instead of implicitly returning None, which would
        # violate the declared return type.
        msg = f"Unknown strategy: {strategy!r}"
        raise ValueError(msg)

    # Imported lazily, as in the original implementation.
    from codegen.sdk.python import PyFile
    from codegen.sdk.typescript.file import TSFile

    extensions = {
        ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
        ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
    }

    folder = Path(folder_path)
    if not folder.exists() or not folder.is_dir():
        msg = f"Invalid folder path: {folder_path}"
        raise ValueError(msg)

    # Directory names whose contents never count towards a language.
    # Matched against whole path components *below* ``folder`` so that
    # look-alike names (``convenient.py``, ``environment/``) and the location
    # of the project itself (e.g. ``/home/me/venvs/project``) do not cause
    # files to be dropped.
    ignored_dirs = {".git", "node_modules", "__pycache__", "venv", ".venv", ".env"}

    # Number of matching files per language.
    language_counts = Counter()

    for file_path in folder.rglob("*"):
        # Skip hidden files and directories themselves.
        if file_path.name.startswith(".") or file_path.is_dir():
            continue

        # Skip anything located inside an ignored directory.
        parent_parts = file_path.relative_to(folder).parts[:-1]
        if not ignored_dirs.isdisjoint(parent_parts):
            continue

        # Count the file for every language that claims its extension.
        for language, exts in extensions.items():
            if file_path.suffix in exts:
                language_counts[language] += 1

    # No supported source files were found at all.
    if not language_counts:
        return ProgrammingLanguage.UNSUPPORTED

    # Language with the highest file count.
    return language_counts.most_common(1)[0][0]


def split_git_path(filepath: str) -> tuple[str, str | None]:
Expand Down