|
2 | 2 |
|
3 | 3 | import ast |
4 | 4 | import logging |
| 5 | +import re |
5 | 6 |
|
6 | 7 | from ..models.attribute import Attribute |
7 | 8 | from ..models.finding import Citation, Finding, Remediation |
@@ -412,3 +413,217 @@ def _create_remediation(self) -> Remediation: |
412 | 413 | ) |
413 | 414 | ], |
414 | 415 | ) |
| 416 | + |
| 417 | + |
class SemanticNamingAssessor(BaseAssessor):
    """Assesses naming conventions and semantic clarity.

    Tier 3 Important (1.5% weight) - Consistent naming improves code
    readability and helps LLMs understand intent.

    Only Python repositories are analysed at the moment; any other
    language mix yields a "not applicable" finding.
    """

    @property
    def attribute_id(self) -> str:
        """Return the stable identifier of this attribute."""
        return "semantic_naming"

    @property
    def tier(self) -> int:
        """Return the tier this attribute belongs to."""
        return 3  # Important

    @property
    def attribute(self) -> Attribute:
        """Return the attribute definition used in findings."""
        return Attribute(
            id=self.attribute_id,
            name="Semantic Naming",
            category="Code Quality",
            tier=self.tier,
            description="Systematic naming patterns following language conventions",
            criteria="Language conventions followed, avoid generic names",
            default_weight=0.015,
        )

    def is_applicable(self, repository: Repository) -> bool:
        """Only applicable to code repositories."""
        return len(repository.languages) > 0

    def assess(self, repository: Repository) -> Finding:
        """Check naming conventions and patterns.

        Returns the Python naming finding when "Python" is among the
        repository languages, otherwise a "not applicable" finding that
        lists the detected languages.
        """
        if "Python" in repository.languages:
            return self._assess_python_naming(repository)

        return Finding.not_applicable(
            self.attribute,
            reason=f"Naming check not implemented for {list(repository.languages.keys())}",
        )

    def _list_python_files(self, repository: Repository) -> list:
        """Return repository-relative paths (as strings) of Python files.

        Asks git for the ``*.py`` files it lists and, if that fails for
        any reason, falls back to a recursive filesystem glob.
        """
        try:
            result = safe_subprocess_run(
                ["git", "ls-files", "*.py"],
                cwd=repository.path,
                capture_output=True,
                text=True,
                timeout=30,
                check=True,
            )
            return [f for f in result.stdout.strip().split("\n") if f]
        except Exception:
            # Deliberate best-effort: whatever went wrong with the git
            # call, the assessment can still proceed from a plain glob.
            return [
                str(f.relative_to(repository.path))
                for f in repository.path.rglob("*.py")
            ]

    def _assess_python_naming(self, repository: Repository) -> Finding:
        """Assess Python naming conventions using AST parsing.

        Public functions (sync and async) are checked against snake_case
        and public classes against PascalCase.  The score is weighted 60%
        functions / 40% classes, minus 5 points per generically named
        function (capped at 20).  A score of 75 or more passes.
        """
        python_files = self._list_python_files(repository)

        # Sample files for large repositories (max 50 files)
        if len(python_files) > 50:
            import random

            # Sort and use a fixed seed so that repeated assessments of an
            # unchanged repository always inspect the same files and thus
            # report the same score.
            python_files = random.Random(0).sample(sorted(python_files), 50)

        total_functions = 0
        compliant_functions = 0
        total_classes = 0
        compliant_classes = 0
        generic_names_count = 0

        # Patterns
        snake_case_pattern = re.compile(r"^[a-z_][a-z0-9_]*$")
        pascal_case_pattern = re.compile(r"^[A-Z][a-zA-Z0-9]*$")
        generic_names = {"temp", "data", "info", "obj", "var", "tmp", "x", "y", "z"}

        for file_path in python_files:
            full_path = repository.path / file_path
            try:
                with open(full_path, "r", encoding="utf-8") as f:
                    content = f.read()

                tree = ast.parse(content, filename=str(file_path))
            except (OSError, SyntaxError, ValueError, RecursionError):
                # Unreadable or unparsable files are skipped.  ValueError
                # covers UnicodeDecodeError (its subclass) and source that
                # contains null bytes; RecursionError covers pathologically
                # nested source.
                continue

            for node in ast.walk(tree):
                # Check function names (both "def" and "async def")
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    # Skip private/magic methods
                    if node.name.startswith("_"):
                        continue

                    total_functions += 1
                    if snake_case_pattern.match(node.name):
                        compliant_functions += 1

                    # Check for generic names
                    if node.name.lower() in generic_names:
                        generic_names_count += 1

                # Check class names
                elif isinstance(node, ast.ClassDef):
                    # Skip private classes
                    if node.name.startswith("_"):
                        continue

                    total_classes += 1
                    if pascal_case_pattern.match(node.name):
                        compliant_classes += 1

        if total_functions == 0 and total_classes == 0:
            return Finding.not_applicable(
                self.attribute, reason="No Python functions or classes found"
            )

        # Calculate scores; a category with no members counts as fully
        # compliant so it does not drag the weighted score down.
        function_compliance = (
            (compliant_functions / total_functions * 100)
            if total_functions > 0
            else 100
        )
        class_compliance = (
            (compliant_classes / total_classes * 100) if total_classes > 0 else 100
        )

        # Overall score: 60% functions, 40% classes
        naming_score = (function_compliance * 0.6) + (class_compliance * 0.4)

        # Penalize generic names
        if generic_names_count > 0:
            penalty = min(20, generic_names_count * 5)
            naming_score = max(0, naming_score - penalty)

        status = "pass" if naming_score >= 75 else "fail"

        # Build evidence
        evidence = [
            f"Functions: {compliant_functions}/{total_functions} follow snake_case ({function_compliance:.1f}%)",
            f"Classes: {compliant_classes}/{total_classes} follow PascalCase ({class_compliance:.1f}%)",
        ]

        if generic_names_count > 0:
            evidence.append(
                f"Generic names detected: {generic_names_count} occurrences"
            )
        else:
            evidence.append("No generic names (temp, data, obj) detected")

        return Finding(
            attribute=self.attribute,
            status=status,
            score=naming_score,
            measured_value=f"functions:{function_compliance:.0f}%, classes:{class_compliance:.0f}%",
            threshold="≥75% compliance",
            evidence=evidence,
            remediation=self._create_remediation() if status == "fail" else None,
            error_message=None,
        )

    def _create_remediation(self) -> Remediation:
        """Create remediation guidance for naming issues."""
        return Remediation(
            summary="Improve naming consistency and semantic clarity",
            steps=[
                "Follow language naming conventions (PEP 8 for Python)",
                "Use snake_case for functions/variables in Python",
                "Use PascalCase for classes in Python",
                "Use descriptive names (>3 characters, no abbreviations)",
                "Avoid generic names: temp, data, obj, var, info",
                "Use verbs for functions: get_user, calculate_total",
                "Use nouns for classes: User, OrderService",
                "Enforce with linters: pylint --enable=invalid-name",
            ],
            tools=["pylint", "black"],
            commands=[
                "# Check naming conventions",
                "pylint --disable=all --enable=invalid-name src/",
            ],
            examples=[
                """# Good naming
class UserService:
    MAX_LOGIN_ATTEMPTS = 5

    def create_user(self, email: str) -> User:
        pass

    def delete_user(self, user_id: str) -> None:
        pass

# Bad naming
class userservice:  # Should be PascalCase
    maxLoginAttempts = 5  # Should be UPPER_CASE

    def CreateUser(self, e: str) -> User:  # Should be snake_case
        pass

    def data(self, temp):  # Generic names
        pass
""",
            ],
            citations=[
                Citation(
                    source="Python.org",
                    title="PEP 8 - Style Guide for Python Code",
                    url="https://peps.python.org/pep-0008/#naming-conventions",
                    relevance="Official Python naming conventions",
                ),
            ],
        )
0 commit comments