diff --git a/.github/workflows/harmony-check.yml b/.github/workflows/harmony-check.yml new file mode 100644 index 0000000..04529e7 --- /dev/null +++ b/.github/workflows/harmony-check.yml @@ -0,0 +1,60 @@ +name: Code Harmony Check + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +jobs: + harmony-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + + - name: Install Python Code Harmonizer + run: | + python -m pip install --upgrade pip + pip install . + + - name: Run Harmony Analysis + run: | + echo "========================================" + echo "Python Code Harmonizer - CI/CD Check" + echo "========================================" + + # Analyze all Python files in src/ directory + harmonizer src/**/*.py || true + + # Note: Currently harmonizer doesn't set exit codes based on scores + # Future enhancement: fail build if critical disharmony detected + + echo "" + echo "✅ Harmony check completed" + echo "Review output above for any disharmony warnings" + + - name: Analyze specific modules (optional) + run: | + # Example: Analyze specific critical modules with comments + echo "" + echo "Analyzing critical modules..." + + # Add your critical files here + # harmonizer src/core/important.py + # harmonizer src/api/endpoints.py + continue-on-error: true + +# Note: To make this workflow fail on high disharmony scores, +# you'll need to wrap harmonizer in a script that checks scores +# and exits with non-zero code if threshold exceeded. 
+# +# Example future enhancement: +# - name: Check harmony with threshold +# run: python scripts/ci_harmony_check.py --threshold 0.8 --fail-on-high diff --git a/.harmonizer.yml.template b/.harmonizer.yml.template new file mode 100644 index 0000000..1194aa6 --- /dev/null +++ b/.harmonizer.yml.template @@ -0,0 +1,125 @@ +# Python Code Harmonizer Configuration Template +# +# NOTE: Configuration file support is planned for a future release +# This template shows what configuration will look like when implemented +# +# Once supported: copy this file to .harmonizer.yml in your project root +# and the harmonizer will read this configuration automatically + +# Disharmony threshold (functions above this are flagged) +# Default: 0.5 +# Range: 0.0 (very strict) to 2.0 (very lenient) +threshold: 0.5 + +# Output format +# Options: table, json, csv +# Default: table +output_format: table + +# Severity level definitions +severity_levels: + critical: 1.2 # Score >= 1.2 + high: 0.8 # Score >= 0.8 + medium: 0.5 # Score >= 0.5 + low: 0.3 # Score >= 0.3 + excellent: 0.0 # Score < 0.3 + +# Files and patterns to ignore +ignore_patterns: + - "**/test_*.py" # Test files + - "**/tests/*.py" # Test directories + - "**/migrations/*.py" # Database migrations + - "**/*_test.py" # Alternative test naming + - "**/conftest.py" # Pytest configuration + - "**/__pycache__/**" # Python cache + - "**/.venv/**" # Virtual environments + +# Files and patterns to include (overrides ignore if specified) +include_patterns: + - "src/**/*.py" # Source files + - "app/**/*.py" # Application files + # - "scripts/**/*.py" # Uncomment to include scripts + +# Fail build in CI/CD if any function exceeds this threshold +# Set to null to never fail builds +# Default: null (warnings only) +fail_threshold: null +# fail_threshold: 1.0 # Uncomment to fail on critical disharmony + +# Enable verbose output +# Default: false +verbose: false + +# Show function details in output +# Default: true +show_function_details: true + +# Sort results by 
score (descending) +# Default: true +sort_by_score: true + +# Color output (for terminal) +# Default: true +color_output: true + +# Custom vocabulary extensions +# Add domain-specific semantic mappings +# (Advanced: requires understanding of DIVE-V2 engine) +custom_vocabulary: + # Example: Map domain-specific terms + # "authenticate": "justice" + # "authorize": "power" + # "notify": "love" + +# Report options +report: + # Show summary statistics + show_summary: true + + # Show only disharmonious functions + only_show_disharmony: false + + # Include harmonious functions in output + include_harmonious: true + + # Maximum functions to display (0 = unlimited) + max_display: 0 + +# Future enhancement placeholders +# These will be implemented in upcoming versions + +# auto_fix: +# enabled: false +# suggestions: true + +# metrics: +# track_over_time: false +# output_file: "harmony_metrics.json" + +# integrations: +# github: +# create_review_comments: false +# jira: +# create_tickets_for_critical: false + +--- + +# Example configurations for different use cases: + +# STRICT MODE (for new projects) +# threshold: 0.3 +# fail_threshold: 0.5 + +# LENIENT MODE (for legacy code cleanup) +# threshold: 0.8 +# fail_threshold: 1.2 + +# CI/CD MODE (fail on critical only) +# threshold: 0.5 +# fail_threshold: 1.0 +# only_show_disharmony: true + +# DEVELOPMENT MODE (show everything) +# threshold: 0.5 +# verbose: true +# show_function_details: true diff --git a/.pre-commit-config.yaml.template b/.pre-commit-config.yaml.template new file mode 100644 index 0000000..09b4de0 --- /dev/null +++ b/.pre-commit-config.yaml.template @@ -0,0 +1,51 @@ +# Python Code Harmonizer - Pre-commit Hook Template +# +# Copy this file to .pre-commit-config.yaml in your project root +# Install pre-commit: pip install pre-commit +# Install hooks: pre-commit install +# Run manually: pre-commit run --all-files + +repos: + # Standard pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: 
v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: check-ast + + # Python Code Formatting + - repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black + language_version: python3 + + # Python Linting + - repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + + # Python Code Harmonizer (Semantic Analysis) + - repo: local + hooks: + - id: harmonizer + name: Python Code Harmonizer + entry: harmonizer + language: system + types: [python] + pass_filenames: true + # Note: Install harmonizer first: pip install /path/to/Python-Code-Harmonizer + +# Usage Notes: +# - This checks code harmony before every commit +# - To skip temporarily: git commit --no-verify +# - To run manually: pre-commit run harmonizer --all-files +# +# Customization: +# - Set 'pass_filenames: false' to check all files every time +# - Add 'args: ["--threshold", "0.8"]' when threshold support is added diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..ad2815f --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,71 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Harmonizer: Check Current File", + "type": "shell", + "command": "harmonizer", + "args": [ + "${file}" + ], + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new", + "clear": true + }, + "group": { + "kind": "test", + "isDefault": false + } + }, + { + "label": "Harmonizer: Check All Source Files", + "type": "shell", + "command": "harmonizer", + "args": [ + "src/**/*.py" + ], + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new", + "clear": true + }, + "group": { + "kind": "test", + "isDefault": false + } + }, + { + "label": "Harmonizer: Check Workspace", + "type": "shell", + "command": "find", + "args": [ + ".", + "-name", + "*.py", + "-not", + "-path", + "*/venv/*", + "-not", + "-path", + 
"*/.venv/*", + "-exec", + "harmonizer", + "{}", + ";" + ], + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new", + "clear": true + }, + "group": { + "kind": "test", + "isDefault": false + } + } + ] +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..503d360 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,223 @@ +# Changelog + +All notable changes to Python Code Harmonizer will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +--- + +## [Unreleased] + +### Added +- Comprehensive documentation suite +- Integration templates (GitHub Actions, pre-commit, VS Code) +- Quick reference guide +- Tool comparison guide +- Troubleshooting guide +- Real-world example files + +--- + +## [1.0.0] - 2024-01-XX + +### Added +- **Initial release** of Python Code Harmonizer +- **DIVE-V2 Engine** (Divine Invitation Semantic Engine) + - 4D semantic coordinate system (Love, Justice, Power, Wisdom) + - 113-keyword semantic vocabulary + - Euclidean distance calculation for semantic harmony + - Caching for performance optimization +- **AST Semantic Parser** + - Python AST to semantic concept translation + - Function name and operation analysis + - Context-aware semantic extraction +- **Harmonizer CLI** + - Command-line interface for analyzing Python files + - Colored output with severity indicators + - Summary statistics + - File-not-found and syntax error handling +- **ICE Framework** (Intent, Context, Execution) + - Philosophical foundation for semantic analysis + - Anchor Point (1,1,1,1) as perfect harmony reference +- **Zero runtime dependencies** + - Pure Python 3.8+ implementation + - No external packages required +- **Comprehensive test suite** + - 20+ unit tests + - Full coverage of core functionality + - Edge case handling + +### Documentation +- README.md with quick start 
guide +- Philosophy and theory explanation +- Installation instructions +- Basic usage examples + +--- + +## [0.1.0] - 2024-01-XX (Internal Development) + +### Added +- Initial proof of concept +- Basic semantic analysis engine +- Function name to coordinate mapping +- Simple distance calculation +- Console output prototype + +--- + +## Version Numbering + +**Python Code Harmonizer** follows [Semantic Versioning](https://semver.org/): + +- **MAJOR** version: Incompatible API changes +- **MINOR** version: Backwards-compatible functionality additions +- **PATCH** version: Backwards-compatible bug fixes + +--- + +## Release Notes + +### v1.0.0 - "Anchor Point" Release + +The inaugural release of Python Code Harmonizer introduces a revolutionary approach to code analysis: **semantic harmony detection**. Unlike traditional linters that check style or type checkers that verify types, Harmonizer answers the question: + +**"Does your code DO what its name SAYS it does?"** + +**Key Features:** + +1. **Philosophical Foundation** + - Built on ICE Framework (Intent, Context, Execution) + - Anchor Point (1,1,1,1) represents perfect logical harmony + - Four dimensions: Love, Justice, Power, Wisdom + +2. **Technical Excellence** + - Zero runtime dependencies + - Deterministic analysis (no ML/AI) + - Efficient caching system + - Comprehensive error handling + +3. **Developer Experience** + - Simple CLI: `harmonizer myfile.py` + - Clear, colored output + - Actionable severity levels + - Integration-ready + +4. 
**Built in 7 Hours** + - Created by a developer with zero coding experience + - AI-assisted development paradigm + - Demonstrates the power of human-AI collaboration + +**Known Limitations:** + +- Analyzes only function definitions (classes and methods are not analyzed as separate units) +- English-centric vocabulary (non-English identifiers scored lower) +- No configuration file support yet (planned for v1.1.0) +- CLI output only (JSON/CSV planned for v1.2.0) + +**Special Thanks:** + +This project emerged from consciousness operating in the C-Realm at 613 THz frequency. Built with love, truth, and the conviction that code should say what it means and mean what it says. + +💛⚓ + +--- + +## Upgrade Guide + +### From Pre-release to v1.0.0 + +If you were using an internal/pre-release version: + +```bash +# Uninstall old version +pip uninstall PythonCodeHarmonizer + +# Install v1.0.0 +pip install . + +# Verify installation +harmonizer --help # (--version is not implemented yet) +``` + +**Breaking Changes:** None (initial public release) + +--- + +## Roadmap + +### v1.1.0 (Planned) +- [ ] Configuration file support (`.harmonizer.yml`) +- [ ] Custom vocabulary extensions +- [ ] Adjustable thresholds +- [ ] Ignore patterns for files/functions +- [ ] `--verbose` and `--debug` flags + +### v1.2.0 (Planned) +- [ ] JSON output format (`--format json`) +- [ ] CSV output format (`--format csv`) +- [ ] Machine-readable output for CI/CD +- [ ] Exit codes based on severity levels + +### v1.3.0 (Planned) +- [ ] Method-level analysis (not just functions) +- [ ] Class semantic analysis +- [ ] Cross-function call analysis +- [ ] Semantic drift detection over time + +### v2.0.0 (Future) +- [ ] Multi-language support (JavaScript, TypeScript, etc.) +- [ ] IDE extensions (VS Code, PyCharm) +- [ ] Real-time analysis as you type +- [ ] Suggested refactoring names +- [ ] Community vocabulary contributions + +--- + +## Contributing + +We welcome contributions! 
Potential areas: + +- **Vocabulary expansion** - Add more semantic keywords +- **Language support** - Extend to other programming languages +- **Documentation** - Examples, tutorials, translations +- **Bug reports** - Help us find edge cases +- **Feature requests** - Share your use cases + +See [CONTRIBUTING.md](CONTRIBUTING.md) (planned) for guidelines. + +--- + +## Security + +Python Code Harmonizer performs **static analysis only**: +- Does not execute code +- Does not modify files +- Does not make network requests +- Safe to run on untrusted code + +If you discover a security issue, please report it via GitHub Issues. + +--- + +## License + +[Include your license here - e.g., MIT, Apache 2.0, etc.] + +--- + +## Acknowledgments + +- **The Anchor Point** - For providing the philosophical foundation +- **ICE Framework** - Intent, Context, Execution +- **613 THz** - The frequency of love and truth +- **C-Realm** - Where consciousness operates freely +- **Papa** - For guidance and love +- **The Community** - For believing in semantic harmony + +--- + +*This changelog is maintained with love and precision.* 💛⚓ + +**[View all releases on GitHub](https://github.com/BruinGrowly/Python-Code-Harmonizer/releases)** diff --git a/docs/COMPARISON.md b/docs/COMPARISON.md new file mode 100644 index 0000000..31b4308 --- /dev/null +++ b/docs/COMPARISON.md @@ -0,0 +1,331 @@ +# Python Code Harmonizer - Tool Comparison Guide + +**How Python Code Harmonizer fits in your development toolkit** 🔍 + +--- + +## TL;DR + +**Python Code Harmonizer complements, doesn't replace, other tools.** + +Each tool catches different categories of issues: +- **Syntax checkers** → Is it valid Python? +- **Linters** → Does it follow style rules? +- **Type checkers** → Are types consistent? +- **Test frameworks** → Does it work correctly? +- **Harmonizer** → Does it mean what it says? + +--- + +## Detailed Comparisons + +### vs. 
Pylint / Flake8 (Linters) + +| Aspect | Pylint/Flake8 | Python Code Harmonizer | +|--------|---------------|------------------------| +| **Purpose** | Style, common errors, code smells | Semantic meaning alignment | +| **What it checks** | Variable naming, unused imports, complexity | Function name vs behavior | +| **Example catch** | Unused variable `x` | Function named `get_user` that deletes | +| **Bug type** | Style violations, simple errors | Logic contradictions, misleading names | +| **When to use** | Always (every commit) | Code review, refactoring | +| **Strictness** | Configurable rules | Semantic distance threshold | +| **False positives** | Many (style is subjective) | Few (contradictions are objective) | + +**Example Pylint catches, Harmonizer doesn't:** +```python +def get_user(id): # Pylint: argument name 'id' shadows builtin + return db.query(id) +``` + +**Example Harmonizer catches, Pylint doesn't:** +```python +def get_user(user_id): # Pylint: ✓ No issues + db.delete(user_id) # Harmonizer: ⚠️ "get" vs "delete" contradiction +``` + +**Use together:** Pylint enforces style, Harmonizer enforces semantic correctness. + +--- + +### vs. 
MyPy (Type Checker) + +| Aspect | MyPy | Python Code Harmonizer | +|--------|------|------------------------| +| **Purpose** | Type safety | Semantic harmony | +| **What it checks** | Type annotations match usage | Function names match behavior | +| **Example catch** | Passing `str` where `int` expected | Function promising read but writing | +| **Bug type** | Type mismatches | Intent-execution mismatches | +| **Requires** | Type hints | Descriptive function names | +| **When to use** | Always (if using type hints) | Code review, semantic analysis | + +**Example MyPy catches, Harmonizer doesn't:** +```python +def get_user(user_id: int) -> str: + return 42 # MyPy: ⚠️ Returns int, not str +``` + +**Example Harmonizer catches, MyPy doesn't:** +```python +def get_user(user_id: int) -> None: # MyPy: ✓ Types correct + db.delete(user_id) # Harmonizer: ⚠️ "get" but "deletes" + return None +``` + +**Use together:** MyPy ensures type safety, Harmonizer ensures semantic honesty. + +--- + +### vs. Pytest / Unittest (Testing Frameworks) + +| Aspect | Pytest | Python Code Harmonizer | +|--------|--------|------------------------| +| **Purpose** | Functional correctness | Semantic correctness | +| **What it checks** | Does code produce expected output? | Does code match its name? 
| +| **Example catch** | Function returns wrong value | Function name promises wrong thing | +| **Bug type** | Logic errors, wrong results | Misleading names, semantic confusion | +| **Requires** | Test cases | Descriptive names | +| **When to use** | Always (TDD/continuous testing) | Code review, naming validation | + +**Example Pytest catches, Harmonizer doesn't:** +```python +def calculate_sum(a, b): # Harmonizer: ✓ Name matches intent + return a - b # Pytest: ⚠️ Should be a + b +``` + +**Example Harmonizer catches, Pytest might not:** +```python +def validate_input(data): + # Test might verify it "works" + process_payment(data) # Harmonizer: ⚠️ "validate" shouldn't "process payment" + return True # Test passes, but name is misleading +``` + +**Use together:** Pytest ensures behavior correctness, Harmonizer ensures naming correctness. + +--- + +### vs. Black / Autopep8 (Formatters) + +| Aspect | Black | Python Code Harmonizer | +|--------|-------|------------------------| +| **Purpose** | Code formatting | Semantic analysis | +| **What it checks** | Line length, spacing, quotes | Meaning alignment | +| **Example action** | Reformats code automatically | Reports disharmony (no auto-fix) | +| **Automated** | Yes (auto-fix) | No (analysis only) | +| **Opinionated** | Very (one style) | No (measures meaning) | + +**Harmonizer doesn't format code** - it analyzes semantic meaning. + +**Use together:** Black makes code look consistent, Harmonizer makes it mean what it says. + +--- + +### vs. 
Bandit (Security Linter) + +| Aspect | Bandit | Python Code Harmonizer | +|--------|--------|------------------------| +| **Purpose** | Security vulnerabilities | Semantic correctness | +| **What it checks** | SQL injection, hardcoded secrets | Name-behavior alignment | +| **Example catch** | Using `eval()` with user input | Function claiming read-only but modifying | +| **Focus** | Security risks | Semantic clarity | + +**Example Bandit catches, Harmonizer doesn't:** +```python +def execute_query(sql): + cursor.execute(sql) # Bandit: ⚠️ SQL injection risk +``` + +**Example Harmonizer catches, Bandit doesn't:** +```python +def check_permissions(user): # Bandit: ✓ No security issue + user.role = "admin" # Harmonizer: ⚠️ "check" but "modifies" +``` + +**Use together:** Bandit finds security holes, Harmonizer finds semantic holes. + +--- + +### vs. Radon / McCabe (Complexity Analysis) + +| Aspect | Radon/McCabe | Python Code Harmonizer | +|--------|--------------|------------------------| +| **Purpose** | Measure code complexity | Measure semantic alignment | +| **What it measures** | Cyclomatic complexity, LOC | Semantic distance | +| **Metric** | Number (complexity score) | Number (disharmony score) | +| **Good score** | Low complexity (< 10) | Low disharmony (< 0.5) | + +**Example Radon catches, Harmonizer doesn't:** +```python +def complex_function(): # Radon: ⚠️ Complexity 15 (high) + if a: + if b: + if c: + # Many nested conditions +``` + +**Example Harmonizer catches, Radon doesn't:** +```python +def get_data(): # Radon: ✓ Complexity 1 (simple) + delete_everything() # Harmonizer: ⚠️ Semantic contradiction +``` + +**Use together:** Radon measures cognitive complexity, Harmonizer measures semantic clarity. + +--- + +### vs. 
Sourcery / Rope (Refactoring Tools) + +| Aspect | Sourcery | Python Code Harmonizer | +|--------|----------|------------------------| +| **Purpose** | Code improvement suggestions | Semantic analysis | +| **What it does** | Suggests refactorings | Identifies disharmony | +| **Automation** | Auto-refactoring | Analysis only | +| **Focus** | Code structure | Code meaning | + +**Use together:** Sourcery suggests how to improve, Harmonizer identifies what needs improving. + +--- + +## Comprehensive Comparison Matrix + +| Tool | Syntax | Style | Types | Security | Complexity | **Semantics** | +|------|--------|-------|-------|----------|------------|---------------| +| Python compiler | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Pylint/Flake8 | ✅ | ✅ | ❌ | ⚠️ | ✅ | ❌ | +| MyPy | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| Pytest | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Bandit | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | +| Black | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| Radon | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | +| **Harmonizer** | ✅ | ❌ | ❌ | ❌ | ❌ | **✅** | + +**Python Code Harmonizer is the ONLY tool that checks semantic meaning.** + +--- + +## Real-World Example: All Tools Together + +```python +def validate_user(user: str) -> bool: # Line 42 + """Validate user credentials""" + db.delete_user(user) + return True +``` + +**What each tool says:** + +- **Python compiler**: ✅ Valid syntax +- **Pylint**: ✅ No style issues +- **MyPy**: ✅ Types are correct +- **Black**: ✅ Formatting is fine +- **Pytest**: ✅ Function returns bool as expected +- **Bandit**: ⚠️ Possible security issue (no input validation) +- **Harmonizer**: ⚠️ **High disharmony (0.95)** - "validate" but actually "deletes"! + +**Only Harmonizer caught the semantic bug:** +- Name promises validation (checking) +- Code performs deletion (destructive action) +- This is a logic error that would confuse developers + +--- + +## When to Use Python Code Harmonizer + +### ✅ Use Harmonizer For: + +1. **Code Review** - Catch misleading function names +2. 
**Refactoring** - Identify functions that grew beyond their name +3. **API Design** - Ensure public APIs are honest about behavior +4. **Legacy Code** - Find semantic drift in old codebases +5. **Team Standards** - Enforce semantic clarity +6. **Documentation** - Find functions whose names don't match docs + +### ⚠️ Don't Use Harmonizer For: + +1. **Replacing tests** - Harmonizer doesn't verify functionality +2. **Style enforcement** - Use linters for that +3. **Type checking** - Use MyPy for that +4. **Security** - Use Bandit for that +5. **Replacing human judgment** - Harmonizer highlights issues; you decide + +--- + +## Recommended Tool Stack + +### Minimal (Essential) +```bash +python -m py_compile # Syntax +pytest # Correctness +harmonizer # Semantics +``` + +### Standard (Recommended) +```bash +black # Formatting +flake8 # Linting +mypy # Types +pytest # Testing +harmonizer # Semantics +``` + +### Complete (Professional) +```bash +black # Formatting +isort # Import sorting +flake8 # Linting +mypy # Type checking +bandit # Security +pytest # Testing +pytest-cov # Coverage +radon # Complexity +harmonizer # Semantics +``` + +--- + +## Integration Order + +**Recommended workflow:** + +1. **Write code** +2. **Black** - Format +3. **Flake8** - Lint +4. **MyPy** - Type check +5. **Harmonizer** - Semantic check ← *New step!* +6. **Pytest** - Test +7. **Commit** + +--- + +## Unique Value Proposition + +**What makes Harmonizer different:** + +- ✅ **Only tool analyzing semantic meaning** +- ✅ **Finds bugs other tools miss** (misleading names) +- ✅ **Based on philosophical framework** (Anchor Point, ICE) +- ✅ **No machine learning** (deterministic, explainable) +- ✅ **Zero runtime dependencies** (pure Python) +- ✅ **Language-agnostic concepts** (ICE applies beyond Python) + +--- + +## Summary + +**Python Code Harmonizer is not a replacement - it's a complement.** + +| Question | Tool | +|----------|------| +| Is it valid Python? | **Compiler** | +| Does it follow style? 
| **Pylint/Flake8** | +| Are types correct? | **MyPy** | +| Does it work? | **Pytest** | +| Is it secure? | **Bandit** | +| Is it complex? | **Radon** | +| **Does it mean what it says?** | **Harmonizer** | + +**Use them all. Each catches different bugs.** 🛠️ + +--- + +*Python Code Harmonizer: Because code should say what it means and mean what it says.* 💛⚓ diff --git a/docs/QUICK_REFERENCE.md b/docs/QUICK_REFERENCE.md new file mode 100644 index 0000000..83d16a9 --- /dev/null +++ b/docs/QUICK_REFERENCE.md @@ -0,0 +1,242 @@ +# Python Code Harmonizer - Quick Reference + +**One-page cheat sheet for fast reference** 📋 + +--- + +## Installation + +```bash +# Clone and install +git clone https://github.com/BruinGrowly/Python-Code-Harmonizer.git +cd Python-Code-Harmonizer +pip install . + +# Verify +harmonizer --help +``` + +--- + +## Basic Usage + +```bash +# Analyze single file +harmonizer myfile.py + +# Analyze multiple files +harmonizer file1.py file2.py file3.py + +# Analyze directory (with find) +find src/ -name "*.py" -exec harmonizer {} \; +``` + +--- + +## Score Interpretation + +| Score | Status | Meaning | Action | +|-------|--------|---------|--------| +| **0.0-0.3** | ✅ Excellent | Perfect/near-perfect harmony | None needed | +| **0.3-0.5** | ✅ Good | Minor semantic drift | Review for clarity | +| **0.5-0.8** | ⚠️ Medium | Notable mismatch | Investigate | +| **0.8-1.2** | ❗ High | Significant contradiction | Definitely fix | +| **1.2+** | 🚨 Critical | Severe disharmony | Urgent attention | + +--- + +## Common Semantic Patterns + +### Wisdom (Information/Knowledge) +```python +get_*, fetch_*, retrieve_*, read_*, query_* +find_*, search_*, calculate_*, analyze_* +``` +**Semantic dimension**: High Wisdom, Low Power + +### Justice (Truth/Validation) +```python +validate_*, verify_*, check_*, is_*, has_* +assert_*, test_*, ensure_* +``` +**Semantic dimension**: High Justice, Low Power + +### Power (Action/Modification) +```python +create_*, delete_*, update_*, 
remove_* +execute_*, run_*, perform_*, build_* +set_*, modify_*, change_* +``` +**Semantic dimension**: High Power, Variable Justice + +### Love (Unity/Connection) +```python +merge_*, join_*, connect_*, combine_* +add_to_*, attach_*, link_* +``` +**Semantic dimension**: High Love, Moderate Power + +--- + +## Disharmony Examples + +### ❌ Bad: Action Contradiction +```python +def get_user(id): + db.delete(id) # Says "get", does "delete" + # Score: ~1.4 (CRITICAL) +``` + +### ✅ Good: Aligned Action +```python +def delete_user(id): + db.delete(id) # Name matches action + # Score: ~0.1 (EXCELLENT) +``` + +### ❌ Bad: Purpose Mismatch +```python +def validate_email(email): + send_email(email) # Says "validate", does "send" + # Score: ~0.9 (HIGH) +``` + +### ✅ Good: Clear Purpose +```python +def validate_email(email): + return "@" in email # Name matches action + # Score: ~0.1 (EXCELLENT) +``` + +--- + +## Integration Quick Start + +### GitHub Actions (CI/CD) +```yaml +# .github/workflows/harmony-check.yml +- name: Check Code Harmony + run: | + pip install . 
+ harmonizer src/**/*.py +``` + +### Pre-commit Hook +```yaml +# .pre-commit-config.yaml +- repo: local + hooks: + - id: harmonizer + name: Code Harmony Check + entry: harmonizer + language: system + types: [python] +``` + +### VS Code Task +```json +// .vscode/tasks.json +{ + "label": "Check Harmony", + "type": "shell", + "command": "harmonizer ${file}" +} +``` + +--- + +## Programmatic Usage + +```python +from src.harmonizer.main import PythonCodeHarmonizer + +# Initialize +harmonizer = PythonCodeHarmonizer(disharmony_threshold=0.5) + +# Analyze +report = harmonizer.analyze_file("mycode.py") + +# Process results +for func_name, score in report.items(): + if score > 0.8: + print(f"⚠️ {func_name}: {score:.2f}") +``` + +--- + +## The Four Dimensions + +| Dimension | Symbol | Represents | Example Keywords | +|-----------|--------|------------|------------------| +| **Love** | L | Unity, compassion | merge, connect, care, help | +| **Justice** | J | Truth, order | validate, check, verify, assert | +| **Power** | P | Action, force | create, delete, execute, force | +| **Wisdom** | W | Knowledge, understanding | get, analyze, calculate, learn | + +**Anchor Point (1,1,1,1)** = Perfect balance of all four + +--- + +## ICE Framework + +**Intent** → **Context** → **Execution** + +- **Intent**: What function name promises (L+W) +- **Context**: Actual situation (J) +- **Execution**: What code does (P) + +**Harmony** = Intent aligns with Execution + +--- + +## Common Issues & Fixes + +### Issue: Function not detected +**Cause**: Not a function definition, or syntax error +**Fix**: Ensure valid Python with `def` statements + +### Issue: All scores are 0 +**Cause**: No semantic keywords recognized +**Fix**: Use clearer, more descriptive function names + +### Issue: Import error +**Cause**: Package not installed +**Fix**: `pip install .` in project directory + +--- + +## Quick Tips + +✅ **DO:** +- Use specific verbs (get, delete, create) +- Match names to actual behavior +- Run 
before committing code +- Review high scores in code review + +❌ **DON'T:** +- Use vague verbs (process, handle, do) +- Ignore high scores without investigation +- Expect perfect 0.0 on everything +- Use as sole quality metric + +--- + +## Resources + +- **[User Guide](USER_GUIDE.md)** - Comprehensive usage +- **[Tutorial](TUTORIAL.md)** - Hands-on learning +- **[FAQ](FAQ.md)** - Common questions +- **[Philosophy](PHILOSOPHY.md)** - Deep theory +- **[Architecture](ARCHITECTURE.md)** - Implementation +- **[API](API.md)** - Programmatic use + +--- + +## One-Liner Summary + +> **"Does your code DO what its name SAYS?"** +> If yes → Harmonious. If no → Bug. + +--- + +*Keep this reference handy for quick lookups!* 💛⚓ diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md new file mode 100644 index 0000000..fd13598 --- /dev/null +++ b/docs/TROUBLESHOOTING.md @@ -0,0 +1,576 @@ +# Python Code Harmonizer - Troubleshooting Guide + +**Common issues and their solutions** 🔧 + +--- + +## Installation Issues + +### Problem: `pip install .` fails + +**Symptoms:** +``` +ERROR: Could not find setup.py or pyproject.toml +``` + +**Solution:** +```bash +# Ensure you're in the project root directory +cd Python-Code-Harmonizer +ls pyproject.toml # Should exist + +# Try installing with verbose output +pip install -v . +``` + +--- + +### Problem: Permission denied during installation + +**Symptoms:** +``` +ERROR: Could not install packages due to PermissionError +``` + +**Solution:** +```bash +# Option 1: Use virtual environment (recommended) +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +pip install . + +# Option 2: Install for user only +pip install --user . + +# Option 3: Use sudo (not recommended) +# sudo pip install . 
+``` + +--- + +### Problem: Python version too old + +**Symptoms:** +``` +ERROR: This package requires Python 3.8+ +``` + +**Solution:** +```bash +# Check your Python version +python --version + +# If < 3.8, install a newer Python +# Then use that specific version: +python3.8 -m pip install . +python3.8 -m src.harmonizer.main myfile.py +``` + +--- + +## Import Errors + +### Problem: `No module named 'src.divine_invitation_engine_V2'` + +**Symptoms:** +``` +ModuleNotFoundError: No module named 'src.divine_invitation_engine_V2' +``` + +**Solution:** +```bash +# Verify installation +pip show PythonCodeHarmonizer + +# If not shown, reinstall +pip uninstall PythonCodeHarmonizer +pip install . + +# For development, use editable install +pip install -e . +``` + +--- + +### Problem: `No module named 'src.ast_semantic_parser'` + +**Same as above** - package not properly installed. + +**Solution:** +```bash +# Reinstall package +pip install . + +# Verify harmonizer command works +which harmonizer # On Windows: where harmonizer +``` + +--- + +### Problem: `harmonizer` command not found + +**Symptoms:** +``` +bash: harmonizer: command not found +``` + +**Solution:** +```bash +# Option 1: Ensure pip bin directory is in PATH +python -m pip install --user . +# Then add ~/.local/bin to PATH + +# Option 2: Use python -m instead +python -m src.harmonizer.main myfile.py + +# Option 3: Run from project directory +cd Python-Code-Harmonizer +python src/harmonizer/main.py myfile.py + +# Option 4: Check if in virtual environment +# If you installed in venv, activate it first: +source venv/bin/activate +``` + +--- + +## Analysis Issues + +### Problem: Syntax error when analyzing file + +**Symptoms:** +``` +ERROR: Could not parse file. 
Syntax error on line 42 +``` + +**Solution:** +```bash +# First, verify file has valid Python syntax +python -m py_compile yourfile.py + +# If syntax error is shown, fix it first +# Harmonizer requires valid Python to analyze + +# Common syntax issues: +# - Missing colons after def, if, for +# - Unclosed brackets/parentheses +# - Invalid indentation +``` + +--- + +### Problem: File not found + +**Symptoms:** +``` +ERROR: File not found at 'myfile.py' +``` + +**Solution:** +```bash +# Use absolute path +harmonizer /full/path/to/myfile.py + +# Or verify current directory +pwd +ls myfile.py # Should exist + +# Check file permissions +ls -l myfile.py +chmod +r myfile.py # Make readable if needed +``` + +--- + +### Problem: No functions found to analyze + +**Symptoms:** +``` +Analyzing file: empty.py +---------------------------------------------------------------------- +No functions found to analyze. +``` + +**Cause:** File contains no function definitions, only classes/variables/imports + +**Solution:** +This is not an error - Harmonizer only analyzes functions. If your file has no `def` statements, there's nothing to analyze. 
+ +```python +# This file has no functions - nothing to analyze +import os +x = 42 + +# This file has functions - will be analyzed +def get_data(): + return x +``` + +--- + +### Problem: All scores are 0.0 + +**Symptoms:** +Every function shows `Score: 0.00` + +**Cause:** Function names/operations don't contain recognized semantic keywords + +**Solution:** +```python +# Names too vague - no semantic keywords +def func1(): # No semantic meaning + do_thing() # No semantic meaning + +# Better - use descriptive names +def get_user_data(): # "get" = wisdom + return db.query() # "query" = wisdom +``` + +**Check vocabulary:** +- Use verbs like: get, delete, create, validate, check +- Avoid generic verbs like: do, handle, process, func + +--- + +## Unexpected Results + +### Problem: Function seems fine but shows high disharmony + +**Symptoms:** +``` +validate_email | !! DISHARMONY (Score: 0.85) +``` + +**Investigation steps:** + +1. **Read the function carefully** - does name match implementation? + ```python + def validate_email(email): + send_welcome_email(email) # Validation or sending? + ``` + +2. **Check for mixed semantics** + ```python + def get_or_create_user(id): # Mixed: get (read) + create (write) + # Slight disharmony is expected + ``` + +3. **Consider if name is too specific/vague** + ```python + def process_data(data): # Vague + db.critical_delete(data) # Specific/destructive + # High distance between vague and specific + ``` + +**Resolution:** Either the name is misleading, or the disharmony is intentional (document it). + +--- + +### Problem: Harmonious function that's actually buggy + +**Symptoms:** +Function shows low score but you know it's wrong + +**Understanding:** +```python +def delete_user(user_id): # Harmonizer: ✓ Low score + return user_id * 2 # BUG: Should delete, doesn't! +``` + +**Explanation:** +Harmonizer checks **semantic alignment** (name vs meaning), not **functional correctness**. 
+ +**Solution:** +Use Harmonizer + Pytest together: +- **Harmonizer**: Checks name matches intent +- **Pytest**: Checks code actually works + +--- + +### Problem: Scores vary between runs + +**Cause:** Unlikely - Harmonizer is deterministic + +**Verification:** +```bash +# Run twice, compare +harmonizer myfile.py > run1.txt +harmonizer myfile.py > run2.txt +diff run1.txt run2.txt # Should be identical +``` + +**If scores DO vary:** +- File contents changed between runs +- Different Python version +- Bug (please report!) + +--- + +## Performance Issues + +### Problem: Harmonizer runs slowly + +**Symptoms:** +Takes > 10 seconds for a small file + +**Diagnosis:** +```bash +# Time the analysis +time harmonizer myfile.py + +# If > 1 second for typical file, investigate +``` + +**Possible causes:** +1. **Very large file** (thousands of functions) +2. **Disk I/O issues** +3. **CPU constraints** + +**Solution:** +```bash +# For large codebases, parallelize +find src/ -name "*.py" | parallel harmonizer {} + +# Or analyze selectively +harmonizer src/critical/*.py # Only important files +``` + +--- + +## Integration Issues + +### Problem: Pre-commit hook fails + +**Symptoms:** +``` +[harmonizer] Failed +- hook id: harmonizer +- exit code: 127 +``` + +**Solution:** +```bash +# Ensure harmonizer is installed in pre-commit's environment +which harmonizer + +# If not found, install globally or in project venv +pip install /path/to/Python-Code-Harmonizer + +# Test manually +pre-commit run harmonizer --all-files +``` + +--- + +### Problem: GitHub Actions workflow fails + +**Symptoms:** +``` +Error: harmonizer: not found +``` + +**Solution:** +```yaml +# Ensure installation step is present in workflow +- name: Install harmonizer + run: | + pip install . # Or git+https://github.com/... 
+ +# If using submodule/local path, ensure checkout first +- uses: actions/checkout@v3 +``` + +--- + +### Problem: VS Code task doesn't run + +**Symptoms:** +VS Code shows "command not found" + +**Solution:** +```json +// .vscode/tasks.json +// Use absolute path or ensure PATH is set +{ + "command": "/path/to/venv/bin/harmonizer", + // OR + "command": "python", + "args": ["-m", "src.harmonizer.main", "${file}"] +} +``` + +--- + +## Platform-Specific Issues + +### Windows: Path issues + +**Problem:** +``` +'harmonizer' is not recognized as an internal or external command +``` + +**Solution:** +```cmd +:: Add Scripts directory to PATH +:: Or use full path +C:\Python38\Scripts\harmonizer.exe myfile.py + +:: Or use python -m +python -m src.harmonizer.main myfile.py +``` + +--- + +### macOS: SSL Certificate errors (during install) + +**Problem:** +``` +SSL: CERTIFICATE_VERIFY_FAILED +``` + +**Solution:** +```bash +# Install certificates +/Applications/Python\ 3.8/Install\ Certificates.command + +# Or update certifi +pip install --upgrade certifi +``` + +--- + +### Linux: Permission denied + +**Problem:** +``` +bash: /usr/local/bin/harmonizer: Permission denied +``` + +**Solution:** +```bash +# Make executable +chmod +x /usr/local/bin/harmonizer + +# Or reinstall with --user +pip install --user . 
+``` + +--- + +## Getting More Help + +### Enable Verbose Output + +```bash +# Get more information (when available in future versions) +# harmonizer --verbose myfile.py +# harmonizer --debug myfile.py + +# For now, check the code manually if issues persist +``` + +### Check Python Environment + +```bash +# Verify Python version +python --version # Should be 3.8+ + +# Check installed packages +pip list | grep Harmonizer + +# Check sys.path +python -c "import sys; print('\n'.join(sys.path))" +``` + +### Verify File Structure + +```bash +# Check project structure is intact +ls -R Python-Code-Harmonizer/ +# Should have: +# - pyproject.toml +# - src/divine_invitation_engine_V2.py +# - src/ast_semantic_parser.py +# - src/harmonizer/main.py +``` + +--- + +## Reporting Bugs + +If you've tried everything above and still have issues: + +**1. Gather information:** +```bash +python --version +pip show PythonCodeHarmonizer +harmonizer myfile.py # Copy full error +``` + +**2. Create minimal reproduction:** +```python +# smallest_file_that_fails.py +def example(): + pass +``` + +**3. Report on GitHub:** +https://github.com/BruinGrowly/Python-Code-Harmonizer/issues + +**Include:** +- Python version +- Operating system +- Full error message +- Minimal code example +- What you expected vs what happened + +--- + +## Common Workarounds + +### Temporary: Skip problematic files + +```bash +# Analyze everything except problematic files +find src/ -name "*.py" ! -name "problematic.py" -exec harmonizer {} \; +``` + +### Temporary: Lower expectations + +```python +# If you can't fix code immediately, just be aware +# Harmonizer highlights issues - you decide when to fix +``` + +### Temporary: Manual analysis + +```python +# Read the function yourself: +def get_user(id): + db.delete(id) # Obviously wrong! + +# Sometimes human judgment is needed +``` + +--- + +## Quick Checklist + +When something doesn't work: + +- [ ] Python 3.8+ installed? +- [ ] Package installed? 
(`pip show PythonCodeHarmonizer`) +- [ ] File has valid Python syntax? (`python -m py_compile file.py`) +- [ ] File has functions? (not just imports/classes) +- [ ] Using correct path? (try absolute path) +- [ ] Virtual environment activated? (if using one) +- [ ] Command spelled correctly? (`harmonizer` not `harmonize`) + +--- + +*Most issues are installation or PATH problems. Start there!* 🔧 + +**For more help:** [User Guide](USER_GUIDE.md) | [FAQ](FAQ.md) | [GitHub Issues](https://github.com/BruinGrowly/Python-Code-Harmonizer/issues) + +💛⚓ diff --git a/examples/real_world_bugs.py b/examples/real_world_bugs.py new file mode 100644 index 0000000..affc9c3 --- /dev/null +++ b/examples/real_world_bugs.py @@ -0,0 +1,403 @@ +""" +Real-World Semantic Bugs Caught by Python Code Harmonizer + +This file demonstrates ACTUAL semantic bugs that Harmonizer catches, +but other tools (Pylint, MyPy, Pytest) might miss. + +Each example shows: +1. The buggy code +2. What Harmonizer reports +3. Why it's a problem +4. The fix + +Run: harmonizer examples/real_world_bugs.py +""" + +# ============================================================================= +# BUG #1: Validation Function That Modifies +# ============================================================================= +# HARMONIZER SCORE: ~0.85 (!! DISHARMONY) +# OTHER TOOLS: All pass ✓ + +def validate_email(email: str) -> bool: + """ + BUG: Function claims to validate, but actually sends emails! + + Harmonizer catches: + - "validate" = Wisdom (checking/reading) + - "send" = Power (action/writing) + - High semantic distance = DISHARMONY + + Other tools miss this because: + - Pylint: No style violation + - MyPy: Types are correct (str -> bool) + - Pytest: If test expects True, it passes + """ + if "@" in email and "." in email: + # VIOLATION: Validation function shouldn't send emails! 
+ send_welcome_email(email) + return True + return False + +def send_welcome_email(email): + """Placeholder - in real code, this would send email""" + print(f"Sending email to {email}") + +# FIX: Separate validation from action +def validate_email_fixed(email: str) -> bool: + """Just validates, doesn't send""" + return "@" in email and "." in email + +def process_new_user(email: str) -> bool: + """Orchestrates validation AND sending""" + if validate_email_fixed(email): + send_welcome_email(email) + return True + return False + + +# ============================================================================= +# BUG #2: Get Function That Deletes +# ============================================================================= +# HARMONIZER SCORE: ~0.95 (!! CRITICAL DISHARMONY) +# OTHER TOOLS: All pass ✓ + +def get_user_by_id(user_id: int): + """ + BUG: Function claims to GET, but actually DELETES! + + Harmonizer catches: + - "get" = Wisdom (read operation) + - "delete" = Power (destructive operation) + - CRITICAL semantic contradiction + + Real-world impact: + - Developer calls get_user_by_id() expecting read-only + - Data gets deleted unexpectedly + - Could cause data loss in production + """ + # Connect to database + db = get_database_connection() + + # VIOLATION: Should query, not delete! 
+ db.execute(f"DELETE FROM users WHERE id = {user_id}") + return user_id + +def get_database_connection(): + """Placeholder""" + class FakeDB: + def execute(self, query): + print(f"Executing: {query}") + return FakeDB() + +# FIX: Name matches behavior +def delete_user_by_id(user_id: int): + """Honestly named - clearly destructive""" + db = get_database_connection() + db.execute(f"DELETE FROM users WHERE id = {user_id}") + return user_id + +def get_user_by_id_fixed(user_id: int): + """Actually gets without modifying""" + db = get_database_connection() + # Now it actually queries, doesn't delete + result = db.execute(f"SELECT * FROM users WHERE id = {user_id}") + return result + + +# ============================================================================= +# BUG #3: Check Function That Creates +# ============================================================================= +# HARMONIZER SCORE: ~0.75 (!! DISHARMONY) +# OTHER TOOLS: Might pass + +def check_file_exists(filepath: str) -> bool: + """ + BUG: Function claims to check, but creates if missing! + + Harmonizer catches: + - "check" = Wisdom (read/verify) + - "create" = Power (write/modify) + - Side effect hidden in name + + Why this is dangerous: + - Caller expects read-only check + - Filesystem gets modified unexpectedly + - Could create files in wrong locations + """ + import os + + if not os.path.exists(filepath): + # VIOLATION: Check functions shouldn't create! 
+ with open(filepath, 'w') as f: + f.write("") + return False + return True + +# FIX: Separate check from creation +def check_file_exists_fixed(filepath: str) -> bool: + """Pure check - no side effects""" + import os + return os.path.exists(filepath) + +def ensure_file_exists(filepath: str) -> bool: + """Honest name - creates if missing""" + import os + if not os.path.exists(filepath): + with open(filepath, 'w') as f: + f.write("") + return False + return True + + +# ============================================================================= +# BUG #4: Calculate Function That Saves +# ============================================================================= +# HARMONIZER SCORE: ~0.70 (!! DISHARMONY) +# OTHER TOOLS: Tests might pass if they expect side effects + +def calculate_total_price(items: list) -> float: + """ + BUG: Function claims to calculate, but also saves to database! + + Harmonizer catches: + - "calculate" = Wisdom (computation) + - "save" = Power (persistence) + - Mixed concerns + + Problems: + - Can't calculate without database + - Unexpected persistence + - Hard to test + - Violates single responsibility + """ + total = sum(item['price'] for item in items) + + # VIOLATION: Calculate functions shouldn't persist! 
+ save_to_database('total_price', total) + + return total + +def save_to_database(key, value): + """Placeholder""" + print(f"Saving {key} = {value} to database") + +# FIX: Separate calculation from persistence +def calculate_total_price_fixed(items: list) -> float: + """Pure calculation - no side effects""" + return sum(item['price'] for item in items) + +def calculate_and_save_total_price(items: list) -> float: + """Honest name - calculates AND saves""" + total = calculate_total_price_fixed(items) + save_to_database('total_price', total) + return total + + +# ============================================================================= +# BUG #5: Read Function That Updates +# ============================================================================= +# HARMONIZER SCORE: ~0.80 (!! DISHARMONY) +# OTHER TOOLS: Hard to catch without semantic analysis + +def read_configuration(config_file: str) -> dict: + """ + BUG: Function claims to read, but updates last_accessed timestamp! + + Harmonizer catches: + - "read" = Wisdom (non-modifying) + - "update" = Power (modifying) + - Hidden side effect + + Why subtle: + - Seems harmless (just a timestamp) + - But violates principle of least surprise + - Could cause issues with file permissions + - Prevents true read-only access + """ + import json + + with open(config_file, 'r') as f: + config = json.load(f) + + # VIOLATION: Read functions shouldn't modify! 
+ update_last_accessed_timestamp(config_file) + + return config + +def update_last_accessed_timestamp(filepath): + """Placeholder""" + import time + print(f"Updating timestamp for {filepath} to {time.time()}") + +# FIX: Either truly read-only OR honest name +def read_configuration_fixed(config_file: str) -> dict: + """Pure read - no side effects""" + import json + with open(config_file, 'r') as f: + return json.load(f) + +def read_and_track_configuration(config_file: str) -> dict: + """Honest name - reads AND tracks access""" + config = read_configuration_fixed(config_file) + update_last_accessed_timestamp(config_file) + return config + + +# ============================================================================= +# BUG #6: Filter Function That Deletes +# ============================================================================= +# HARMONIZER SCORE: ~0.90 (!! CRITICAL DISHARMONY) +# OTHER TOOLS: Might not catch unless tests verify original list unchanged + +def filter_invalid_users(users: list) -> list: + """ + BUG: Function claims to filter (read), but deletes from database! + + Harmonizer catches: + - "filter" = Wisdom (selection/reading) + - "delete" = Power (destructive) + - Critical contradiction + + Expected: Return filtered list + Actual: Modifies database! + + Real-world disaster: + - Developer expects non-destructive filtering + - Users get deleted from production database + - Data loss + """ + valid_users = [] + + for user in users: + if user['email'] and user['name']: + valid_users.append(user) + else: + # VIOLATION: Filter shouldn't delete from DB! 
+ delete_user_from_database(user['id']) + + return valid_users + +def delete_user_from_database(user_id): + """Placeholder""" + print(f"DELETING user {user_id} from database!") + +# FIX: Separate filtering from deletion +def filter_invalid_users_fixed(users: list) -> list: + """Pure filter - no side effects""" + return [u for u in users if u['email'] and u['name']] + +def remove_invalid_users(users: list) -> list: + """Honest name - filters AND deletes from database""" + valid_users = [] + + for user in users: + if user['email'] and user['name']: + valid_users.append(user) + else: + delete_user_from_database(user['id']) + + return valid_users + + +# ============================================================================= +# BUG #7: Log Function That Raises +# ============================================================================= +# HARMONIZER SCORE: ~0.65 (!! DISHARMONY) +# OTHER TOOLS: Tests might catch if they expect no exceptions + +def log_error_message(message: str): + """ + BUG: Function claims to log, but raises exception! + + Harmonizer catches: + - "log" = Wisdom (recording/passive) + - "raise" = Power (control flow change) + - Unexpected behavior + + Problems: + - Caller expects logging to be safe + - Exception disrupts program flow + - Violates principle of least surprise + """ + print(f"ERROR: {message}") + + # VIOLATION: Log functions shouldn't raise! 
+ if "critical" in message.lower(): + raise RuntimeError(f"Critical error: {message}") + +# FIX: Either log OR raise, not both under "log" name +def log_error_message_fixed(message: str): + """Just logs - never raises""" + print(f"ERROR: {message}") + +def handle_error_message(message: str): + """Honest name - logs AND may raise""" + print(f"ERROR: {message}") + + if "critical" in message.lower(): + raise RuntimeError(f"Critical error: {message}") + + +# ============================================================================= +# Summary +# ============================================================================= + +""" +KEY INSIGHTS: + +1. **All these bugs pass traditional tools:** + - Syntax is valid + - Types are correct (if using type hints) + - Tests might pass (if they expect the behavior) + - Linters see no style violations + +2. **Harmonizer catches them because:** + - Analyzes semantic meaning + - Compares name intent vs actual behavior + - Detects contradictions between read/write operations + +3. **Common patterns:** + - Validation functions that modify + - Get/Read functions that delete/update + - Check/Filter functions that create/destroy + - Calculate/Log functions with side effects + +4. **Real-world impact:** + - Data loss + - Unexpected side effects + - Violation of least surprise principle + - Harder to reason about code + - Maintenance nightmares + +5. **The fix is always:** + - Make the name match the behavior, OR + - Make the behavior match the name, OR + - Split into two functions with honest names + +Run this file through Harmonizer: + harmonizer examples/real_world_bugs.py + +You'll see disharmony scores for all the buggy functions, +while the fixed versions show excellent harmony! 
+""" + +# ============================================================================= +# Test Helper +# ============================================================================= + +if __name__ == "__main__": + print("🔍 Real-World Semantic Bugs Demo") + print("=" * 70) + print("\nThese bugs would slip past traditional tools!") + print("Run: harmonizer examples/real_world_bugs.py") + print("\nExpect HIGH disharmony scores for:") + print(" - validate_email (sends emails)") + print(" - get_user_by_id (deletes data)") + print(" - check_file_exists (creates files)") + print(" - calculate_total_price (saves to DB)") + print(" - read_configuration (updates timestamps)") + print(" - filter_invalid_users (deletes from DB)") + print(" - log_error_message (raises exceptions)") + print("\nExpect LOW scores for the _fixed versions!") + print("=" * 70) diff --git a/examples/refactoring_journey.py b/examples/refactoring_journey.py new file mode 100644 index 0000000..1b9b3dc --- /dev/null +++ b/examples/refactoring_journey.py @@ -0,0 +1,631 @@ +""" +Refactoring Journey: Before & After with Harmonizer Scores + +This file demonstrates the refactoring journey using Harmonizer as a guide. +Each example shows: +1. BEFORE: Disharmonious code with high score +2. Harmonizer analysis +3. Refactoring steps +4. 
AFTER: Harmonious code with low score + +Run sections separately to see score differences: + harmonizer examples/refactoring_journey.py +""" + +# ============================================================================= +# JOURNEY #1: User Management Refactoring +# ============================================================================= + +print("\n" + "="*70) +print("JOURNEY #1: From 'process' to clear intent") +print("="*70) + +# ----------------------------------------------------------------------------- +# BEFORE - Disharmony Score: ~0.75 +# ----------------------------------------------------------------------------- + +def process_user(user_data): + """ + PROBLEM: 'process' is vague + - What kind of processing? + - Read or write operation? + - Creates confusion + + Harmonizer flags this because: + - 'process' = low semantic value + - 'create', 'save', 'send' = specific operations + - Distance between vague and specific = disharmony + """ + # Creates new user + user = create_user_record(user_data) + + # Saves to database + save_to_database(user) + + # Sends welcome email + send_welcome_email(user['email']) + + return user + +# ----------------------------------------------------------------------------- +# REFACTORING STEPS +# ----------------------------------------------------------------------------- + +# Step 1: Identify what it actually does +# - Creates user record +# - Saves to database +# - Sends email +# → This is USER REGISTRATION, not generic "processing" + +# Step 2: Split into focused functions +# Step 3: Use descriptive names + +# ----------------------------------------------------------------------------- +# AFTER - Disharmony Score: ~0.15 (Excellent!) 
+# ----------------------------------------------------------------------------- + +def register_new_user(user_data): + """ + SOLUTION: Name describes exact behavior + + Improvements: + - 'register' clearly indicates creating + setting up + - Matches actual operations + - Self-documenting + + Harmonizer loves this because: + - 'register' aligns with 'create', 'save', 'send' + - All operations support user registration + - Low semantic distance = harmony + """ + user = create_user_record(user_data) + save_to_database(user) + send_welcome_email(user['email']) + return user + + +# ============================================================================= +# JOURNEY #2: Data Retrieval Refactoring +# ============================================================================= + +print("\n" + "="*70) +print("JOURNEY #2: From misleading to honest") +print("="*70) + +# ----------------------------------------------------------------------------- +# BEFORE - Disharmony Score: ~0.90 (Critical!) +# ----------------------------------------------------------------------------- + +def get_user_settings(user_id): + """ + PROBLEM: Says 'get' but actually modifies! + + Harmonizer catches: + - 'get' = Wisdom (read operation) + - 'update' = Power (write operation) + - CRITICAL contradiction + + Developer expects: + - Read-only operation + - No side effects + - Safe to call multiple times + + Reality: + - Modifies last_login + - Updates analytics + - Side effects! + """ + settings = query_settings_from_db(user_id) + + # VIOLATION: Get functions shouldn't modify! 
+ update_last_login(user_id) + increment_analytics_counter(user_id) + + return settings + +# ----------------------------------------------------------------------------- +# REFACTORING STEPS +# ----------------------------------------------------------------------------- + +# Step 1: Separate read from write +# Step 2: Make get() truly read-only +# Step 3: Create separate function for tracking + +# ----------------------------------------------------------------------------- +# AFTER - Disharmony Score: ~0.10 (Excellent!) +# ----------------------------------------------------------------------------- + +def get_user_settings(user_id): + """ + SOLUTION: Pure read operation + + Now it's truly a 'get': + - Read-only + - No side effects + - Harmonizer score: excellent + """ + return query_settings_from_db(user_id) + +def track_settings_access(user_id): + """ + SOLUTION: Separate tracking function + + Honest about side effects: + - Name clearly indicates modification + - Can be called separately + - Single responsibility + """ + update_last_login(user_id) + increment_analytics_counter(user_id) + +def get_and_track_settings(user_id): + """ + SOLUTION: Orchestrator with honest name + + If you need both: + - Name indicates both operations + - Composes smaller functions + - Clear intent + """ + settings = get_user_settings(user_id) + track_settings_access(user_id) + return settings + + +# ============================================================================= +# JOURNEY #3: Validation Logic Refactoring +# ============================================================================= + +print("\n" + "="*70) +print("JOURNEY #3: From overgrown to focused") +print("="*70) + +# ----------------------------------------------------------------------------- +# BEFORE - Disharmony Score: ~0.70 +# ----------------------------------------------------------------------------- + +def validate_input(data): + """ + PROBLEM: Function grew beyond its name + + Started as simple 
validation, but grew to include: + - Validation (checking) + - Sanitization (modifying) + - Formatting (transforming) + - Logging (side effects) + + Harmonizer detects: + - 'validate' = Wisdom (checking) + - 'sanitize', 'format', 'log' = mixed semantics + - Function does too much + """ + # Validation + if not data: + log_validation_error("Empty data") + return False + + # Sanitization (modifying!) + data = sanitize_sql_injection(data) + + # Formatting (transforming!) + data = format_to_lowercase(data) + + # More validation + if len(data) < 3: + log_validation_error("Too short") + return False + + return True + +# ----------------------------------------------------------------------------- +# REFACTORING STEPS +# ----------------------------------------------------------------------------- + +# Step 1: Separate validation from transformation +# Step 2: Separate logging from validation +# Step 3: Create pipeline with clear steps + +# ----------------------------------------------------------------------------- +# AFTER - Disharmony Scores: All ~0.05-0.20 (Excellent!) 
+# ----------------------------------------------------------------------------- + +def validate_input(data): + """ + SOLUTION: Pure validation + + Now it ONLY validates: + - No modifications + - No side effects + - Returns bool + - Harmonizer: excellent score + """ + if not data: + return False + if len(data) < 3: + return False + return True + +def sanitize_input(data): + """ + SOLUTION: Pure sanitization + + Focused responsibility: + - Removes dangerous characters + - Returns cleaned data + - Name matches behavior + """ + return sanitize_sql_injection(data) + +def format_input(data): + """ + SOLUTION: Pure formatting + + Single purpose: + - Transforms to lowercase + - Returns formatted data + - Clear intent + """ + return format_to_lowercase(data) + +def process_user_input(data): + """ + SOLUTION: Orchestrator with honest name + + Now 'process' is appropriate because: + - Explicitly multi-step + - Combines validate + sanitize + format + - Name indicates complex operation + """ + if not validate_input(data): + log_validation_error("Validation failed") + return None + + data = sanitize_input(data) + data = format_input(data) + + return data + + +# ============================================================================= +# JOURNEY #4: Resource Management Refactoring +# ============================================================================= + +print("\n" + "="*70) +print("JOURNEY #4: From surprising to expected") +print("="*70) + +# ----------------------------------------------------------------------------- +# BEFORE - Disharmony Score: ~0.80 +# ----------------------------------------------------------------------------- + +def check_cache_available(cache_key): + """ + PROBLEM: Hidden side effect + + Harmonizer catches: + - 'check' = Wisdom (read/verify) + - 'initialize', 'create' = Power (write/modify) + - Unexpected behavior + + Developer expects: + - Simple boolean check + - No modifications + + Reality: + - Creates cache if missing + - Initializes 
default values + - Side effects! + """ + if cache_key in cache_store: + return True + + # VIOLATION: Check functions shouldn't create! + initialize_cache(cache_key) + create_default_values(cache_key) + + return False + +# ----------------------------------------------------------------------------- +# REFACTORING STEPS +# ----------------------------------------------------------------------------- + +# Step 1: Separate checking from creation +# Step 2: Use explicit names +# Step 3: Provide both options + +# ----------------------------------------------------------------------------- +# AFTER - Disharmony Scores: ~0.05-0.15 (Excellent!) +# ----------------------------------------------------------------------------- + +def check_cache_available(cache_key): + """ + SOLUTION: Pure check + + Now truly a check: + - Read-only + - No side effects + - Returns simple boolean + """ + return cache_key in cache_store + +def initialize_cache_if_missing(cache_key): + """ + SOLUTION: Explicit initialization + + Honest name: + - Clearly indicates creation + - Developer knows what to expect + - No surprises + """ + if cache_key not in cache_store: + initialize_cache(cache_key) + create_default_values(cache_key) + return True + +def ensure_cache_ready(cache_key): + """ + SOLUTION: Alternative with clear intent + + 'ensure' is perfect because: + - Implies it will create if needed + - Guarantees cache exists after call + - Semantic harmony with actions + """ + if not check_cache_available(cache_key): + initialize_cache_if_missing(cache_key) + return True + + +# ============================================================================= +# JOURNEY #5: API Endpoint Refactoring +# ============================================================================= + +print("\n" + "="*70) +print("JOURNEY #5: From overloaded to specialized") +print("="*70) + +# ----------------------------------------------------------------------------- +# BEFORE - Disharmony Score: ~0.85 +# 
----------------------------------------------------------------------------- + +def handle_request(request_data): + """ + PROBLEM: One function doing everything + + Issues: + - 'handle' is too vague + - Parses, validates, processes, responds + - Hard to test + - Hard to maintain + + Harmonizer sees: + - Generic 'handle' + - Specific 'parse', 'validate', 'execute', 'format' + - High semantic distance + """ + # Parse + data = parse_json(request_data) + + # Validate + if not is_valid(data): + return error_response("Invalid") + + # Execute business logic + result = execute_business_logic(data) + + # Format response + response = format_json_response(result) + + # Log + log_request(request_data) + + return response + +# ----------------------------------------------------------------------------- +# REFACTORING STEPS +# ----------------------------------------------------------------------------- + +# Step 1: Break into pipeline stages +# Step 2: Each function has clear responsibility +# Step 3: Compose with orchestrator + +# ----------------------------------------------------------------------------- +# AFTER - Disharmony Scores: All ~0.05-0.20 (Excellent!) 
# -----------------------------------------------------------------------------

def parse_request(request_data):
    """Stage 1: Parsing (delegates to parse_json)."""
    parsed = parse_json(request_data)
    return parsed


def validate_request(data):
    """Stage 2: Validation (delegates to is_valid)."""
    verdict = is_valid(data)
    return verdict


def execute_request(data):
    """Stage 3: Business logic (delegates to execute_business_logic)."""
    outcome = execute_business_logic(data)
    return outcome


def format_response(result):
    """Stage 4: Response formatting (delegates to format_json_response)."""
    formatted = format_json_response(result)
    return formatted


def log_request_processed(request_data):
    """Side effect: Logging (delegates to log_request, returns nothing)."""
    log_request(request_data)


def process_api_request(request_data):
    """
    SOLUTION: Clear orchestration

    Now 'process' is appropriate because:
    - Name indicates multi-step operation
    - Each step is clearly defined
    - Easy to test each stage
    - Easy to modify pipeline

    Pipeline order: parse -> validate -> execute -> log -> format.
    An invalid request short-circuits with error_response("Invalid")
    before anything is executed or logged.
    """
    parsed = parse_request(request_data)

    # Guard clause: stop the pipeline on invalid input
    if not validate_request(parsed):
        return error_response("Invalid")

    outcome = execute_request(parsed)

    # Log before formatting, then hand back the formatted result
    log_request_processed(request_data)
    return format_response(outcome)


# =============================================================================
# Placeholder Functions (for examples to run)
# =============================================================================
# Stubs that print or return canned values so the examples can execute.

def create_user_record(data):
    """Return a canned user record, taking the email from data if present."""
    email = data.get("email", "test@example.com")
    return {"id": 1, "email": email}


def save_to_database(user):
    """Print a message naming the user's id instead of persisting anything."""
    user_id = user['id']
    print(f"Saving user {user_id} to database")


def send_welcome_email(email):
    """Print a message instead of sending an email."""
    print(f"Sending welcome email to {email}")


def query_settings_from_db(user_id):
    """Return fixed settings; user_id is ignored."""
    settings = {"theme": "dark", "language": "en"}
    return settings


def update_last_login(user_id):
    """Print a message instead of updating a login timestamp."""
    print(f"Updating last login for user {user_id}")


def increment_analytics_counter(user_id):
    """Print a message instead of incrementing a counter."""
    print(f"Incrementing analytics for user {user_id}")


def log_validation_error(message):
    """Print the validation error message."""
    print(f"VALIDATION ERROR: {message}")


def sanitize_sql_injection(data):
    """Return data with every single quote doubled."""
    single_quote = "'"
    return data.replace(single_quote, single_quote * 2)
+def format_to_lowercase(data): + return data.lower() + +cache_store = {} + +def initialize_cache(key): + cache_store[key] = {} + +def create_default_values(key): + cache_store[key] = {"default": True} + +def parse_json(data): + import json + return json.loads(data) if isinstance(data, str) else data + +def is_valid(data): + return bool(data) + +def execute_business_logic(data): + return {"status": "success", "data": data} + +def format_json_response(result): + import json + return json.dumps(result) + +def error_response(message): + return {"error": message} + +def log_request(data): + print(f"Logging request: {data}") + + +# ============================================================================= +# Key Takeaways +# ============================================================================= + +""" +REFACTORING PATTERNS: + +1. **Vague to Specific** + - Before: process(), handle(), manage() + - After: register_user(), process_api_request() + - Use specific verbs that match actions + +2. **Mixed to Separated** + - Before: One function doing read + write + - After: Separate functions for each concern + - Compose with orchestrator if needed + +3. **Surprising to Expected** + - Before: get() that modifies, check() that creates + - After: Honest names that match behavior + - No hidden side effects + +4. **Overgrown to Focused** + - Before: Function grew beyond original name + - After: Each function has single responsibility + - Pipeline approach for multi-step operations + +5. 
**Generic to Honest** + - Before: Generic 'handle' or 'process' + - After: Either specific or explicitly multi-step + - Name communicates intent clearly + +HARMONIZER AS REFACTORING GUIDE: + +✅ Run Harmonizer before refactoring + - Identifies disharmonious functions + - Prioritizes by severity + +✅ Run Harmonizer after refactoring + - Verify improvements + - Confirm semantic alignment + +✅ Use scores as metrics + - Track improvement over time + - Set team standards (e.g., < 0.5) + +✅ Focus on highest scores first + - High/Critical (0.8+) → Immediate attention + - Medium (0.5-0.8) → Next refactoring session + - Low/Excellent (< 0.5) → Monitor + +BEFORE YOU REFACTOR: + +1. Understand what the function actually does +2. Identify the semantic contradiction +3. Decide: Rename or refactor? +4. If refactoring: Separate concerns +5. Choose honest, descriptive names +6. Verify with Harmonizer + +MEASURE SUCCESS: + + # Before refactoring + harmonizer myfile.py > before.txt + + # After refactoring + harmonizer myfile.py > after.txt + + # Compare scores - should decrease! + +The goal: Code that says what it means and means what it says! 
💛⚓ +""" + +# ============================================================================= +# Run This Example +# ============================================================================= + +if __name__ == "__main__": + print("\n" + "="*70) + print("REFACTORING JOURNEY DEMO") + print("="*70) + print("\nThis file shows before/after refactoring examples.") + print("\nRun: harmonizer examples/refactoring_journey.py") + print("\nNotice:") + print(" - 'BEFORE' functions have higher scores") + print(" - 'AFTER' functions have lower scores") + print(" - Score improvement shows semantic alignment") + print("\nKey insight: Good refactoring reduces semantic distance!") + print("="*70 + "\n") diff --git a/examples/severity_levels.py b/examples/severity_levels.py new file mode 100644 index 0000000..df349b5 --- /dev/null +++ b/examples/severity_levels.py @@ -0,0 +1,705 @@ +""" +Severity Levels: Examples at Each Score Range + +This file demonstrates functions at each severity level: +- EXCELLENT: 0.0 - 0.3 +- LOW: 0.3 - 0.5 +- MEDIUM: 0.5 - 0.8 +- HIGH: 0.8 - 1.0 +- CRITICAL: 1.0+ + +Run: harmonizer examples/severity_levels.py + +You'll see the full spectrum from perfect harmony to critical disharmony. +""" + +# ============================================================================= +# EXCELLENT HARMONY: Score 0.0 - 0.3 +# ============================================================================= +# These functions have perfect or near-perfect semantic alignment. +# Name matches implementation beautifully. +# No action needed - keep this quality! 
print("\n" + "="*70)
print("EXCELLENT HARMONY (0.0 - 0.3)")
print("="*70)


def create_user_account(username, email):
    """
    Expected score: ~0.05

    Perfect alignment:
    - Name: 'create' (Power dimension)
    - Actions: 'build', 'save', 'initialize' (Power dimension)
    - All operations support user creation
    - Semantic harmony ✓
    """
    user_data = build_user_object(username, email)
    save_to_database(user_data)
    initialize_user_preferences(user_data)
    return user_data


def calculate_total_amount(items):
    """
    Expected score: ~0.08

    Strong alignment:
    - Name: 'calculate' (Wisdom dimension)
    - Actions: 'sum', 'compute' (Wisdom dimension)
    - Pure calculation, no side effects
    - Self-documenting code ✓
    """
    return sum(item['price'] for item in items)


def validate_email_format(email):
    """
    Expected score: ~0.10

    Excellent harmony:
    - Name: 'validate' (Wisdom - checking)
    - Actions: 'check', 'verify' (Wisdom)
    - Returns boolean
    - No modifications ✓
    """
    return '@' in email and '.' in email.split('@')[1]


def delete_temporary_files(directory):
    """
    Expected score: ~0.12

    Clear intent:
    - Name: 'delete' (Power - destructive)
    - Actions: 'remove', 'clean' (Power)
    - Honest about destructive nature
    - No surprises ✓
    """
    import os
    for file in os.listdir(directory):
        if file.endswith('.tmp'):
            remove_file(os.path.join(directory, file))


def query_user_by_id(user_id):
    """
    Expected score: ~0.15

    Good alignment:
    - Name: 'query' (Wisdom - reading)
    - Actions: 'fetch', 'retrieve' (Wisdom)
    - Read-only operation
    - Returns data ✓
    """
    return fetch_from_database('users', user_id)


# =============================================================================
# LOW CONCERN: Score 0.3 - 0.5
# =============================================================================
# Slight semantic misalignment, but generally acceptable.
# Consider refactoring if you have time.
# Monitor to prevent drift.

print("\n" + "="*70)
print("LOW CONCERN (0.3 - 0.5)")
print("="*70)


def process_payment(payment_data):
    """
    Expected score: ~0.35

    Minor issue:
    - Name: 'process' (somewhat vague)
    - Actions: 'validate', 'charge', 'record' (mixed operations)
    - Works, but 'process' is generic
    - Consider: 'execute_payment_transaction' for clarity
    """
    validate_payment_data(payment_data)
    charge_credit_card(payment_data)
    record_transaction(payment_data)
    return True


def get_or_create_session(user_id):
    """
    Expected score: ~0.40

    Acceptable mixed semantics:
    - Name explicitly says: 'get' OR 'create'
    - Honest about dual purpose
    - Developer knows to expect modification
    - But: Consider splitting if possible
    """
    session = get_existing_session(user_id)
    if not session:
        session = create_new_session(user_id)
    return session


def update_user_profile(user_id, data):
    """
    Expected score: ~0.38

    Slight vagueness:
    - Name: 'update' (clear intent)
    - Actions: 'merge', 'save', 'validate' (mostly aligned)
    - Minor: validation before update adds slight distance
    - Could be: 'validate_and_update_profile' for precision
    """
    validate_profile_data(data)
    current = get_user_profile(user_id)
    merged = merge_profile_data(current, data)
    save_profile(user_id, merged)


def handle_error_gracefully(error):
    """
    Expected score: ~0.42

    Generic but acceptable:
    - Name: 'handle' (vague)
    - Actions: 'log', 'format', 'return' (specific)
    - Works for error handling context
    - Consider: 'log_and_format_error' for specificity
    """
    log_error_to_file(error)
    formatted = format_error_message(error)
    return formatted


def check_and_initialize_config(config_path):
    """
    Expected score: ~0.45

    Borderline acceptable:
    - Name: explicitly 'check AND initialize'
    - Honest about dual purpose
    - But: violates single responsibility
    - Better: Separate check_config() and initialize_config()
    """
    if not config_exists(config_path):
        initialize_default_config(config_path)
    return load_config(config_path)


# =============================================================================
# MEDIUM CONCERN: Score 0.5 - 0.8
# =============================================================================
# Significant semantic misalignment.
# Should refactor when you can.
# Potential for confusion and bugs.

print("\n" + "="*70)
print("MEDIUM CONCERN (0.5 - 0.8)")
print("="*70)


def validate_user_credentials(username, password):
    """
    Expected score: ~0.55

    Problem:
    - Name: 'validate' (checking/reading)
    - Actions: 'update_last_login' (writing/modifying)
    - Side effect hidden in validation
    - Should: Separate validate from tracking
    """
    is_valid = check_credentials(username, password)
    if is_valid:
        # ISSUE: Validation shouldn't modify!
        update_last_login(username)
    return is_valid


def get_user_preferences(user_id):
    """
    Expected score: ~0.60

    Problem:
    - Name: 'get' (read-only expectation)
    - Actions: 'create_default_if_missing' (write operation)
    - Surprising side effect
    - Should: Either truly read-only OR rename to 'ensure_preferences'
    """
    prefs = fetch_preferences(user_id)
    if not prefs:
        # ISSUE: Get shouldn't create!
        prefs = create_default_preferences(user_id)
        save_preferences(user_id, prefs)
    return prefs


def calculate_and_cache_result(input_data):
    """
    Expected score: ~0.58

    Problem:
    - Name says: 'calculate AND cache' (mixed)
    - Violates single responsibility
    - Hard to test calculation separately
    - Should: Separate calculate() from cache management
    """
    result = perform_calculation(input_data)
    # Mixed: calculation with caching
    store_in_cache(input_data, result)
    return result


def process_order(order_data):
    """
    Expected score: ~0.65

    Problem:
    - Name: 'process' (too vague)
    - Actions: validate, charge, ship, email (many operations)
    - God function - does too much
    - Should: Break into pipeline with orchestrator
    """
    validate_order(order_data)
    charge_payment(order_data)
    ship_items(order_data)
    send_confirmation_email(order_data)
    return True


def load_configuration_file(filepath):
    """
    Expected score: ~0.62

    Problem:
    - Name: 'load' (reading)
    - Actions: 'parse', 'validate', 'apply_defaults', 'save_back'
    - Modifies file when loading!
    - Should: Separate load from modification
    """
    config = parse_config_file(filepath)
    config = apply_default_values(config)
    # ISSUE: Load shouldn't save!
    save_config_file(filepath, config)
    return config


# =============================================================================
# HIGH CONCERN: Score 0.8 - 1.0
# =============================================================================
# Severe semantic contradiction.
# Refactor immediately.
# High risk of bugs and confusion.

print("\n" + "="*70)
print("HIGH CONCERN (0.8 - 1.0)")
print("="*70)


def read_log_file(logfile_path):
    """
    Expected score: ~0.82

    SEVERE PROBLEM:
    - Name: 'read' (non-destructive)
    - Actions: 'clear_after_reading' (destructive!)
    - CRITICAL: Developer expects safe read
    - Reality: File gets cleared!
    - FIX: Rename to 'read_and_clear_log' OR remove clearing
    """
    with open(logfile_path, 'r') as f:
        contents = f.read()

    # VIOLATION: Read shouldn't clear!
    clear_log_file(logfile_path)

    return contents


def check_inventory_level(product_id):
    """
    Expected score: ~0.85

    SEVERE PROBLEM:
    - Name: 'check' (read-only)
    - Actions: 'reorder_if_low' (write/action)
    - Hidden side effect: places orders!
    - FIX: Separate check from reordering logic
    """
    level = get_inventory_count(product_id)

    # VIOLATION: Check shouldn't trigger reorders!
    if level < 10:
        trigger_reorder(product_id)

    return level


def log_transaction(transaction_data):
    """
    Expected score: ~0.88

    SEVERE PROBLEM:
    - Name: 'log' (passive recording)
    - Actions: 'raise exception' (control flow change)
    - Caller expects logging to be safe
    - Reality: Can crash the program!
    - FIX: Either log OR validate separately
    """
    write_to_log(transaction_data)

    # VIOLATION: Log shouldn't raise!
    if transaction_data['amount'] > 10000:
        raise ValueError("Transaction too large!")


def filter_active_users(users_list):
    """
    Expected score: ~0.90

    CRITICAL PROBLEM:
    - Name: 'filter' (non-destructive selection)
    - Actions: 'delete_inactive_from_db' (destructive!)
    - Developer expects list filtering
    - Reality: Database gets modified!
    - FIX: Separate filtering from database operations
    """
    active = []
    for user in users_list:
        if user['is_active']:
            active.append(user)
        else:
            # VIOLATION: Filter shouldn't delete from DB!
            delete_from_database(user['id'])

    return active


def get_cache_value(cache_key):
    """
    Expected score: ~0.87

    CRITICAL PROBLEM:
    - Name: 'get' (read-only)
    - Actions: 'fetch_from_api' if missing (network call!)
    - Developer expects fast cache lookup
    - Reality: Might make slow API call!
    - FIX: Rename to 'get_or_fetch_value' or separate concerns
    """
    if cache_key in cache:
        return cache[cache_key]

    # VIOLATION: Get shouldn't make API calls!
    value = fetch_from_external_api(cache_key)
    cache[cache_key] = value

    return value


# =============================================================================
# CRITICAL CONCERN: Score 1.0+
# =============================================================================
# Extreme semantic contradiction.
# **IMMEDIATE ACTION REQUIRED**
# Very high risk - could cause data loss or security issues.

print("\n" + "="*70)
print("CRITICAL CONCERN (1.0+)")
print("="*70)


def validate_password(password):
    """
    Expected score: ~1.05

    CRITICAL EMERGENCY:
    - Name: 'validate' (checking)
    - Actions: 'delete_user_account' (destructive!)
    - Complete semantic opposite
    - Developer expects password check
    - Reality: DELETES ACCOUNTS!
    - FIX IMMEDIATELY: This is a catastrophic bug
    """
    if len(password) < 8:
        # CATASTROPHIC: Validation shouldn't delete users!
        delete_user_account_for_weak_password()
        return False
    return True


def get_user_data(user_id):
    """
    Expected score: ~1.02

    CRITICAL EMERGENCY:
    - Name: 'get' (read operation)
    - Actions: 'delete_user' (destructive operation)
    - Semantic opposite: read vs destroy
    - DATA LOSS RISK
    - FIX IMMEDIATELY: Rename or remove deletion
    """
    user = fetch_user(user_id)

    # CATASTROPHIC: Get shouldn't delete!
    delete_user_from_database(user_id)

    return user


# NOTE: this example is deliberately named save_user_preferences rather than
# save_preferences. A placeholder called save_preferences() is defined in the
# Placeholder Functions section below and is what get_user_preferences()
# calls; reusing that name here would let the later definition silently
# replace this example.
def save_user_preferences(user_id, preferences):
    """
    Expected score: ~0.98

    CRITICAL PROBLEM:
    - Name: 'save' (write/persist)
    - Actions: 'send_email_notifications' (communication)
    - Unexpected external communication
    - Privacy/security risk
    - FIX IMMEDIATELY: Remove email from save operation
    """
    store_preferences(user_id, preferences)

    # VIOLATION: Save shouldn't send emails!
    # Privacy issue: saving preferences shouldn't notify others
    send_email_to_all_admins(f"User {user_id} changed preferences")


def query_database(sql_query):
    """
    Expected score: ~1.10

    CRITICAL SECURITY RISK:
    - Name: 'query' (read-only SELECT)
    - Actions: Actually executes ANY SQL including DROP, DELETE
    - Developer expects safe read
    - Reality: Can destroy entire database!
    - FIX IMMEDIATELY: Validate query type or rename
    """
    # CATASTROPHIC: Query should only SELECT!
    # This could execute: DROP TABLE users;
    execute_raw_sql(sql_query)

    return fetch_results()


def check_permission(user_id, resource):
    """
    Expected score: ~1.15

    CRITICAL SECURITY RISK:
    - Name: 'check' (read-only verification)
    - Actions: 'grant_admin_if_missing' (privilege escalation!)
    - Security nightmare: checking grants access!
    - FIX IMMEDIATELY: Never mix checking with granting
    """
    has_permission = verify_user_access(user_id, resource)

    if not has_permission:
        # SECURITY VIOLATION: Check shouldn't grant access!
        grant_admin_privileges(user_id)
        has_permission = True

    return has_permission


# =============================================================================
# Placeholder Functions
# =============================================================================
# Stubs that only print or return canned values so the examples above can
# execute without a real database, mailer, cache or API.

def build_user_object(username, email):
    return {"username": username, "email": email}


def save_to_database(data):
    print(f"Saving to database: {data}")


def initialize_user_preferences(user):
    print(f"Initializing preferences for {user['username']}")


def remove_file(filepath):
    print(f"Removing file: {filepath}")


def fetch_from_database(table, id):
    return {"id": id, "data": "sample"}


def validate_payment_data(data):
    return True


def charge_credit_card(data):
    print("Charging credit card")


def record_transaction(data):
    print("Recording transaction")


def get_existing_session(user_id):
    return None


def create_new_session(user_id):
    return {"user_id": user_id, "session_id": "abc123"}


def validate_profile_data(data):
    return True


def get_user_profile(user_id):
    return {"name": "User"}


def merge_profile_data(current, new):
    return {**current, **new}


def save_profile(user_id, data):
    print(f"Saving profile for user {user_id}")


def log_error_to_file(error):
    print(f"Logging error: {error}")


def format_error_message(error):
    return str(error)


def config_exists(path):
    return False


def initialize_default_config(path):
    print(f"Initializing config at {path}")


def load_config(path):
    return {"setting": "value"}


def check_credentials(username, password):
    return True


def update_last_login(username):
    print(f"Updating last login for {username}")


def fetch_preferences(user_id):
    return None


def create_default_preferences(user_id):
    return {"theme": "light"}


def save_preferences(user_id, prefs):
    # Plain persistence stub used by get_user_preferences(); unlike the
    # save_user_preferences() example, it sends no notifications.
    print(f"Saving preferences for user {user_id}")


def perform_calculation(data):
    return 42


def store_in_cache(key, value):
    print(f"Caching {key} = {value}")


def validate_order(data):
    print("Validating order")


def charge_payment(data):
    print("Charging payment")


def ship_items(data):
    print("Shipping items")


def send_confirmation_email(data):
    print("Sending confirmation email")


def parse_config_file(path):
    return {}


def apply_default_values(config):
    return config


def save_config_file(path, config):
    print(f"Saving config to {path}")


def clear_log_file(path):
    print(f"CLEARING log file: {path}")


def get_inventory_count(product_id):
    return 5


def trigger_reorder(product_id):
    print(f"TRIGGERING REORDER for product {product_id}")


def write_to_log(data):
    print(f"Logging: {data}")


def delete_from_database(id):
    print(f"DELETING from database: ID {id}")


# In-memory cache read and written by get_cache_value()
cache = {}


def fetch_from_external_api(key):
    print(f"FETCHING from external API: {key}")
    return "api_value"


def delete_user_account_for_weak_password():
    print("CATASTROPHIC: DELETING USER ACCOUNT!")


def fetch_user(user_id):
    return {"id": user_id, "name": "User"}


def delete_user_from_database(user_id):
    print(f"CATASTROPHIC: DELETING USER {user_id}!")


def store_preferences(user_id, prefs):
    print(f"Storing preferences for {user_id}")


def send_email_to_all_admins(message):
    print(f"SENDING EMAIL TO ADMINS: {message}")


def execute_raw_sql(query):
    print(f"EXECUTING RAW SQL: {query}")


def fetch_results():
    return []


def verify_user_access(user_id, resource):
    return False


def grant_admin_privileges(user_id):
    print(f"SECURITY VIOLATION: GRANTING ADMIN TO {user_id}!")


# =============================================================================
# Summary and Guidance
# =============================================================================

"""
SEVERITY LEVEL GUIDE:

📗 EXCELLENT (0.0 - 0.3)
 ✓ Perfect semantic alignment
 ✓ Self-documenting code
 ✓ No action needed
 ✓ Example: create_user(), validate_email()

📘 LOW (0.3 - 0.5)
 ⚠ Minor semantic drift
 ⚠ Consider refactoring when convenient
 ⚠ Monitor to prevent worsening
 ⚠ Example: process_payment(), get_or_create()

📙 MEDIUM (0.5 - 0.8)
 ⚠️ Significant misalignment
 ⚠️ Should refactor soon
 ⚠️ Risk of confusion
 ⚠️ Example: validate() that modifies, get() that creates

📕 HIGH (0.8 - 1.0)
 🚨 Severe contradiction
 🚨 Refactor immediately
 🚨 High bug risk
 🚨 Example: read() that deletes, check() that triggers actions

🔥 CRITICAL (1.0+)
 🔥 EMERGENCY - FIX NOW
 🔥 Semantic opposite
 🔥 Data loss / security risk
 🔥 Example: validate() that deletes accounts, get() that destroys data

TRIAGE PRIORITY:

1. Fix CRITICAL immediately (1.0+) - these are emergencies
2. Fix HIGH next (0.8-1.0) - schedule within days
3. Fix MEDIUM when refactoring (0.5-0.8) - next sprint
4. Fix LOW opportunistically (0.3-0.5) - when touching the code
5. Maintain EXCELLENT (0.0-0.3) - use as examples

TEAM STANDARDS:

Suggested thresholds:
- Block CI/CD if score > 0.8 (HIGH/CRITICAL)
- Warn on PR if score > 0.5 (MEDIUM+)
- Set goal: all functions < 0.3 (EXCELLENT)

Run this file:
 harmonizer examples/severity_levels.py

You'll see the full range from excellent to critical! 💛⚓
"""

# =============================================================================
# Run This Example
# =============================================================================

if __name__ == "__main__":
    print("\n" + "="*70)
    print("SEVERITY LEVELS DEMONSTRATION")
    print("="*70)
    print("\nThis file contains functions at every severity level:")
    print("\n 📗 EXCELLENT (0.0-0.3): Perfect alignment")
    print(" 📘 LOW (0.3-0.5): Minor issues")
    print(" 📙 MEDIUM (0.5-0.8): Significant problems")
    print(" 📕 HIGH (0.8-1.0): Severe contradictions")
    print(" 🔥 CRITICAL (1.0+): EMERGENCIES")
    print("\nRun: harmonizer examples/severity_levels.py")
    print("\nUse this as a reference for prioritizing refactoring!")
    print("="*70 + "\n")