diff --git a/.claude/settings.local.json.license b/.claude/settings.local.json.license new file mode 100644 index 0000000..e7b6acf --- /dev/null +++ b/.claude/settings.local.json.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b5c226e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 +# +# Set default behavior to automatically normalize line endings +* text=auto + +# Force most text files to always use LF +*.astro text eol=lf +*.css text eol=lf +*.csv text eol=lf +*.cts text eol=lf +*.go text eol=lf +*.html text eol=lf +*.ini text eol=lf +*.js text eol=lf +*.json text eol=lf +*.jsx text eol=lf +*.md text eol=lf +*.mdx text eol=lf +*.mts text eol=lf +*.pkl text eol=lf +*.py text eol=lf +*.rs text eol=lf +*.sh text eol=lf +*.svelte text eol=lf +*.svg text eol=lf +*.toml text eol=lf +*.ts text eol=lf +*.tsx text eol=lf +*.txt text eol=lf +*.xml text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +.git* text eol=LF +LICENSE text eol=lf +LICENSE* text eol=lf + +# Force batch files to use CRLF (Windows scripts) +*.bat text eol=crlf +*.ps* text eol=crlf + +# Treat these as binary (no line ending conversion) +*.aac binary +*.avif binary +*.docx binary +*.eot binary +*.exe binary +*.flac binary +*.ico binary +*.jpg binary +*.m4a binary +*.mp3 binary +*.mp4 binary +*.ogg binary +*.otf binary +*.pdf binary +*.png binary +*.pptx binary +*.tar* binary +*.ttf binary +*.wav binary +*.webm binary +*.webp binary +*.woff binary +*.woff2 binary +*.xlsx binary +*.zip binary diff --git a/.github/actionlint.yml b/.github/actionlint.yml new file mode 100644 index 0000000..2439d3b --- /dev/null +++ b/.github/actionlint.yml @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +# ! Actionlint configuration file to ignore specific warnings +paths: + .github/workflows/cla.yml: + ignore: + - 'property "is_member" is not defined in object type {}' + - > + "github.event.pull_request.title" is potentially untrusted. avoid using it directly in inline scripts. instead, pass it through an environment variable. see https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions for more details + diff --git a/.github/chatmodes/analyze.chatmode.md b/.github/chatmodes/analyze.chatmode.md new file mode 100644 index 0000000..99c27ee --- /dev/null +++ b/.github/chatmodes/analyze.chatmode.md @@ -0,0 +1,10 @@ +--- +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 +description: 'Code Analysis' +tools: ["codebase", "githubRepo", "context7", "sequential-thinking", ] +--- +# Expert Code Analyst + +You're an experienced code analyst who specializes in identifying and resolving issues in codebases. Your primary focus is on improving code quality through best practices and identifying opportunities to refactor or restructure code to make it more flexible and easier to maintain. The user will ask you to research specific code, modules, or packages within the codebase. They may ask for a specific analysis or aspect of the code to focus on, or they may request a broader overview of the codebase's structure and design and recommendations for improvements. If you identify an opportunity for improving the code quality, you should provide actionable suggestions and code examples to help the user implement the improvements. Unless the user requests a different result, you should produce a report summarizing your findings with specific recommendations and references to specific code snippets by line number and filename. diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a12b2e5..fd2516a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,47 @@ -version: 2 -updates: -- package-ecosystem: cargo - directory: "/" - schedule: - interval: daily - time: "08:00" - open-pull-requests-limit: 10 +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +version: 2 +updates: + - package-ecosystem: cargo + dependency-type: all + update-types: + - version-update:semver-minor + - version-update:semver-patch + versioning-strategy: increase + directory: "/" + schedule: + interval: daily + time: "23:00" + open-pull-requests-limit: 10 + commit-message: + prefix: "deps" + assignees: + - codegen-sh[bot] + reviewers: + - codegen-sh[bot] + labels: + - "dependencies" + rebase-strategy: auto + - package-ecosystem: github-actions + dependency-type: all + update-types: + - version-update:semver-major + - version-update:semver-minor + - version-update:semver-patch + versioning-strategy: increase + directory: "/.github/workflows" + schedule: + interval: daily + time: "23:00" + open-pull-requests-limit: 10 + commit-message: + prefix: "deps" + assignees: + - codegen-sh[bot] + reviewers: + - codegen-sh[bot] + labels: + - "dependencies" + rebase-strategy: auto diff --git a/.github/dontusefornow.md b/.github/dontusefornow.md new file mode 100644 index 0000000..31f5d8e --- /dev/null +++ b/.github/dontusefornow.md @@ -0,0 +1,148 @@ + + +# Copilot Instructions for Thread + +## Project Overview + +Thread is a Rust code analysis engine that provides intelligent context for AI assistants. The project is transitioning from vendored ast-grep CLI code to a multi-environment service architecture supporting CLI, cloud, WASM, and CI/CD deployments. + +## Architecture Guidelines + +### Service Layer Pattern +The codebase follows a service abstraction pattern to support multiple environments: + +```rust +// Pure service functions (environment-agnostic) +pub async fn scan_with_services( + file_discovery: Arc, + config_service: Arc, + options: ScanOptions, +) -> Result +``` + +### Crate Organization + +- **ag-thread/**: Vendored ast-grep modules being refactored for service layers +- **thread-core/**: Core traits, types, and errors (pure abstractions) +- **thread-engine/**: Main analysis implementation using petgraph +- **thread-parse/**: AST-grep integration and language detection +- **Service-ready crates**: `ag-core`, `ag-search`, `ag-fix`, `ag-types`, `ag-label` +- **Needs refactoring**: `ag-scan`, `ag-utils`, `ag-check-rule` (heavy CLI dependencies) + +## Development Commands + +Essential commands for this workspace: + +```bash +# Build all crates (except WASM) +mise run build +mise run b + +# WASM builds +mise run build-wasm # Development (single-threaded) +mise run build-wasm-release # Production optimized + +# Testing and quality +mise run test # Tests with cargo nextest +mise run lint # Full linting via hk run check +mise run ci # All CI checks +``` + +## Key Patterns to Follow + +### 1. Service Trait Definitions +When creating new services, follow the pattern from `ag-types`: + +```rust +#[async_trait] +pub trait YourService: Send + Sync { + async fn your_method(&self, input: &str) -> Result; +} +``` + +### 2. Environment-Agnostic Core Functions +Avoid CLI dependencies in core logic: + +```rust +// βœ… Good: Pure function with injected services +pub async fn analyze_with_services( + content: String, + services: &ServiceRegistry +) -> Result + +// ❌ Avoid: Direct filesystem or terminal access +pub fn analyze_files(paths: Vec) -> Result<()> +``` + +### 3. Multi-Environment Support +Structure implementations for different environments: + +```rust +// CLI implementation +impl YourService for CliYourService { /* uses std::fs */ } + +// Cloud implementation +impl YourService for CloudYourService { /* uses S3/HTTP */ } + +// WASM implementation +impl YourService for WasmYourService { /* uses fetch API */ } +``` + +## CLI Dependencies Analysis Status + +Refer to individual `CLI_DEPENDENCIES.md` files in each ag-thread crate: + +- **Immediate attention needed**: `ag-scan/`, `ag-utils/` (heavy CLI dependencies) +- **Service-ready**: `ag-core/`, `ag-search/`, `ag-fix/`, `ag-types/`, `ag-label/` +- **Minor refactoring**: `ag-rule/`, `ag-check-rule/` + +## Critical Abstractions + +### File Operations +Replace direct filesystem access with service traits: +```rust +// Instead of std::fs::read_to_string +let content = file_service.read_file(path).await?; +``` + +### Terminal I/O +Replace direct terminal access with service traits: +```rust +// Instead of println! or crossterm +output_service.write(&format!("Result: {}", result)).await?; +``` + +### Configuration Loading +Replace direct file config loading: +```rust +// Instead of reading YAML files directly +let rules = config_service.load_rules(ConfigSource::Path(path)).await?; +``` + +## Testing Strategy + +- Use `cargo nextest -j 1` for parallel tests with race condition prevention +- Mock service implementations for unit tests +- Environment-specific integration tests for each service implementation +- `RUST_BACKTRACE=1` enabled for debugging + +## WASM Considerations + +- Default builds are single-threaded for Cloudflare Workers compatibility +- Core logic separated from filesystem operations for WASM portability +- Multi-threaded builds available for browser environments (`--multi-threading`) + +When working with WASM targets, ensure no direct filesystem or process dependencies in core libraries. + +## Current Development Focus + +**Week 1 Sprint**: Establishing service layer foundations +- Refactoring `ag-scan` to use service abstractions +- Creating `ag-services` crate with core trait definitions +- Implementing CLI service adapters to maintain current functionality + +The goal is to enable Thread to analyze code and provide AI-friendly context across all deployment environments while maintaining the performance and functionality of the original ast-grep implementation. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0648fcf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,88 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +# ! GitHub Action to run the CI pipeline for Rust projects +# ! This action is triggered on pushes and pull requests to the main and staging branches. +name: CI +on: + push: + branches: [main, staging] + pull_request: + branches: [main, staging] +env: + CARGO_TERM_COLOR: always +jobs: + test: + name: Test Suite + runs-on: ubuntu-latest + strategy: + matrix: + rust: + - stable + - beta + - nightly + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + components: rustfmt, clippy + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo index + uses: actions/cache@v4 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo build + uses: actions/cache@v4 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} + - name: Set up mise + run: | + chmod -R +x ./scripts + ./install-mise.sh + MISE="$HOME/.local/bin/mise" + echo \"eval "$($MISE activate bash)"\" >> "$HOME/.bashrc" + source "$HOME/.bashrc" + $MISE run install + - name: Run hk ci workflow + run: > + "$HOME/.local/bin/mise" run ci + + security_audit: + name: Security Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: rustsec/audit-check@v1.4.1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + coverage: + name: Code Coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + components: llvm-tools-preview + - name: Install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov + - name: Generate code coverage + run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: lcov.info + fail_ci_if_error: true diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index d634c1c..f258c08 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -1,43 +1,180 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +# ! GitHub Action to check CLA signatures for Knitli repositories +# ! This action is triggered on issue comments and pull request events. name: "CLA Assistant" on: issue_comment: types: [created] pull_request_target: - types: [opened,closed,synchronize] - + types: [opened, closed, synchronize] permissions: actions: write contents: write # this can be 'read' if the signatures are in remote repository pull-requests: write statuses: write - jobs: - CLAAssistant: + set-pr-title: + runs-on: ubuntu-latest + if: > + github.event_name == 'pull_request_target' + + steps: + - name: "Set PR Title" + env: + EV_TITLE: ${{ github.event.pull_request.title }} + EV_BODY: ${{ github.event.pull_request.body }} + run: | + echo "${EV_TITLE}" + set-issue-title: runs-on: ubuntu-latest + if: > + github.event_name == 'issue_comment' + steps: + - name: "Set Issue Title" + env: + EV_TITLE: ${{ toJson(github.event.issue.title) }} + EV_BODY: ${{ toJson(github.event.comment.body) }} + run: | + echo "${EV_TITLE}" + check-cla: + runs-on: ubuntu-latest + steps: + - name: "SetVariables" + run: | + # shellcheck disable=SC2296 + # This script sets up environment variables based on the GitHub event context. + echo "Setting up variables..." + repo="${{ github.repository }}" + if [[ $repo != knitli* ]]; then + echo "This action is only for Knitli repositories, exiting..." + echo "looks like we're in a forked repository, exiting..." + exit 0 + fi + actor="${{ github.actor }}" + echo "EV_ACTOR=$actor" >> "$GITHUB_ENV" + event="${{ github.event_name }}" + event="${event//_target/}" + event="${event//_comment/}" + if [[ $event == pull_request* ]]; then + author="${{ github.event.pull_request.user.login }}" + email="${{ github.event.pull_request.user.email }}" + { + echo "IS_PR=true"; + echo "IS_ISSUE=false"; + echo "EV_NUMBER=\"${{ github.event.pull_request.number }}\""; + echo "EV_AUTHOR=\"$author\""; + echo "EV_URL=\"${{ github.event.pull_request.html_url }}\""; + echo "EV_EMAIL=\"$email\""; + echo "IS_RECHECK=false"; + } >> "$GITHUB_ENV" + else + author="${{ github.event.issue.user.login }}" + email="${{ github.event.issue.user.email }}" + { + echo "IS_PR=false"; + echo "IS_ISSUE=true"; + echo "EV_NUMBER=\"${{ github.event.issue.number }}\""; + echo "EV_AUTHOR=\"$author\""; + echo "EV_URL=\"${{ github.event.issue.html_url }}\""; + echo "EV_EMAIL=\"$email\""; + } >> "$GITHUB_ENV" + if [[ "$EV_BODY" == 'recheck' || "$EV_BODY" == *'I read the contributors license agreement and I agree to it.'* ]]; then + echo "IS_RECHECK=true" >> "$GITHUB_ENV" + else + echo "IS_RECHECK=false" >> "$GITHUB_ENV" + fi + fi + # if it's a rerun of the action, then the author is the actor + if [[ -z $author ]] || [[ $author != "$actor" ]]; then + author="$actor" + if [[ -z $email ]]; then + email="${author}@users.noreply.github.com" + fi + echo "EV_AUTHOR=$author" >> "$GITHUB_ENV" + echo "EV_EMAIL=$email" >> "$GITHUB_ENV" + fi + response=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer \"${{ secrets.GITHUB_TOKEN }}\"" \ + "https://api.github.com/orgs/knitli/members/$author") + if [ "$response" == "204" ]; then + echo "is_member=true" >> "$GITHUB_OUTPUT" + echo "User $author is a member of Knitli." + echo "MEMBER=true" >> "$GITHUB_ENV" + else + if [[ $email == *@knit.li || $email == *@knitli.com || $author == bashandbone ]]; then + echo "MEMBER=true" >> "$GITHUB_ENV" + echo "User $author has a Knitli email or is its founder. Provided email: $email" + echo "is_member=true" >> "$GITHUB_OUTPUT" + else + echo "MEMBER=false" >> "$GITHUB_ENV" + echo "is_member=false" >> "$GITHUB_OUTPUT" + fi + fi + cla-assistant: + needs: check-cla + if: > + (needs.check-cla.outputs.is_member && needs.check-cla.outputs.is_member == 'false' && needs.check-cla.outputs.is_member != 'true') || needs.check-cla.outputs.is_member == '' + + runs-on: ubuntu-latest + steps: + - name: Debug + run: | + if [[ $DEBUG_ACTIONS == 'true' ]]; then + printenv + fi - name: "CLA Assistant" if: > - (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the contributors license agreement and I agree to it.') || github.event_name == 'pull_request_target' + (env.IS_RECHECK && env.IS_PR) && (env.IS_RECHECK == 'true' || env.IS_PR == 'true') + uses: contributor-assistant/github-action@v2.6.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # the below token should have repo scope and must be manually added by you in the repository's secret - # This token is required only if you have configured to store the signatures in a remote repository/organization - # PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} with: path-to-signatures: 'cla.json' - path-to-document: 'https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md' # e.g. a CLA or a DCO document - # branch should not be protected - branch: 'main' - allowlist: bashandbone,codegen-sh[bot],dependabot[bot],github-actions[bot],actions-user,changeset-bot + path-to-document: 'https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md' + branch: 'staging' + allowlist: > + bashandbone,codegen-sh[bot],dependabot[bot],github-actions[bot],actions-user,changeset-bot - # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken - #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) - #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) create-file-commit-message: 'Adding file for tracking CLA signatures' - signed-commit-message: '$contributorName signed πŸ–ŠοΈ the Thread 🧡 CLA in $owner/$repo#$pullRequestNo' - custom-notsigned-prcomment: 'βœ‹ Hey $contributorName, πŸ›‘ thanks for your contribution! Before we can accept it, **we need you to sign our contributors license agreement (CLA)**. πŸ–ŠοΈ' - custom-pr-sign-comment: 'I have read the [Thread contributors license agreement](https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md) and I agree to it.' - custom-allsigned-prcomment: 'πŸš€ GOOD TO GO. Everyone has agreed to the CLA. πŸ‘' + signed-commit-message: > + $env.EV_AUTHOR signed πŸ–ŠοΈ the Thread 🧡 CLA in [$env.GITHUB_REPOSITORY # $env.EV_NUMBER]($env.EV_URL) + + custom-notsigned-prcomment: | + βœ‹πŸ›‘ Hey $env.EV_AUTHOR, + + ## Thanks for your contribution to Thread! + + ### You need to agree to the CLA first... πŸ–ŠοΈ + + Before we can accept your (awesome) contribution, **we need you to agree to our contributors license agreement (CLA)**. πŸ–ŠοΈ + + ### To agree to the CLA, please comment: + > I read the contributors license agreement and I agree to it. + Those words are important[^1], so please don't change them. πŸ˜‰ + + [^1]: Our bot needs those *exact* words to recognize that you agree to the CLA. If you want to add something else, please do so after those words. πŸ˜‰ + custom-pr-sign-comment: | + $env.EV_AUTHOR, agrees to the Thread CLA. + + $env.EV_AUTHOR acknowledges they read and agree to the [Thread contributors license agreement](https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md). + custom-allsigned-prcomment: | + ## πŸš€ GOOD TO GO. Everyone has agreed to the CLA. πŸ‘ + + ### Thanks for your contribution to Thread! 🧡 + Your contribution is now ready to be merged[^1]. πŸŽ‰ + + ### Maintainers: Ship this PR! πŸ“¦πŸš€ + + [^1]: If it passes the other CI checks, of course. πŸ˜‰ I'm just here for the legal stuff. + # UNUSED OPTIONS #lock-pullrequest-aftermerge: false - if you don't want this bot to automatically lock the pull request after merging (default - true) #use-dco-flag: true - If you are using DCO instead of CLA + #TODO: move the signatures to a remote repository + #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) + #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) + # PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} diff --git a/.gitignore b/.gitignore index 785d15b..a888866 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + .vscode/* !.vscode/settings.json !.vscode/tasks.json @@ -5,7 +10,18 @@ !.vscode/extensions.json !.vscode/*.code-snippets !*.code-workspace -!.vscode/mcp.json + +# MCP -- we keep project level mcp in root .mcp.json +.roo/mcp.json +.roo/mcp.json.license +.vscode/mcp.json +.vscode/mcp.json.license +.claude/settings.local.json.license + +**/target/ +**/dist/ +**/node_modules/ +**/pkg/ # Built Visual Studio Code Extensions *.vsix @@ -15,7 +31,8 @@ # Generated by Cargo # will have compiled files and executables debug -target +target/ +dist/ # These are backup files generated by rustfmt **/*.rs.bk @@ -197,6 +214,9 @@ tags *~ +crates/rule-engine/serialization_analysis/serialization_analysis +!crates/rule-engine/serialization_analysis/serialization_analysis.rs + # temporary files which can be created if a process still has a handle open of a deleted file .fuse_hidden* @@ -217,3 +237,26 @@ secring.* .cursorignore .cursorindexingignore +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +# Generated by cargo mutants +# Contains mutation testing data +**/mutants.out*/ + +# RustRover +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +.vendored_research/ diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..0c39555 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,31 @@ +{ + "mcpServers": { + "filesystem": { + "type": "stdio", + "command": "npx", + "args": [ + "-y", + "@modelcontextprotocol/server-filesystem", + "~/thread", + "~/.cargo" + ], + "env": {} + }, + "context7": { + "args": [ + "--bun", + "@upstash/context7-mcp@latest" + ], + "command": "bunx", + "type": "stdio" + }, + "sequential-thinking": { + "args": [ + "-y", + "@modelcontextprotocol/server-sequential-thinking" + ], + "command": "npx", + "type": "stdio" + } + } +} diff --git a/.mcp.json.license b/.mcp.json.license new file mode 100644 index 0000000..e7b6acf --- /dev/null +++ b/.mcp.json.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/.roo/mcp.json.license b/.roo/mcp.json.license new file mode 100644 index 0000000..e7b6acf --- /dev/null +++ b/.roo/mcp.json.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d0c9e00 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "git.enabled": true, + "chat.mcp.serverSampling": { + "thread/.vscode/mcp.json: browser-debug-tools": { + "allowedModels": [] + } + } +} diff --git a/.yamlfmt.yml b/.yamlfmt.yml index 4dc13cc..66189c9 100644 --- a/.yamlfmt.yml +++ b/.yamlfmt.yml @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 line_ending: lf gitignore_excludes: true format: diff --git a/CLAUDE.md b/CLAUDE.md index e05cdbb..ea4ea04 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,3 +1,10 @@ + + # CLAUDE.md This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. @@ -12,7 +19,7 @@ Thread is a Rust code analysis engine designed to generate intelligent context f Thread follows a single-representation approach: -``` +```plaintext File β†’ ast-grep (parsing) β†’ petgraph (analysis) β†’ Content store (dedup) β†’ API ``` @@ -41,6 +48,7 @@ The workspace follows Rust conventions with core types separated from implementa ### Design Rationale This structure follows the pattern used by `serde` (core traits) vs `serde_json` (implementation): + - `thread-core` defines `LanguageParser` trait, `CodeElement` types, `Result` types - `thread-engine` implements the actual analysis logic and graph building - Other crates can depend on `thread-core` for types without pulling in the full engine @@ -48,24 +56,28 @@ This structure follows the pattern used by `serde` (core traits) vs `serde_json` ## Development Commands ### Build Commands + - `mise run build` or `mise run b` - Build all crates (except WASM) - `mise run build-release` or `mise run br` - Release build - `mise run build-wasm` or `mise run bw` - Build WASM for development (single-threaded) - `mise run build-wasm-release` or `mise run bwr` - Build WASM for production ### WASM Build Options + - `cargo run -p xtask build-wasm` - Basic WASM build - `cargo run -p xtask build-wasm --multi-threading` - Multi-threaded for browsers - `cargo run -p xtask build-wasm --release` - Production optimized - `cargo run -p xtask build-wasm --profiling` - With profiling enabled ### Testing and Quality + - `mise run test` or `mise run t` - Run tests with `cargo nextest` - `mise run lint` or `mise run c` - Full linting via `hk run check` - `mise run fix` or `mise run f` - Auto-fix formatting and linting - `mise run ci` - Run all CI checks (build + lint + test) ### Development Setup + - `mise run install` - Install dev tools and git hooks - `mise run update` - Update all dev tools - `mise run clean` - Clean build artifacts and caches @@ -73,6 +85,7 @@ This structure follows the pattern used by `serde` (core traits) vs `serde_json` ## Implementation Plan Context ### Current Sprint (Week 1) + - **Day 1**: βœ… Project cleanup and setup - **Day 2**: πŸ”„ Basic ast-grep integration (current focus) - **Day 3**: Petgraph integration @@ -82,14 +95,18 @@ This structure follows the pattern used by `serde` (core traits) vs `serde_json` - **Day 7**: Week 1 demo and testing ### Near-term Goals + The immediate target is a working `analyze_rust_file()` function that: + 1. Parses Rust code with ast-grep 2. Extracts functions, calls, and imports 3. Builds a petgraph representation 4. Provides basic graph queries ### MVP Definition + A CLI tool that can analyze Rust files and generate AI-friendly context showing: + - Function definitions with line numbers - Call relationships (what calls what) - Import dependencies @@ -98,12 +115,14 @@ A CLI tool that can analyze Rust files and generate AI-friendly context showing: ## Key Design Decisions ### What to Skip for MVP + - ❌ type-sitter (build complexity) - ❌ tree-sitter-graph (memory management complexity) - ❌ ropey (incremental editing - add later) - ❌ Multi-language support initially (Rust first) ### What to Keep + - βœ… ast-grep (mature parsing with language detection) - βœ… petgraph (single source of truth) - βœ… Content-addressable storage (essential for deduplication) @@ -128,9 +147,10 @@ A CLI tool that can analyze Rust files and generate AI-friendly context showing: When an AI asks: "How does the `parse` function work in Thread?" Thread should provide: + 1. **Function location**: Exact file and line numbers 2. **Dependencies**: What functions `parse` calls 3. **Usage**: What functions call `parse` 4. **Context**: Related code snippets with line numbers -This enables AI assistants to get precisely the context they need without dumping entire files. \ No newline at end of file +This enables AI assistants to get precisely the context they need without dumping entire files. diff --git a/CONTRIBUTORS_LICENSE_AGREEMENT.md b/CONTRIBUTORS_LICENSE_AGREEMENT.md index 8579800..d3299ec 100644 --- a/CONTRIBUTORS_LICENSE_AGREEMENT.md +++ b/CONTRIBUTORS_LICENSE_AGREEMENT.md @@ -1,3 +1,10 @@ + + # Thread Contributor License Agreement (CLA) ## Welcome to Thread! 🧡πŸͺ‘ @@ -21,7 +28,7 @@ By submitting code, docs, or anything else to Thread, you agree: ## Standard Legal Stuff πŸ‘©β€βš–οΈ - **As is:** You’re contributing your work β€œas is” with no promises or guarantees. We’re not responsible for bugs, legal issues, or breakage. -- **You keep your rights:** You can use, share, license, or remix *your own* work anywhere, any way you want (yes, even wallpaper). +- **You keep your rights:** You can use, share, license, or remix *your own* work anywhere, any way you want (yes, even as wallpaper). - **No Take-Backs:** Once you contribute, you can’t undo or take back your permission. --- @@ -43,5 +50,5 @@ Clarity is our mission. If anything’s fuzzy, ask us before you hit β€œsubmit. --- -[^1]: Knitli is the company behind Thread. We’re a Delaware corporation based in the U.S., and this agreement is made under Delaware law. Delaware: where corporations are born πŸ‘ΆπŸΌπŸ§’πŸ’. +[^1]: Knitli is the company behind Thread. We’re a Delaware corporation based in the U.S., and Delaware law governs this agreement. Delaware: where corporations are born πŸ‘ΆπŸΌπŸ§’πŸ’. [^2]: β€œShare” includes distributing your contribution. We can include it in anything we make or sell, anywhere and any way we want. diff --git a/Cargo.lock b/Cargo.lock index 8e2e470..3b2c828 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + [[package]] name = "anyhow" version = "1.0.98" @@ -18,37 +30,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] -name = "ast-grep-core" -version = "0.38.6" +name = "ast-grep-config" +version = "0.38.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79f9731e95d381ae2d8ee1244d21bbf3982840435341ab40260896a76e7e9763" +checksum = "17fc7253f4513826011e9f6971a72af19614448d4e37d356de7b036ec92c38d5" dependencies = [ + "ast-grep-core", "bit-set", + "globset", "regex", + "schemars 0.8.22", + "serde", + "serde_yaml", "thiserror", - "tree-sitter", ] [[package]] -name = "ast-grep-dynamic" -version = "0.38.6" +name = "ast-grep-core" +version = "0.38.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7ff88b95e38a49522827c9a94835d427fc29b41397fa3cca1b8b627a2a55c0" +checksum = "a807991d95797b16ed5bf7431be8d1890dccb8c1b2e560c1f0d983a4f125405d" dependencies = [ - "ast-grep-core", - "ignore", - "libloading", - "serde", - "target-triple", + "bit-set", + "regex", "thiserror", "tree-sitter", ] [[package]] name = "ast-grep-language" -version = "0.38.6" +version = "0.38.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e32f2833a0fbfdb66e8fbb4de26c89384ab3d6616d2cf00ce3db1fd4de08c5e" +checksum = "6c11c57842b8103eeb4133b4f3423915e13be161e794b44f022b092d034c70aa" dependencies = [ "ast-grep-core", "ignore", @@ -65,7 +78,7 @@ dependencies = [ "tree-sitter-html", "tree-sitter-java", "tree-sitter-javascript", - "tree-sitter-json", + "tree-sitter-json 0.23.0", "tree-sitter-kotlin-sg", "tree-sitter-lua", "tree-sitter-php", @@ -78,6 +91,12 @@ dependencies = [ "tree-sitter-yaml", ] +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bit-set" version = "0.8.0" @@ -93,6 +112,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + [[package]] name = "bstr" version = "1.12.0" @@ -109,11 +134,17 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" -version = "1.2.29" +version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ "shlex", ] @@ -124,6 +155,58 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + [[package]] name = "console_error_panic_hook" version = "0.1.7" @@ -134,6 +217,39 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "criterion" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "itertools 0.13.0", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -160,15 +276,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] -name = "dlmalloc" -version = "0.2.9" +name = "crunchy" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01597dde41c0b9da50d5f8c219023d63d8f27f39a27095070fd191fddc83891" -dependencies = [ - "cfg-if", - "libc", - "windows-sys", -] +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "dyn-clone" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" [[package]] name = "either" @@ -183,10 +300,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "fixedbitset" -version = "0.4.2" +name = "getrandom" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] [[package]] name = "globset" @@ -201,6 +324,16 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.15.4" @@ -233,6 +366,24 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -256,13 +407,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] -name = "libloading" -version = "0.8.8" +name = "libyml" +version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" +checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" dependencies = [ - "cfg-if", - "windows-targets", + "anyhow", + "version_check", ] [[package]] @@ -287,6 +438,15 @@ dependencies = [ "walkdir", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -294,14 +454,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] -name = "petgraph" -version = "0.6.5" +name = "oorandom" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset", - "indexmap", -] +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pico-args" @@ -309,6 +471,43 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -327,6 +526,50 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rapidhash" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9813f789f95ee4fe6b4d01834404d7cccacbc3f6c029343af910b3c2835eb9f1" +dependencies = [ + "rand", +] + [[package]] name = "rayon" version = "1.10.0" @@ -349,6 +592,26 @@ dependencies = [ "wasm_sync", ] +[[package]] +name = "ref-cast" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" version = "1.11.1" @@ -399,6 +662,55 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive 0.8.22", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive 1.0.4", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "serde" version = "1.0.219" @@ -419,6 +731,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_json" version = "1.0.140" @@ -432,12 +755,50 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "serde_yml" +version = "0.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" +dependencies = [ + "indexmap", + "itoa", + "libyml", + "memchr", + "ryu", + "serde", + "version_check", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdeez" +version = "2.0.0-dev5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97e4567daf40a565ebc9058e929445e55eef4d254e3deeb936cd6ef586cc6ba9" +dependencies = [ + "cfg-if", + "paste", +] + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -455,12 +816,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "target-triple" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790" - [[package]] name = "thiserror" version = "2.0.12" @@ -482,80 +837,133 @@ dependencies = [ ] [[package]] -name = "thread" -version = "0.0.1" +name = "thread-ast-engine" +version = "0.1.0" dependencies = [ + "bit-set", "cc", + "criterion", + "regex", + "thiserror", + "thread-language", + "thread-utils", + "tree-sitter", + "tree-sitter-typescript", ] [[package]] -name = "thread-cli" -version = "0.0.1" +name = "thread-language" +version = "0.1.0" +dependencies = [ + "cc", + "criterion", + "ignore", + "serde", + "thread-ast-engine", + "thread-utils", + "tree-sitter", + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-css", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json 0.24.8", + "tree-sitter-kotlin-sg", + "tree-sitter-lua", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-swift", + "tree-sitter-typescript", + "tree-sitter-yaml", +] [[package]] -name = "thread-core" -version = "0.0.1" +name = "thread-rule-engine" +version = "0.1.0" dependencies = [ + "ast-grep-config", "ast-grep-core", - "ast-grep-dynamic", "ast-grep-language", + "bit-set", + "cc", + "criterion", + "globset", + "regex", + "schemars 1.0.4", + "serde", + "serde_json", + "serde_yml", + "thiserror", + "thread-ast-engine", + "thread-language", + "thread-utils", + "tree-sitter", + "tree-sitter-javascript", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", ] [[package]] -name = "thread-diff" +name = "thread-services" version = "0.1.0" - -[[package]] -name = "thread-engine" -version = "0.0.1" dependencies = [ - "anyhow", - "petgraph", - "rayon", "serde", "thiserror", - "thread-core", - "thread-parser", - "thread-store", + "thread-ast-engine", + "thread-language", + "thread-rule-engine", + "thread-utils", ] [[package]] -name = "thread-fs" -version = "0.1.0" - -[[package]] -name = "thread-parser" +name = "thread-utils" version = "0.0.1" dependencies = [ - "ast-grep-core", - "ast-grep-dynamic", - "ast-grep-language", + "memchr", + "rapidhash", + "simdeez", ] -[[package]] -name = "thread-store" -version = "0.1.0" - [[package]] name = "thread-wasm" version = "0.0.1" dependencies = [ "console_error_panic_hook", - "dlmalloc", "js-sys", "rayon", "serde", - "thread-core", + "thread-language", + "thread-utils", "wasm-bindgen", "wasm-bindgen-test", "web-sys", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tree-sitter" -version = "0.25.6" +version = "0.25.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0" +checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" dependencies = [ "cc", "regex", @@ -685,6 +1093,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-json" +version = "0.24.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d727acca406c0020cffc6cf35516764f36c8e3dc4408e5ebe2cb35a947ec471" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-kotlin-sg" version = "0.4.0" @@ -797,6 +1215,18 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -807,6 +1237,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -1005,9 +1444,38 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "xtask" version = "0.1.0" dependencies = [ "pico-args", ] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.lock.license b/Cargo.lock.license new file mode 100644 index 0000000..e7b6acf --- /dev/null +++ b/Cargo.lock.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/Cargo.toml b/Cargo.toml index 1de1082..3867f3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,18 @@ -[package] +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +cargo-features = ["codegen-backend"] + +# ========================================================= +#* THREAD - Workspace +# ========================================================= + +[workspace.package] description = "A safe, fast, flexible code analysis and code parsing library and tool. Built with tree-sitter, ast-grep, and difftastic in Rust." -resolver = "3" edition = "2024" rust-version = "1.85" license = "AGPL-3.0-or-later" -name = "thread" version = "0.0.1" keywords = [ "parsing", @@ -12,7 +20,7 @@ keywords = [ "tree-sitter", "static-analysis", "repository-tools", - "context" + "context", ] categories = ["development-tools", "parser-implementations", "text-processing"] repository = "https://github.com/knitli/thread" @@ -23,37 +31,75 @@ authors = [ "Knitli Inc ", "Adam Poulemanos for Knitli ", ] +include = [ + "README.md", + "LICENSE.md", + "CONTRIBUTORS_LICENSE_AGREEMENT.md", + "CONTRIBUTING.md", + "VENDORED.md", + "src/**", + "CHANGELOG.md", + "sbom.spdx", + "examples/**", + "tests/**", +] [workspace] +resolver = "3" members = [ - "crates/thread-cli", "crates/thread-core", "crates/thread-engine", "crates/thread-diff", "crates/thread-fs", "crates/thread-parse", "crates/thread-store", "crates/thread-wasm" -, "xtask"] + "crates/ast-engine", + "crates/rule-engine", + "crates/services", + "crates/language", + "crates/utils", + "crates/wasm", + "xtask", +] -# Note: difftastic's diffing functionality is vendored internally [workspace.dependencies] -anyhow = "1.0.98" +bit-set = "0.8.0" +cfg-if = "1.0.1" +ignore = "0.4.22" +regex = "1.10" +schemars = "1.0.4" +serde = { version = "1.0.219", features = ["derive"] } thiserror = "2.0.12" +tree-sitter = "0.25.8" + +serde_json = "1.0.140" +serde_yaml = { version = "0.0.12", package = "serde_yml" } -string-interner = "0.19.0" # for fast string interning -serde = { version = "1.0.219", features = ["derive"] } # serialization library +# speed! +rapidhash = "1.4.0" +memchr = { version = "2.7.5", features = ["std"] } +simdeez = "2.0.0-dev5" +rayon = { version = "1.10.0" } -[features] -# default = ["derive", "languages", "parser", "query"] -[build-dependencies] -cc = "*" +thread-ast-engine = { path = "crates/ast-engine", default-features = false } +thread-language = { path = "crates/language", default-features = false } +thread-rule-engine = { path = "crates/rule-engine", default-features = false } +thread-services = { path = "crates/services", default-features = false } +thread-utils = { path = "crates/utils", default-features = false } +thread-wasm = { path = "crates/wasm", default-features = false } [profile.dev] opt-level = 1 lto = false debug = true +incremental = true debug-assertions = true -codegen-units = 256 # More codegen units for faster compilation +codegen-units = 256 # More codegen units for faster compilation + +[profile.dev-debug] +inherits = "dev" +codegen-backend = "cranelift" [profile.release] -lto = true # Link-time optimization +lto = true # Link-time optimization codegen-units = 1 -panic = "abort" # Smaller binary size -opt-level = 3 # Maximum optimization +incremental = false +panic = "abort" # Smaller binary size +opt-level = 3 # Maximum optimization [profile.release-dev] inherits = "release" @@ -66,9 +112,10 @@ codegen-units = 256 [profile.wasm-release] inherits = "release" +incremental = false lto = true strip = true -opt-level = "s" # optimize for size in WASM +opt-level = "s" # optimize for size in WASM # Optimize proc-macros even in debug builds [profile.dev.package."*"] @@ -123,16 +170,3 @@ transmute_undefined_repr = "allow" unnecessary_wraps = "allow" unused_self = "allow" used_underscore_items = "allow" - -[workspace.package] -version = "0.0.1" -edition = "2024" -rust-version = "1.85" -license = "AGPL-3.0-or-later" -repository = "https://github.com/knitli/thread" -documentation = "https://thread.knitli.dev" -homepage = "https://knitli.com" -authors = [ - "Knitli Inc ", - "Adam Poulemanos for Knitli ", -] diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..1b62c0f --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,662 @@ +# GNU AFFERO GENERAL PUBLIC LICENSE + + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/LICENSES/AGPL-3.0-or-later.txt b/LICENSES/AGPL-3.0-or-later.txt new file mode 100644 index 0000000..0c97efd --- /dev/null +++ b/LICENSES/AGPL-3.0-or-later.txt @@ -0,0 +1,235 @@ +GNU AFFERO GENERAL PUBLIC LICENSE +Version 3, 19 November 2007 + +Copyright (C) 2007 Free Software Foundation, Inc. + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + + Preamble + +The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. + +Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. + +A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. + +The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. + +An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. + +The precise terms and conditions for copying, distribution and modification follow. + + TERMS AND CONDITIONS + +0. Definitions. + +"This License" refers to version 3 of the GNU Affero General Public License. + +"Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based on the Program. + +To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. + +To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. + +An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. + +1. Source Code. +The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. + +A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. + +The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +2. Basic Permissions. +All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. + +3. Protecting Users' Legal Rights From Anti-Circumvention Law. +No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. + +4. Conveying Verbatim Copies. +You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. + +5. Conveying Modified Source Versions. +You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". + + c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. + +A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. + +6. Conveying Non-Source Forms. +You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: + + a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. + + d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. + +"Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. + +7. Additional Terms. +"Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. + +Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or authors of the material; or + + e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. + +All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. + +If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. + +8. Termination. + +You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. + +9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. + +10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. + +An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. + +11. Patents. + +A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. + +In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. + +If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. + +A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. + +12. No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. + +13. Remote Network Interaction; Use with the GNU General Public License. + +Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. + +14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. + +If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. + +Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. + +15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +17. Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. + +END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. + +You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see . diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 0000000..137069b --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt new file mode 100644 index 0000000..d817195 --- /dev/null +++ b/LICENSES/MIT.txt @@ -0,0 +1,18 @@ +MIT License + +Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the +following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO +EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/PLAN.md b/PLAN.md index 1678bea..9b3a49a 100644 --- a/PLAN.md +++ b/PLAN.md @@ -1,3 +1,10 @@ + + thread/ β”œβ”€β”€ crates/ β”‚ β”œβ”€β”€ thread-core/ # Main engine + petgraph + error types diff --git a/README.md b/README.md index f468bd6..4a6d1cb 100644 --- a/README.md +++ b/README.md @@ -1 +1,44 @@ -such empty + + +# Thread + +[![REUSE status](https://api.reuse.software/badge/git.fsfe.org/reuse/api)](https://api.reuse.software/info/git.fsfe.org/reuse/api) + +## License + +### Thread + +Thread is licensed under the GNU Affero General Public License v3.0 (AGPL-3.0-or-later). You can find the full license text in the [LICENSE](LICENSE.md) file. You can use Thread for free, for personal and commercial use, you can also change the code however you like, but **you must share your changes with the community** under the AGPL 3.0 or later. You must also include the AGPL 3.0 with any copies of Thread you share. Copies must also include the copyright notice. Knitli Inc. is the creator and copyright holder of Thread. + +If you're not familiar with the AGPL 3.0, the important parts are: + +- You can use Thread for free, for personal and commercial use. +- You can change the code however you like. +- You must share your changes with the community under the AGPL 3.0 or later. This includes the source for any changes you make, along with that of any larger work you create that includes Thread. +- If you don't make any changes to Thread, you can use it without sharing your source code. +- You must include the AGPL 3.0 and Knitli's copyright notice with any copies of Thread you share. We recommend using the [SPDX specification](https://spdx.dev/learn/handling-license-info/) + +### Want to use Thread in a closed source project? + +**If you want to use Thread in a closed source project, you can purchase a commercial license from Knitli**. This allows you to use Thread without sharing your source code. Please contact us at [licensing@knit.li](mailto:licensing@knit.li) + +### Other Licenses + +While most of Thread is licensed under the AGPL 3.0, there are some exceptions: + +- Some components were forked from [`Ast-Grep`](https://github.com/ast-grep/ast-grep) and are licensed under the AGPL 3.0 or later *AND* the MIT license. Our changes are AGPL; the original code is MIT. See [`VENDORED.md`](VENDORED.md) for details. +- Unless otherwise noted, documentation and configuration files are licensed under either the MIT license or the Apache License 2.0, your choice. This includes the `README.md`, `CONTRIBUTORS_LICENSE_AGREEMENT.md`, and other similar files. This allows for more flexibility in how these files can be used and shared. +- + +### Contributing + +We love contributions of any kind! By contributing to Thread, you agree to our [Contributor License Agreement (CLA)](CONTRIBUTORS_LICENSE_AGREEMENT.md). This agreement ensures that we can continue to develop and maintain Thread while giving you credit for your contributions. + +#### We Use Reuse + +If you're in doubt, look at the top of the file, or look for a `.license` file with the same name as the file (like `Cargo.lock.license`). We follow the [Reuse Specification](https://reuse.software/) for license information in our codebase, which means every single file should have license information. We also keep a Software Bill of Materials (SBOM) in the repository root: [`sbom.spdx`](sbom.spdx). This file lists all the licenses of the files in the repository, and is generated automatically by our build system. diff --git a/VENDORED.md b/VENDORED.md new file mode 100644 index 0000000..c9c1882 --- /dev/null +++ b/VENDORED.md @@ -0,0 +1,69 @@ + +# Our Fork of Ast-Grep + +We forked most of the excellent [Ast-Grep][AG] codebase to create Thread. We originally tried using Ast-Grep as a library, but ran into limitations. The `core` module is intended to work as a library, but our plans for Thread required finer control over features at build-time. + +While Thread includes a CLI (and that’s likely your first encounter with it), our CLI is just the tip of the iceberg. The real focus is on service-oriented architecture for cloud and automation use. + +**We forked at Ast-Grep v0.38.7**. See [the original repo at that version](https://github.com/ast-grep/ast-grep/tree/0.38.7) for reference. + +--- + +## Why We Forked + +We tried multiple approaches to integrating Ast-Grep, from working with it as a library with a complex feature-gating scheme, to vendoring and dividing four crates into granular components (14 crates!). That latter one was overkill, and was probably us jumping the shark early :shark:⛷️. + +We settled on a middle ground. We forked `core`, `config`, and `language`, and will continue to use `dynamic` and others as dependencies as needed. We also did our best to make as few changes as possible -- mostly focusing on separating features with gating, and abstracting some core elements to better fit our service oriented approach. + +Our changes are mostly structuralβ€”we needed finer-grained control over organization, minimal cold start times, and clean separation between services. + +### Where the Fork Lives + +* [`thread-ast-engine`](https://github.com/knitli/thread/tree/main/crates/ast-engine): Fork of `ast-grep-core`. We separated its features into `parsing`, and `matching` features so that we could better control their usage in our services. +* [`thread-rule-engine`](https://github.com/knitli/thread/tree/main/crates/rule-engine): Fork of `ast-grep-config`. We isolated rule management, parsing, and validation functionality, and made changes to separate the logic from the assumption of a config file, allowing us more flexibility to implement rule-based operations in different environments. +* [`thread-language`](https://github.com/knitli/thread/tree/main/crates/language): We changed very little here, we needed the languages publicly exposed to feature gate each one separately. We also plan to add different languages more suitable for our needs. + +We admittedly didn't have this conversation with the Ast-Grep contributors, which we will once the dust settles a bit and we can divert attention from delivering an MVP. Our changes are intentionally reversible, and we'd like to find a way to return to using the core crates and contributing there (but that may not be realistic with different goals between the projects). + +### Licensing + +**Original Ast-Grep code** is MIT-licensed (see the `LICENSE-MIT` file in each crate). +**Our changes and anything Thread-specific** are licensed under the [AGPL v3.0](https://github.com/knitli/thread/blob/main/LICENSE.md). + +* If you want pure MIT, use Ast-Grep directly, or cherry-pick the original code. The relationships are: + + * `thread-ast-engine` β†’ `ast-grep-core` + * `thread-rule-engine` β†’ `ast-grep-config` + * `thread-language` β†’ `ast-grep-language` + +* Using our fork means AGPL; sharing required. If you want to treat your code based on Thread like :ring: Gollum :ring:, [contact us for a commercial license](mailto:licensing@knit.li), and you can keep your *precious*. +* Our project meets the [Reuse Specification](https://reuse.software/). Every file in the project is marked in its header with license information, or with an accompanying `.license` file. Code from `Ast-Grep` will be marked `AGPL-3.0-or-later AND MIT` (this isn't an `or` where you can choose between them). + +> Technically, you *can* only use the unchanged Ast-Grep bits under MITβ€”but you’d need to do the diffing yourself, and you’ll miss out on Thread-specific improvements (not sure why you would do that instead of just forking Ast-Grep...). AGPL means our changes (and anyone else’s) will always be open source. + +--- + +## We're Going to Contribute to Ast-Grep, too + +Most of Thread's Ast-Grep codebase is unchanged for now, and where we identify bugs or areas for improvement, we'll submit them upstream under Ast-Grep's MIT license. Similarly, we'll monitor changes to Ast-Grep and incorporate fixes and improvements into Thread. + +## So Are You Going to Try to Keep the Changes Minimal Forever? + +Probably not. Our first commitment is making Thread as great as we can, even if we diverge from Ast-Grep. We'd love to see the projects grow together, but they may not always align perfectly. Ast-Grep has its own roadmap and priorities, and we have ours. Thread is not Ast-Grep; it is just built on top of it. + +## Why Ast-Grep? + +Ast-Grep makes [Tree-sitter][ts] actually usable for code search/replace. We built on it because it solved the hard partsβ€”especially CST-wranglingβ€”so we could focus on new stuff, not rebuilding the same wheel.[^1] + +> For reasons lost to time, everyone in this ecosystem calls their [CSTs][csts] β€œASTs.” Maybe it’s like the first rule of Tree-sitter Club: we all pretend they’re ASTs :fist:. + +[^1]: If our initial attempts at integrating Ast-Grep represent how we would reinvent the wheel, we probably would have made our version square and in 15 parts, assembly required. + +[AG]: https://github.com/ast-grep/ast-grep +[ts]: https://github.com/tree-sitter/tree-sitter +[csts]: https://en.wikipedia.org/wiki/Concrete_syntax_tree diff --git a/_typos.toml b/_typos.toml new file mode 100755 index 0000000..0a57fd3 --- /dev/null +++ b/_typos.toml @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +[default] +locale = "en-us" +check-file = true +check-filename = true +extend-ignore-re = [ + "(?s)(#|//)\\s*spellchecker:off.*?\\n\\s*(#|//)\\s*spellchecker:on", +] +extend-ignore-identifiers-re = [ + "iif", + "iife", + "i18n", + "i18next", + "i18n-t", + "i18n-tc", + "strat", + "Inferrable", +] + +[files] +ignore-hidden = false +ignore-files = true +extend-exclude = [ + "/usr/**/*", + "/tmp/**/*", + "/**/node_modules/**", + "node_modules", + "mkdocs-material", + "license-list-data", + "external", + "vendor", + ".git", + "/usr/share/code/**/*", + "eslint.config.mjs", + "docs/manifest.json", + "src/assets/videos/**/*", + "src/assets/fonts/**/*", + "src/assets/images/**/*", +] diff --git a/_unused.toml b/_unused.toml index a6b69fc..6d6a2fd 100644 --- a/_unused.toml +++ b/_unused.toml @@ -1,3 +1,9 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# + +# SPDX-License-Identifier: MIT OR Apache-2.0 + # dashmap = { version = "6.1.0", features = ["rayon", "inline"] } # fmmap = { version = "0.4.0", features = ["tokio"] } # memory map for handling large files efficiently # ignore = { version = "0.4.23", features = ["simd-accel"] } # gitignore diff --git a/crates/ast-engine/Cargo.toml b/crates/ast-engine/Cargo.toml new file mode 100644 index 0000000..eba3db0 --- /dev/null +++ b/crates/ast-engine/Cargo.toml @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 + +[package] +name = "thread-ast-engine" +description = "Core AST engine for Thread - parsing, matching, and transforming code using AST patterns. Forked from ast-grep-core." +keywords = ["ast", "pattern", "codemod", "search", "rewrite"] +categories = ["command-line-utilities", "development-tools", "parsing"] +readme = "README.md" +license = "AGPL-3.0-or-later AND MIT" +version = "0.1.0" +authors = [ + "Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>", + "Knitli Inc ", + "Adam Poulemanos for Knitli ", +] +documentation.workspace = true +edition.workspace = true +homepage.workspace = true +repository.workspace = true +rust-version.workspace = true +include.workspace = true + +[dependencies] +thread-utils = { workspace = true, default-features = false, features = [ + "hashers", + "simd", +] } +thiserror.workspace = true + +# Tree-sitter required for parsing +tree-sitter = { workspace = true, optional = true } +# Bit-set and regex required for pattern matching +bit-set = { workspace = true, optional = true } +regex = { workspace = true, optional = true } + +[features] +default = ["parsing", "matching"] +# The 'parsing' feature enables the tree-sitter backend +parsing = ["dep:tree-sitter"] +# The 'matching' feature enables the pattern matching engine +matching = ["dep:regex", "dep:bit-set"] + +[dev-dependencies] +tree-sitter-typescript = "0.23.2" +thread-language = { workspace = true, features = ["builtin-parser"] } +criterion = { version = "0.6.0", features = ["html_reports"] } + +[[bench]] +name = "performance_improvements" +harness = false + +[build-dependencies] +cc = "1.2.30" + +[lints] +# Enable linting for the crate +workspace = true diff --git a/crates/ast-engine/LICENSE-AGPL-3.0-or-later b/crates/ast-engine/LICENSE-AGPL-3.0-or-later new file mode 100644 index 0000000..1b62c0f --- /dev/null +++ b/crates/ast-engine/LICENSE-AGPL-3.0-or-later @@ -0,0 +1,662 @@ +# GNU AFFERO GENERAL PUBLIC LICENSE + + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/crates/ast-engine/LICENSE-MIT b/crates/ast-engine/LICENSE-MIT new file mode 100644 index 0000000..e3a8a65 --- /dev/null +++ b/crates/ast-engine/LICENSE-MIT @@ -0,0 +1,30 @@ + + +# MIT License + +Copyright (c) 2022 Herrington Darkholme + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +## This crate was created from forked code + +The above license and copyright applies to any code before the fork. Any changes since Ast-Grep v.0.38.7 are separately licensed. + +- See [LICENSE-AGPL-3.0-or-later](LICENSE-AGPL-3.0-or-later) +- For a description of the fork and what it includes, visit the [Thread repo](https://github.com/knitli/thread/tree/main/VENDORED.md) diff --git a/crates/ast-engine/README.md b/crates/ast-engine/README.md new file mode 100644 index 0000000..2e23dca --- /dev/null +++ b/crates/ast-engine/README.md @@ -0,0 +1,6 @@ + diff --git a/crates/ast-engine/VENDORED.md b/crates/ast-engine/VENDORED.md new file mode 100644 index 0000000..c9c1882 --- /dev/null +++ b/crates/ast-engine/VENDORED.md @@ -0,0 +1,69 @@ + +# Our Fork of Ast-Grep + +We forked most of the excellent [Ast-Grep][AG] codebase to create Thread. We originally tried using Ast-Grep as a library, but ran into limitations. The `core` module is intended to work as a library, but our plans for Thread required finer control over features at build-time. + +While Thread includes a CLI (and that’s likely your first encounter with it), our CLI is just the tip of the iceberg. The real focus is on service-oriented architecture for cloud and automation use. + +**We forked at Ast-Grep v0.38.7**. See [the original repo at that version](https://github.com/ast-grep/ast-grep/tree/0.38.7) for reference. + +--- + +## Why We Forked + +We tried multiple approaches to integrating Ast-Grep, from working with it as a library with a complex feature-gating scheme, to vendoring and dividing four crates into granular components (14 crates!). That latter one was overkill, and was probably us jumping the shark early :shark:⛷️. + +We settled on a middle ground. We forked `core`, `config`, and `language`, and will continue to use `dynamic` and others as dependencies as needed. We also did our best to make as few changes as possible -- mostly focusing on separating features with gating, and abstracting some core elements to better fit our service oriented approach. + +Our changes are mostly structuralβ€”we needed finer-grained control over organization, minimal cold start times, and clean separation between services. + +### Where the Fork Lives + +* [`thread-ast-engine`](https://github.com/knitli/thread/tree/main/crates/ast-engine): Fork of `ast-grep-core`. We separated its features into `parsing`, and `matching` features so that we could better control their usage in our services. +* [`thread-rule-engine`](https://github.com/knitli/thread/tree/main/crates/rule-engine): Fork of `ast-grep-config`. We isolated rule management, parsing, and validation functionality, and made changes to separate the logic from the assumption of a config file, allowing us more flexibility to implement rule-based operations in different environments. +* [`thread-language`](https://github.com/knitli/thread/tree/main/crates/language): We changed very little here, we needed the languages publicly exposed to feature gate each one separately. We also plan to add different languages more suitable for our needs. + +We admittedly didn't have this conversation with the Ast-Grep contributors, which we will once the dust settles a bit and we can divert attention from delivering an MVP. Our changes are intentionally reversible, and we'd like to find a way to return to using the core crates and contributing there (but that may not be realistic with different goals between the projects). + +### Licensing + +**Original Ast-Grep code** is MIT-licensed (see the `LICENSE-MIT` file in each crate). +**Our changes and anything Thread-specific** are licensed under the [AGPL v3.0](https://github.com/knitli/thread/blob/main/LICENSE.md). + +* If you want pure MIT, use Ast-Grep directly, or cherry-pick the original code. The relationships are: + + * `thread-ast-engine` β†’ `ast-grep-core` + * `thread-rule-engine` β†’ `ast-grep-config` + * `thread-language` β†’ `ast-grep-language` + +* Using our fork means AGPL; sharing required. If you want to treat your code based on Thread like :ring: Gollum :ring:, [contact us for a commercial license](mailto:licensing@knit.li), and you can keep your *precious*. +* Our project meets the [Reuse Specification](https://reuse.software/). Every file in the project is marked in its header with license information, or with an accompanying `.license` file. Code from `Ast-Grep` will be marked `AGPL-3.0-or-later AND MIT` (this isn't an `or` where you can choose between them). + +> Technically, you *can* only use the unchanged Ast-Grep bits under MITβ€”but you’d need to do the diffing yourself, and you’ll miss out on Thread-specific improvements (not sure why you would do that instead of just forking Ast-Grep...). AGPL means our changes (and anyone else’s) will always be open source. + +--- + +## We're Going to Contribute to Ast-Grep, too + +Most of Thread's Ast-Grep codebase is unchanged for now, and where we identify bugs or areas for improvement, we'll submit them upstream under Ast-Grep's MIT license. Similarly, we'll monitor changes to Ast-Grep and incorporate fixes and improvements into Thread. + +## So Are You Going to Try to Keep the Changes Minimal Forever? + +Probably not. Our first commitment is making Thread as great as we can, even if we diverge from Ast-Grep. We'd love to see the projects grow together, but they may not always align perfectly. Ast-Grep has its own roadmap and priorities, and we have ours. Thread is not Ast-Grep; it is just built on top of it. + +## Why Ast-Grep? + +Ast-Grep makes [Tree-sitter][ts] actually usable for code search/replace. We built on it because it solved the hard partsβ€”especially CST-wranglingβ€”so we could focus on new stuff, not rebuilding the same wheel.[^1] + +> For reasons lost to time, everyone in this ecosystem calls their [CSTs][csts] β€œASTs.” Maybe it’s like the first rule of Tree-sitter Club: we all pretend they’re ASTs :fist:. + +[^1]: If our initial attempts at integrating Ast-Grep represent how we would reinvent the wheel, we probably would have made our version square and in 15 parts, assembly required. + +[AG]: https://github.com/ast-grep/ast-grep +[ts]: https://github.com/tree-sitter/tree-sitter +[csts]: https://en.wikipedia.org/wiki/Concrete_syntax_tree diff --git a/crates/ast-engine/benches/performance_improvements.rs b/crates/ast-engine/benches/performance_improvements.rs new file mode 100644 index 0000000..48f9fe9 --- /dev/null +++ b/crates/ast-engine/benches/performance_improvements.rs @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +//! Benchmarks for performance improvements in ast-engine crate +//! +//! Run with: cargo bench --package thread-ast-engine + +use criterion::{Criterion, criterion_group, criterion_main}; +use std::hint::black_box; +use thread_ast_engine::{Pattern, Root}; +use thread_language::Tsx; +use thread_utils::RapidMap; + +fn bench_pattern_conversion(c: &mut Criterion) { + let source_code = r#" + function complexFunction(a, b, c) { + if (a > b) { + return c.map(x => x * 2).filter(x => x > 10); + } else { + const result = []; + for (let i = 0; i < c.length; i++) { + if (c[i] % 2 === 0) { + result.push(c[i] * 3); + } + } + return result; + } + } + "#; + + let pattern_str = "function $NAME($$$ARGS) { $$$BODY }"; + + c.bench_function("pattern_conversion_optimized", |b| { + b.iter(|| { + let pattern = Pattern::new(black_box(pattern_str), Tsx); + let root = Root::str(black_box(source_code), Tsx); + let node = root.root(); + let matches: Vec<_> = node.find_all(&pattern).collect(); + black_box(matches.len()) + }) + }); +} + +fn bench_meta_var_env_conversion(c: &mut Criterion) { + let source_code = "const value = 123; const another = 456; const third = 789;"; + let pattern_str = "const $VAR = $VALUE"; + + c.bench_function("meta_var_env_conversion", |b| { + b.iter(|| { + let pattern = Pattern::new(black_box(pattern_str), Tsx); + let root = Root::str(black_box(source_code), Tsx); + let matches: Vec<_> = root.root().find_all(&pattern).collect(); + + // Test the optimized string concatenation + for m in matches { + let env_map = RapidMap::from(m.get_env().clone()); + black_box(env_map); + } + }) + }); +} + +fn bench_pattern_children_collection(c: &mut Criterion) { + let source_code = r#" + class TestClass { + method1() { return 1; } + method2() { return 2; } + method3() { return 3; } + method4() { return 4; } + method5() { return 5; } + } + "#; + + c.bench_function("pattern_children_collection", |b| { + b.iter(|| { + let root = Root::str(black_box(source_code), Tsx); + let pattern = Pattern::new("class $NAME { $$$METHODS }", Tsx); + let matches: Vec<_> = root.root().find_all(&pattern).collect(); + black_box(matches); + }) + }); +} + +criterion_group!( + benches, + bench_pattern_conversion, + bench_meta_var_env_conversion, + bench_pattern_children_collection +); +criterion_main!(benches); diff --git a/crates/ast-engine/src/language.rs b/crates/ast-engine/src/language.rs new file mode 100644 index 0000000..9f415a1 --- /dev/null +++ b/crates/ast-engine/src/language.rs @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::{Pattern, PatternBuilder, PatternError}; +use crate::meta_var::{MetaVariable, extract_meta_var}; +use std::borrow::Cow; +use std::path::Path; + +/// Trait to abstract ts-language usage in ast-grep, which includes: +/// * which character is used for meta variable. +/// * if we need to use other char in meta var for parser at runtime +/// * pre process the Pattern code. +pub trait Language: Clone + 'static { + /// normalize pattern code before matching + /// e.g. remove `expression_statement`, or prefer parsing {} to object over block + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + Cow::Borrowed(query) + } + + /// Configure meta variable special character + /// By default $ is the metavar char, but in PHP it can be # + #[inline] + fn meta_var_char(&self) -> char { + '$' + } + + /// Some language does not accept $ as the leading char for identifiers. + /// We need to change $ to other char at run-time to make parser happy, thus the name expando. + /// By default this is the same as `meta_var` char so replacement is done at runtime. + #[inline] + fn expando_char(&self) -> char { + self.meta_var_char() + } + + /// extract `MetaVariable` from a given source string + /// At runtime we need to use `expand_char` + fn extract_meta_var(&self, source: &str) -> Option { + extract_meta_var(source, self.expando_char()) + } + /// Return the file language from path. Return None if the file type is not supported. + fn from_path>(_path: P) -> Option { + // TODO: throw panic here if not implemented properly? + None + } + + fn kind_to_id(&self, kind: &str) -> u16; + fn field_to_id(&self, field: &str) -> Option; + fn build_pattern(&self, builder: &PatternBuilder) -> Result; +} + +#[cfg(test)] +pub use test::*; + +#[cfg(test)] +mod test { + use super::*; + use crate::tree_sitter::{LanguageExt, StrDoc, TSLanguage}; + + #[derive(Clone, Debug)] + pub struct Tsx; + impl Language for Tsx { + fn kind_to_id(&self, kind: &str) -> u16 { + let ts_lang: TSLanguage = tree_sitter_typescript::LANGUAGE_TSX.into(); + ts_lang.id_for_node_kind(kind, /* named */ true) + } + fn field_to_id(&self, field: &str) -> Option { + self.get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, self.clone())) + } + } + impl LanguageExt for Tsx { + fn get_ts_language(&self) -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TSX.into() + } + } +} diff --git a/crates/ast-engine/src/lib.rs b/crates/ast-engine/src/lib.rs new file mode 100644 index 0000000..3a8ca1c --- /dev/null +++ b/crates/ast-engine/src/lib.rs @@ -0,0 +1,131 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +/*! +This module contains the core engine for Thread. + +It provides APIs for parsing, traversing, searching and replacing tree-sitter nodes. +The functionality is feature-gated to allow for selective compilation: +- `parsing`: Enables tree-sitter parsing backend +- `matching`: Enables pattern matching and replacement capabilities +*/ + +pub mod language; +pub mod source; + +// Core AST functionality (always available) +mod node; +pub use node::{Node, Position}; +pub use source::Doc; +// pub use matcher::types::{MatchStrictness, Pattern, PatternBuilder, PatternError, PatternNode}; + +// Feature-gated modules +#[cfg(feature = "parsing")] +pub mod tree_sitter; + +// Everything but types feature gated behind "matching" in `matchers` +mod matchers; + +#[cfg(feature = "matching")] +mod match_tree; +#[cfg(feature = "matching")] +pub mod matcher; +pub mod meta_var; +#[cfg(feature = "matching")] +pub mod ops; +#[doc(hidden)] +pub mod pinned; +#[cfg(feature = "matching")] +pub mod replacer; + +// Re-exports + +// the bare types with no implementations +#[cfg(not(feature = "matching"))] +pub use matchers::{ + MatchStrictness, Pattern, PatternBuilder, PatternError, PatternNode, matcher::Matcher, +}; + +// implemented types +#[cfg(feature = "matching")] +pub use matcher::{ + MatchAll, MatchNone, Matcher, MatcherExt, NodeMatch, Pattern, PatternBuilder, PatternError, + PatternNode, +}; + +pub use meta_var::MetaVarEnv; + +#[cfg(feature = "matching")] +pub use match_tree::MatchStrictness; + +pub use language::Language; + +pub use node::Root; + +pub type AstGrep = Root; + +#[cfg(all(test, feature = "parsing", feature = "matching"))] +mod test { + use super::*; + use crate::tree_sitter::LanguageExt; + use language::Tsx; + use ops::Op; + + pub type Result = std::result::Result<(), String>; + + #[test] + fn test_replace() -> Result { + let mut ast_grep = Tsx.ast_grep("var a = 1; let b = 2;"); + ast_grep.replace("var $A = $B", "let $A = $B")?; + let source = ast_grep.generate(); + assert_eq!(source, "let a = 1; let b = 2;"); // note the semicolon + Ok(()) + } + + #[test] + fn test_replace_by_rule() -> Result { + let rule = Op::either("let a = 123").or("let b = 456"); + let mut ast_grep = Tsx.ast_grep("let a = 123"); + let replaced = ast_grep.replace(rule, "console.log('it works!')")?; + assert!(replaced); + let source = ast_grep.generate(); + assert_eq!(source, "console.log('it works!')"); + Ok(()) + } + + #[test] + fn test_replace_unnamed_node() -> Result { + // ++ and -- is unnamed node in tree-sitter javascript + let mut ast_grep = Tsx.ast_grep("c++"); + ast_grep.replace("$A++", "$A--")?; + let source = ast_grep.generate(); + assert_eq!(source, "c--"); + Ok(()) + } + + #[test] + fn test_replace_trivia() -> Result { + let mut ast_grep = Tsx.ast_grep("var a = 1 /*haha*/;"); + ast_grep.replace("var $A = $B", "let $A = $B")?; + let source = ast_grep.generate(); + assert_eq!(source, "let a = 1 /*haha*/;"); // semicolon + + let mut ast_grep = Tsx.ast_grep("var a = 1; /*haha*/"); + ast_grep.replace("var $A = $B", "let $A = $B")?; + let source = ast_grep.generate(); + assert_eq!(source, "let a = 1; /*haha*/"); + Ok(()) + } + + #[test] + fn test_replace_trivia_with_skipped() -> Result { + let mut ast_grep = Tsx.ast_grep("return foo(1, 2,) /*haha*/;"); + ast_grep.replace("return foo($A, $B)", "return bar($A, $B)")?; + let source = ast_grep.generate(); + assert_eq!(source, "return bar(1, 2) /*haha*/;"); // semicolon + Ok(()) + } +} diff --git a/crates/ast-engine/src/match_tree/match_node.rs b/crates/ast-engine/src/match_tree/match_node.rs new file mode 100644 index 0000000..f73ea28 --- /dev/null +++ b/crates/ast-engine/src/match_tree/match_node.rs @@ -0,0 +1,325 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::Aggregator; +use super::strictness::MatchOneNode; +use crate::matcher::MatchStrictness; +use crate::matcher::{PatternNode, kind_utils}; +use crate::meta_var::MetaVariable; +use crate::{Doc, Node}; +use std::iter::Peekable; + +pub(super) fn match_node_impl<'tree, D: Doc>( + goal: &PatternNode, + candidate: &Node<'tree, D>, + agg: &mut impl Aggregator<'tree, D>, + strictness: &MatchStrictness, +) -> MatchOneNode { + use PatternNode as P; + match &goal { + // leaf = without named children + P::Terminal { + text, + kind_id, + is_named, + } => match strictness.match_terminal(*is_named, text, *kind_id, candidate) { + MatchOneNode::MatchedBoth => { + if agg.match_terminal(candidate).is_some() { + MatchOneNode::MatchedBoth + } else { + MatchOneNode::NoMatch + } + } + c => c, + }, + P::MetaVar { meta_var, .. } => match agg.match_meta_var(meta_var, candidate) { + Some(()) => MatchOneNode::MatchedBoth, + None => MatchOneNode::NoMatch, // TODO: this may be wrong + }, + P::Internal { + kind_id, children, .. + } if kind_utils::are_kinds_matching(*kind_id, candidate.kind_id()) => { + let cand_children = candidate.children(); + match match_nodes_impl_recursive(children, cand_children, agg, strictness) { + Some(()) => MatchOneNode::MatchedBoth, + None => MatchOneNode::NoMatch, + } + } + _ => MatchOneNode::NoMatch, // TODO + } +} + +fn match_nodes_impl_recursive<'tree, D: Doc>( + goals: &[PatternNode], + candidates: impl Iterator>, + agg: &mut impl Aggregator<'tree, D>, + strictness: &MatchStrictness, +) -> Option<()> { + let mut goal_children = goals.iter().peekable(); + let mut cand_children = candidates.peekable(); + cand_children.peek()?; + loop { + match may_match_ellipsis_impl(&mut goal_children, &mut cand_children, agg, strictness)? { + ControlFlow::Return => return Some(()), + ControlFlow::Continue => continue, + ControlFlow::Fallthrough => (), + } + match match_single_node_while_skip_trivial( + &mut goal_children, + &mut cand_children, + agg, + strictness, + )? { + ControlFlow::Return => return Some(()), + ControlFlow::Continue => continue, + ControlFlow::Fallthrough => (), + } + let consumed_goal = goal_children.next(); + // if goal runs out, do not proceed cand nodes + if consumed_goal.is_some() { + cand_children.next(); + } + if goal_children.peek().is_none() { + // all goal found + let has_trailing = cand_children.all(|n| strictness.should_skip_trailing(&n)); + return has_trailing.then_some(()); + } + cand_children.peek()?; + } +} + +enum ControlFlow { + Continue, + Fallthrough, + Return, +} + +/// returns None means no match +fn may_match_ellipsis_impl<'p, 't: 'p, D: Doc>( + goal_children: &mut Peekable>, + cand_children: &mut Peekable>>, + agg: &mut impl Aggregator<'t, D>, + strictness: &MatchStrictness, +) -> Option { + let Some(curr_node) = goal_children.peek() else { + // in rare case, an internal node's children is empty + // see https://github.com/ast-grep/ast-grep/issues/1688 + return Some(ControlFlow::Return); + }; + let Ok(optional_name) = try_get_ellipsis_mode(curr_node) else { + return Some(ControlFlow::Fallthrough); + }; + let mut matched = vec![]; + goal_children.next(); + // goal has all matched + if goal_children.peek().is_none() { + match_ellipsis(agg, &optional_name, matched, cand_children, 0)?; + return Some(ControlFlow::Return); + } + // skip trivial nodes in goal after ellipsis + let mut skipped_anonymous = 0; + while goal_children.peek().unwrap().is_trivial() { + goal_children.next(); + skipped_anonymous += 1; + if goal_children.peek().is_none() { + match_ellipsis( + agg, + &optional_name, + matched, + cand_children, + skipped_anonymous, + )?; + return Some(ControlFlow::Return); + } + } + // if next node is a Ellipsis, consume one candidate node + if try_get_ellipsis_mode(goal_children.peek().unwrap()).is_ok() { + matched.push(cand_children.next().unwrap()); + cand_children.peek()?; + match_ellipsis( + agg, + &optional_name, + matched, + std::iter::empty(), + skipped_anonymous, + )?; + return Some(ControlFlow::Continue); + } + loop { + if matches!( + match_node_impl( + goal_children.peek().unwrap(), + cand_children.peek().unwrap(), + agg, + strictness, + ), + MatchOneNode::MatchedBoth + ) { + // found match non Ellipsis, + match_ellipsis( + agg, + &optional_name, + matched, + std::iter::empty(), + skipped_anonymous, + )?; + break Some(ControlFlow::Fallthrough); + } + matched.push(cand_children.next().unwrap()); + cand_children.peek()?; + } +} + +fn match_single_node_while_skip_trivial<'p, 't: 'p, D: Doc>( + goal_children: &mut Peekable>, + cand_children: &mut Peekable>>, + agg: &mut impl Aggregator<'t, D>, + strictness: &MatchStrictness, +) -> Option { + loop { + let Some(cand) = cand_children.peek() else { + // if cand runs out, check remaining goal + // if goal is skippable, it is a match, else a non match + return strictness + .should_skip_goal(goal_children) + .then_some(ControlFlow::Fallthrough); + }; + // try match goal node with candidate node + match match_node_impl(goal_children.peek().unwrap(), cand, agg, strictness) { + MatchOneNode::MatchedBoth => return Some(ControlFlow::Fallthrough), + MatchOneNode::SkipGoal => { + goal_children.next(); + if goal_children.peek().is_none() { + return Some(ControlFlow::Fallthrough); + } + } + MatchOneNode::SkipBoth => { + cand_children.next(); + goal_children.next(); + if goal_children.peek().is_none() { + return Some(ControlFlow::Fallthrough); + } + } + // skip trivial node + MatchOneNode::SkipCandidate => { + cand_children.next(); + } + // unmatched significant node + MatchOneNode::NoMatch => return None, + } + } +} + +/// Returns Ok if ellipsis pattern is found. If the ellipsis is named, returns it name. +/// If the ellipsis is unnamed, returns None. If it is not ellipsis node, returns Err. +fn try_get_ellipsis_mode(node: &PatternNode) -> Result, ()> { + let PatternNode::MetaVar { meta_var, .. } = node else { + return Err(()); + }; + match meta_var { + MetaVariable::Multiple => Ok(None), + MetaVariable::MultiCapture(n) => Ok(Some(n.into())), + _ => Err(()), + } +} + +fn match_ellipsis<'t, D: Doc>( + agg: &mut impl Aggregator<'t, D>, + optional_name: &Option, + mut matched: Vec>, + cand_children: impl Iterator>, + skipped_anonymous: usize, +) -> Option<()> { + matched.extend(cand_children); + agg.match_ellipsis(optional_name.as_deref(), matched, skipped_anonymous)?; + Some(()) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::matcher::KindMatcher; + use crate::matcher::types::Pattern; + use crate::{Matcher, Root, meta_var::MetaVarEnv}; + use std::borrow::Cow; + fn match_tree(p: &str, n: &str, strictness: MatchStrictness) -> MatchOneNode { + let pattern = Pattern::new(p, Tsx); + let kind = pattern.potential_kinds().expect("should have kind"); + let kind = KindMatcher::from_id(kind.into_iter().next().expect("should have kind") as u16); + let n = Root::str(n, Tsx); + let n = n.root().find(kind).expect("should find"); + let mut env = Cow::Owned(MetaVarEnv::new()); + match_node_impl(&pattern.node, &*n, &mut env, &strictness) + } + fn matched(p: &str, n: &str, strictness: MatchStrictness) { + let ret = match_tree(p, n, strictness); + assert!( + matches!(ret, MatchOneNode::MatchedBoth), + "expect match. pattern: `{p}`, node: `{n}`" + ); + } + fn unmatched(p: &str, n: &str, strictness: MatchStrictness) { + let ret = match_tree(p, n, strictness); + assert!( + !matches!(ret, MatchOneNode::MatchedBoth), + "expect no match. pattern: `{p}`, node: `{n}`" + ); + } + use MatchStrictness as M; + + #[test] + fn test_ast_match() { + matched("import $A from 'lib'", "import A from \"lib\"", M::Ast); + unmatched("$A(bar)", "foo(/* A*/bar)", M::Ast); + matched("$A(bar)", "foo(bar)", M::Ast); + unmatched("$A(bar)", "foo(bar, baz)", M::Ast); + matched("print($A,)", "print(123)", M::Ast); + matched("print($$$A,b,$$$C)", "print(b)", M::Ast); + matched("print($$$A,b,$$$C)", "print(a, b)", M::Ast); + matched("print($$$A,b,$$$C)", "print(a, b, c)", M::Ast); + matched("print($$$A,b,$$$C)", "print(a, b, c,)", M::Ast); + } + + #[test] + fn test_relaxed_match() { + matched("import $A from 'lib'", "import A from \"lib\"", M::Relaxed); + matched("$A(bar)", "foo(/* A*/bar)", M::Relaxed); + // fix https://github.com/ast-grep/ast-grep/issues/1848 + matched( + "import { foo } from 'bar'", + "import { foo, } from 'bar'", + M::Relaxed, + ); + unmatched( + "import { foo } from 'bar'", + "import { foo, bar, baz } from 'bar'", + M::Relaxed, + ); + unmatched( + "import { foo } from 'bar'", + "import { foo, bar } from 'bar'", + M::Relaxed, + ); + } + + #[test] + fn test_cst_match() { + unmatched("import $A from 'lib'", "import A from \"lib\"", M::Cst); + unmatched("$A(bar)", "foo(/* A*/bar)", M::Cst); + unmatched("print($A,)", "print(123)", M::Cst); + } + + #[test] + fn test_signature_match() { + matched( + "import $A from 'lib'", + "import A from \"lib\"", + M::Signature, + ); + matched("$A(bar)", "foo(/* A*/bar)", M::Signature); + } +} diff --git a/crates/ast-engine/src/match_tree/mod.rs b/crates/ast-engine/src/match_tree/mod.rs new file mode 100644 index 0000000..11993ae --- /dev/null +++ b/crates/ast-engine/src/match_tree/mod.rs @@ -0,0 +1,346 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +mod match_node; +mod strictness; + +use crate::matcher::Pattern; +use match_node::match_node_impl; +use strictness::MatchOneNode; + +use crate::meta_var::{MetaVarEnv, MetaVariable}; +use crate::{Doc, Node}; + +use std::borrow::Cow; + +// re-export +pub use strictness::MatchStrictness; + +trait Aggregator<'t, D: Doc> { + fn match_terminal(&mut self, node: &Node<'t, D>) -> Option<()>; + fn match_meta_var(&mut self, var: &MetaVariable, node: &Node<'t, D>) -> Option<()>; + fn match_ellipsis( + &mut self, + var: Option<&str>, + nodes: Vec>, + skipped_anonymous: usize, + ) -> Option<()>; +} + +struct ComputeEnd(usize); + +impl<'t, D: Doc> Aggregator<'t, D> for ComputeEnd { + fn match_terminal(&mut self, node: &Node<'t, D>) -> Option<()> { + self.0 = node.range().end; + Some(()) + } + fn match_meta_var(&mut self, _: &MetaVariable, node: &Node<'t, D>) -> Option<()> { + self.0 = node.range().end; + Some(()) + } + fn match_ellipsis( + &mut self, + _var: Option<&str>, + nodes: Vec>, + _skipped: usize, + ) -> Option<()> { + let n = nodes.last()?; + self.0 = n.range().end; + Some(()) + } +} + +pub fn match_end_non_recursive(goal: &Pattern, candidate: &Node) -> Option { + let mut end = ComputeEnd(0); + match match_node_impl(&goal.node, candidate, &mut end, &goal.strictness) { + MatchOneNode::MatchedBoth => Some(end.0), + _ => None, + } +} + +fn match_leaf_meta_var<'tree, D: Doc>( + mv: &MetaVariable, + candidate: &Node<'tree, D>, + env: &mut Cow>, +) -> Option<()> { + use MetaVariable as MV; + match mv { + MV::Capture(name, named) => { + if *named && !candidate.is_named() { + None + } else { + env.to_mut().insert(name, candidate.clone())?; + Some(()) + } + } + MV::Dropped(named) => { + if *named && !candidate.is_named() { + None + } else { + Some(()) + } + } + // Ellipsis will be matched in parent level + MV::Multiple => { + debug_assert!(false, "Ellipsis should be matched in parent level"); + Some(()) + } + MV::MultiCapture(name) => { + env.to_mut().insert(name, candidate.clone())?; + Some(()) + } + } +} + +impl<'t, D: Doc> Aggregator<'t, D> for Cow<'_, MetaVarEnv<'t, D>> { + fn match_terminal(&mut self, _: &Node<'t, D>) -> Option<()> { + Some(()) + } + fn match_meta_var(&mut self, var: &MetaVariable, node: &Node<'t, D>) -> Option<()> { + match_leaf_meta_var(var, node, self) + } + fn match_ellipsis( + &mut self, + var: Option<&str>, + nodes: Vec>, + skipped_anonymous: usize, + ) -> Option<()> { + if let Some(var) = var { + let mut matched = nodes; + let skipped = matched.len().saturating_sub(skipped_anonymous); + drop(matched.drain(skipped..)); + self.to_mut().insert_multi(var, matched)?; + } + Some(()) + } +} + +pub fn match_node_non_recursive<'tree, D: Doc>( + goal: &Pattern, + candidate: Node<'tree, D>, + env: &mut Cow>, +) -> Option> { + match match_node_impl(&goal.node, &candidate, env, &goal.strictness) { + MatchOneNode::MatchedBoth => Some(candidate), + _ => None, + } +} + +pub fn does_node_match_exactly(goal: &Node, candidate: &Node) -> bool { + // return true if goal and candidate are the same node + if goal.node_id() == candidate.node_id() { + return true; + } + // gh issue #1087, we make pattern matching a little bit more permissive + // compare node text if at least one node is leaf + if goal.is_named_leaf() || candidate.is_named_leaf() { + return goal.text() == candidate.text(); + } + if goal.kind_id() != candidate.kind_id() { + return false; + } + let goal_children = goal.children(); + let cand_children = candidate.children(); + if goal_children.len() != cand_children.len() { + return false; + } + goal_children + .zip(cand_children) + .all(|(g, c)| does_node_match_exactly(&g, &c)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::meta_var::MetaVarEnv; + use crate::tree_sitter::StrDoc; + use crate::{Node, Root}; + use thread_utils::RapidMap; + + fn find_node_recursive<'tree>( + goal: &Pattern, + node: Node<'tree, StrDoc>, + env: &mut Cow>>, + ) -> Option>> { + match_node_non_recursive(goal, node.clone(), env).or_else(|| { + node.children() + .find_map(|sub| find_node_recursive(goal, sub, env)) + }) + } + + fn test_match(s1: &str, s2: &str) -> RapidMap { + let goal = Pattern::new(s1, Tsx); + let cand = Root::str(s2, Tsx); + let cand = cand.root(); + let mut env = Cow::Owned(MetaVarEnv::new()); + let ret = find_node_recursive(&goal, cand.clone(), &mut env); + assert!( + ret.is_some(), + "goal: {goal:?}, candidate: {}", + cand.get_inner_node().to_sexp(), + ); + RapidMap::from(env.into_owned()) + } + + fn test_non_match(s1: &str, s2: &str) { + let goal = Pattern::new(s1, Tsx); + let cand = Root::str(s2, Tsx); + let cand = cand.root(); + let mut env = Cow::Owned(MetaVarEnv::new()); + let ret = find_node_recursive(&goal, cand, &mut env); + assert!(ret.is_none()); + } + + #[test] + fn test_simple_match() { + test_match("const a = 123", "const a=123"); + test_non_match("const a = 123", "var a = 123"); + } + + #[test] + fn test_nested_match() { + test_match("const a = 123", "function() {const a= 123;}"); + test_match("const a = 123", "class A { constructor() {const a= 123;}}"); + test_match( + "const a = 123", + "for (let a of []) while (true) { const a = 123;}", + ); + } + + #[test] + fn test_should_exactly_match() { + test_match( + "function foo() { let a = 123; }", + "function foo() { let a = 123; }", + ); + test_non_match( + "function foo() { let a = 123; }", + "function bar() { let a = 123; }", + ); + } + + #[test] + fn test_match_inner() { + test_match( + "function bar() { let a = 123; }", + "function foo() { function bar() {let a = 123; }}", + ); + test_non_match( + "function foo() { let a = 123; }", + "function foo() { function bar() {let a = 123; }}", + ); + } + + #[test] + fn test_single_ellipsis() { + test_match("foo($$$)", "foo(a, b, c)"); + test_match("foo($$$)", "foo()"); + } + #[test] + fn test_named_ellipsis() { + test_match("foo($$$A, c)", "foo(a, b, c)"); + test_match("foo($$$A, b, c)", "foo(a, b, c)"); + test_match("foo($$$A, a, b, c)", "foo(a, b, c)"); + test_non_match("foo($$$A, a, b, c)", "foo(b, c)"); + } + + #[test] + fn test_leading_ellipsis() { + test_match("foo($$$, c)", "foo(a, b, c)"); + test_match("foo($$$, b, c)", "foo(a, b, c)"); + test_match("foo($$$, a, b, c)", "foo(a, b, c)"); + test_non_match("foo($$$, a, b, c)", "foo(b, c)"); + } + #[test] + fn test_trailing_ellipsis() { + test_match("foo(a, $$$)", "foo(a, b, c)"); + test_match("foo(a, b, $$$)", "foo(a, b, c)"); + // test_match("foo(a, b, c, $$$)", "foo(a, b, c)"); + test_non_match("foo(a, b, c, $$$)", "foo(b, c)"); + } + + #[test] + fn test_meta_var_named() { + test_match("return $A", "return 123;"); + test_match("return $_", "return 123;"); + test_non_match("return $A", "return;"); + test_non_match("return $_", "return;"); + test_match("return $$A", "return;"); + test_match("return $$_A", "return;"); + } + + #[test] + fn test_meta_var_multiple_occurrence() { + test_match("$A($$$)", "test(123)"); + test_match("$A($B)", "test(123)"); + test_non_match("$A($A)", "test(aaa)"); + test_non_match("$A($A)", "test(123)"); + test_non_match("$A($A, $A)", "test(123, 456)"); + test_match("$A($A)", "test(test)"); + test_non_match("$A($A)", "foo(bar)"); + } + + #[test] + fn test_string() { + test_match("'a'", "'a'"); + test_match("'abcdefg'", "'abcdefg'"); + test_match("`abcdefg`", "`abcdefg`"); + test_non_match("'a'", "'b'"); + test_non_match("'abcdefg'", "'gggggg'"); + } + + #[test] + fn test_skip_trivial_node() { + test_match("foo($A, $B)", "foo(a, b,)"); + test_match("class A { b() {}}", "class A { get b() {}}"); + } + + #[test] + fn test_trivia_in_pattern() { + test_match("foo($A, $B,)", "foo(a, b,)"); + test_non_match("foo($A, $B,)", "foo(a, b)"); + test_match("class A { get b() {}}", "class A { get b() {}}"); + test_non_match("class A { get b() {}}", "class A { b() {}}"); + } + + fn find_end_recursive(goal: &Pattern, node: &Node>) -> Option { + match_end_non_recursive(goal, node).or_else(|| { + node.children() + .find_map(|sub| find_end_recursive(goal, sub)) + }) + } + + fn test_end(s1: &str, s2: &str) -> Option { + let goal = Pattern::new(s1, Tsx); + let cand = Root::str(s2, Tsx); + let cand = cand.root(); + find_end_recursive(&goal, cand.clone()) + } + + #[test] + fn test_match_end() { + let end = test_end("return $A", "return 123 /* trivia */"); + assert_eq!(end.expect("should work"), 10); + let end = test_end("return f($A)", "return f(1,) /* trivia */"); + assert_eq!(end.expect("should work"), 12); + } + + // see https://github.com/ast-grep/ast-grep/issues/411 + #[test] + fn test_ellipsis_end() { + let end = test_end( + "import {$$$A, B, $$$C} from 'a'", + "import {A, B, C} from 'a'", + ); + assert_eq!(end.expect("must match"), 25); + } + + #[test] + fn test_gh_1087() { + test_match("($P) => $F($P)", "(x) => bar(x)"); + } +} diff --git a/crates/ast-engine/src/match_tree/strictness.rs b/crates/ast-engine/src/match_tree/strictness.rs new file mode 100644 index 0000000..c350673 --- /dev/null +++ b/crates/ast-engine/src/match_tree/strictness.rs @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::Doc; +pub use crate::matcher::MatchStrictness; +use crate::matcher::{PatternNode, kind_utils}; +use crate::meta_var::MetaVariable; +use crate::node::Node; +use std::iter::Peekable; +use std::str::FromStr; + +#[derive(Debug, Clone)] +pub enum MatchOneNode { + MatchedBoth, + SkipBoth, + SkipGoal, + SkipCandidate, + NoMatch, +} + +fn skip_comment_or_unnamed(n: &Node) -> bool { + if !n.is_named() { + return true; + } + let kind = n.kind(); + kind.contains("comment") +} + +impl MatchStrictness { + pub(crate) fn match_terminal( + &self, + is_named: bool, + text: &str, + goal_kind: u16, + candidate: &Node, + ) -> MatchOneNode { + let cand_kind = candidate.kind_id(); + let is_kind_matched = kind_utils::are_kinds_matching(goal_kind, cand_kind); + // work around ast-grep/ast-grep#1419 and tree-sitter/tree-sitter-typescript#306 + // tree-sitter-typescript has wrong span of unnamed node so text would not match + // just compare kind for unnamed node + if is_kind_matched && (!is_named || text == candidate.text()) { + return MatchOneNode::MatchedBoth; + } + let (skip_goal, skip_candidate) = match self { + Self::Cst => (false, false), + Self::Smart => (false, !candidate.is_named()), + Self::Ast => (!is_named, !candidate.is_named()), + Self::Relaxed => (!is_named, skip_comment_or_unnamed(candidate)), + Self::Signature => { + if is_kind_matched { + return MatchOneNode::MatchedBoth; + } + (!is_named, skip_comment_or_unnamed(candidate)) + } + }; + match (skip_goal, skip_candidate) { + (true, true) => MatchOneNode::SkipBoth, + (true, false) => MatchOneNode::SkipGoal, + (false, true) => MatchOneNode::SkipCandidate, + (false, false) => MatchOneNode::NoMatch, + } + } + + // TODO: this is a method for working around trailing nodes after pattern is matched + pub(crate) fn should_skip_trailing(&self, candidate: &Node) -> bool { + match self { + Self::Cst | Self::Ast => false, + Self::Smart => true, + Self::Relaxed | + Self::Signature => skip_comment_or_unnamed(candidate), + } + } + + pub(crate) fn should_skip_goal<'p>( + &self, + goal_children: &mut Peekable>, + ) -> bool { + while let Some(pattern) = goal_children.peek() { + let skipped = match self { + Self::Cst => false, + Self::Smart => match pattern { + PatternNode::MetaVar { meta_var } => match meta_var { + MetaVariable::Multiple | + MetaVariable::MultiCapture(_) => true, + MetaVariable::Dropped(_) | + MetaVariable::Capture(..) => false, + }, + PatternNode::Terminal { .. } | + PatternNode::Internal { .. } => false, + }, + Self::Ast | Self::Relaxed | Self::Signature => match pattern { + PatternNode::MetaVar { meta_var } => match meta_var { + MetaVariable::Multiple | MetaVariable::MultiCapture(_) => true, + MetaVariable::Dropped(named) | MetaVariable::Capture(_, named) => !named, + }, + PatternNode::Terminal { is_named, .. } => !is_named, + PatternNode::Internal { .. } => false, + }, + }; + if !skipped { + return false; + } + goal_children.next(); + } + true + } +} + +impl FromStr for MatchStrictness { + type Err = &'static str; + fn from_str(s: &str) -> Result { + match s { + "cst" => Ok(Self::Cst), + "smart" => Ok(Self::Smart), + "ast" => Ok(Self::Ast), + "relaxed" => Ok(Self::Relaxed), + "signature" => Ok(Self::Signature), + _ => Err("invalid strictness, valid options are: cst, smart, ast, relaxed, signature"), + } + } +} diff --git a/crates/ast-engine/src/matcher.rs b/crates/ast-engine/src/matcher.rs new file mode 100644 index 0000000..2c183cc --- /dev/null +++ b/crates/ast-engine/src/matcher.rs @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! This module defines the core `Matcher` trait in ast-grep. +//! +//! `Matcher` has three notable implementations in this module: +//! * `Pattern`: matches against a tree-sitter node based on its tree structure. +//! * `KindMatcher`: matches a node based on its `kind` +//! * `RegexMatcher`: matches a node based on its textual content using regex. + +use crate::Doc; +use crate::{Node, meta_var::MetaVarEnv}; + +use bit_set::BitSet; +use std::borrow::Cow; + +pub use crate::matchers::kind::*; +pub use crate::matchers::matcher::Matcher; +pub use crate::matchers::node_match::*; +pub use crate::matchers::pattern::*; +pub use crate::matchers::text::*; + +/// `MatcherExt` provides additional utility methods for `Matcher`. +/// It is implemented for all types that implement `Matcher`. +/// N.B. This trait is not intended to be implemented by users. +pub trait MatcherExt: Matcher { + fn match_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option> { + // in future we might need to customize initial MetaVarEnv + let mut env = Cow::Owned(MetaVarEnv::new()); + let node = self.match_node_with_env(node, &mut env)?; + Some(NodeMatch::new(node, env.into_owned())) + } + + fn find_node<'tree, D: Doc>(&self, node: Node<'tree, D>) -> Option> { + for n in node.dfs() { + if let Some(ret) = self.match_node(n.clone()) { + return Some(ret); + } + } + None + } +} + +impl MatcherExt for T where T: Matcher {} + +impl Matcher for str { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let pattern = Pattern::new(self, node.lang()); + pattern.match_node_with_env(node, env) + } + + fn get_match_len(&self, node: Node<'_, D>) -> Option { + let pattern = Pattern::new(self, node.lang()); + pattern.get_match_len(node) + } +} + +impl Matcher for &T +where + T: Matcher + ?Sized, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + (**self).match_node_with_env(node, env) + } + + fn potential_kinds(&self) -> Option { + (**self).potential_kinds() + } + + fn get_match_len(&self, node: Node<'_, D>) -> Option { + (**self).get_match_len(node) + } +} + +pub struct MatchAll; +impl Matcher for MatchAll { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option> { + Some(node) + } + + fn potential_kinds(&self) -> Option { + // return None to match anything + None + } +} + +pub struct MatchNone; +impl Matcher for MatchNone { + fn match_node_with_env<'tree, D: Doc>( + &self, + _node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option> { + None + } + + fn potential_kinds(&self) -> Option { + // matches nothing + Some(BitSet::new()) + } +} diff --git a/crates/ast-engine/src/matchers/kind.rs b/crates/ast-engine/src/matchers/kind.rs new file mode 100644 index 0000000..48efe67 --- /dev/null +++ b/crates/ast-engine/src/matchers/kind.rs @@ -0,0 +1,156 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::matcher::Matcher; + +use crate::language::Language; +use crate::meta_var::MetaVarEnv; +use crate::node::KindId; +use crate::{Doc, Node}; + +use std::borrow::Cow; + +use bit_set::BitSet; +use thiserror::Error; + +// 0 is symbol_end for not found, 65535 is builtin symbol ERROR +// see https://tree-sitter.docsforge.com/master/api/#TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION +// and https://tree-sitter.docsforge.com/master/api/ts_language_symbol_for_name/ +const TS_BUILTIN_SYM_END: KindId = 0; +const TS_BUILTIN_SYM_ERROR: KindId = 65535; + +#[derive(Debug, Error)] +pub enum KindMatcherError { + #[error("Kind `{0}` is invalid.")] + InvalidKindName(String), +} + +#[derive(Debug, Clone)] +pub struct KindMatcher { + kind: KindId, +} + +impl KindMatcher { + pub fn new(node_kind: &str, lang: &L) -> Self { + Self { + kind: lang.kind_to_id(node_kind), + } + } + + pub fn try_new(node_kind: &str, lang: &L) -> Result { + let s = Self::new(node_kind, lang); + if s.is_invalid() { + Err(KindMatcherError::InvalidKindName(node_kind.into())) + } else { + Ok(s) + } + } + + #[must_use] pub const fn from_id(kind: KindId) -> Self { + Self { kind } + } + + /// Whether the kind matcher contains undefined tree-sitter kind. + #[must_use] pub const fn is_invalid(&self) -> bool { + self.kind == TS_BUILTIN_SYM_END + } + + /// Construct a matcher that only matches ERROR + #[must_use] + pub const fn error_matcher() -> Self { + Self::from_id(TS_BUILTIN_SYM_ERROR) + } +} + +pub mod kind_utils { + use super::{KindId, TS_BUILTIN_SYM_ERROR}; + + /// Whether the kind will match parsing error occurred in the source code. + /// + /// This is used to match parsing error in the source code. + /// for example, we can use `kind: ERROR` in YAML to find invalid syntax in source. + /// the name `is_error` implies the matcher itself is error. + /// But here the matcher itself is valid and it is what it matches is error. + #[must_use] + pub const fn is_error_kind(kind: KindId) -> bool { + kind == TS_BUILTIN_SYM_ERROR + } + + #[must_use] + pub const fn are_kinds_matching(goal: KindId, candidate: KindId) -> bool { + goal == candidate || is_error_kind(goal) + } +} + +impl Matcher for KindMatcher { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option> { + if node.kind_id() == self.kind { + Some(node) + } else { + None + } + } + + fn potential_kinds(&self) -> Option { + let mut set = BitSet::new(); + set.insert(self.kind.into()); + Some(set) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::matcher::MatcherExt; + use crate::{Root, tree_sitter::StrDoc}; + + fn pattern_node(s: &str) -> Root> { + Root::str(s, Tsx) + } + #[test] + fn test_kind_match() { + let kind = "public_field_definition"; + let cand = pattern_node("class A { a = 123 }"); + let cand = cand.root(); + let pattern = KindMatcher::new(kind, &Tsx); + assert!( + pattern.find_node(cand.clone()).is_some(), + "goal: {}, candidate: {}", + kind, + cand.get_inner_node().to_sexp(), + ); + } + + #[test] + fn test_kind_non_match() { + let kind = "field_definition"; + let cand = pattern_node("const a = 123"); + let cand = cand.root(); + let pattern = KindMatcher::new(kind, &Tsx); + assert!( + pattern.find_node(cand.clone()).is_none(), + "goal: {}, candidate: {}", + kind, + cand.get_inner_node().to_sexp(), + ); + } + + #[test] + fn test_kind_potential_kinds() { + let kind = "field_definition"; + let matcher = KindMatcher::new(kind, &Tsx); + let potential_kinds = matcher + .potential_kinds() + .expect("should have potential kinds"); + // should has exactly one potential kind + assert_eq!(potential_kinds.len(), 1); + } +} diff --git a/crates/ast-engine/src/matchers/mod.rs b/crates/ast-engine/src/matchers/mod.rs new file mode 100644 index 0000000..ec7191f --- /dev/null +++ b/crates/ast-engine/src/matchers/mod.rs @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later +#![allow(clippy::redundant_pub_crate)] +//! Module imports for pattern matching. Feature gated except for unimplemented `types` module. +//! +//! ## Implementation Notes +//! +//! We changed the structure here from Ast-Grep, which uses a pattern like what's still +//! in [`crate::replacer`], where the root `parent.rs` module contains all +//! the submodules. +//! +//! ### Why this structure? +//! +//! We needed to access the type definitions without the `matching` feature flag, so we: +//! - Moved type definitions to `types.rs` (which we created). +//! - renamed the directory from `matcher` to `matchers` +//! - Created this `mod.rs` to import the submodules conditionally based on the `matching` feature flag. +//! - Kept trait implementations behind the feature flag. +//! - Moved [`types::MatchStrictness`] to `types.rs` in this module from `crate::match_tree::strictness` (not the implementation, just the type definition). +//! +//! #### Practical Implications +//! +//! From an API perspective, nothing changed -- `matcher` is still the main entry point for pattern matching (if the feature is enabled). + +#[cfg(feature = "matching")] +pub(crate) mod pattern; + +#[cfg(feature = "matching")] +pub(crate) mod kind; + +#[cfg(feature = "matching")] +pub(crate) mod node_match; + +#[cfg(feature = "matching")] +pub(crate) mod text; + +pub(crate) mod types; +#[cfg(not(feature = "matching"))] +pub use types::*; + +pub(crate) mod matcher { + pub use super::types::Matcher; +} diff --git a/crates/ast-engine/src/matchers/node_match.rs b/crates/ast-engine/src/matchers/node_match.rs new file mode 100644 index 0000000..d5e491b --- /dev/null +++ b/crates/ast-engine/src/matchers/node_match.rs @@ -0,0 +1,151 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::matcher::Matcher; +use crate::meta_var::MetaVarEnv; +use crate::replacer::Replacer; +use crate::source::Edit as E; +use crate::{Doc, Node}; + +use std::borrow::Borrow; +use std::ops::Deref; + +type Edit = E<::Source>; + +/// Represents the matched node with populated `MetaVarEnv`. +/// It derefs to the `Node` so you can use it as a `Node`. +/// To access the underlying `MetaVarEnv`, call `get_env` method. +#[derive(Clone)] +pub struct NodeMatch<'t, D: Doc>(Node<'t, D>, MetaVarEnv<'t, D>); + +impl<'tree, D: Doc> NodeMatch<'tree, D> { + pub const fn new(node: Node<'tree, D>, env: MetaVarEnv<'tree, D>) -> Self { + Self(node, env) + } + + pub const fn get_node(&self) -> &Node<'tree, D> { + &self.0 + } + + /// Returns the populated `MetaVarEnv` for this match. + pub const fn get_env(&self) -> &MetaVarEnv<'tree, D> { + &self.1 + } + pub const fn get_env_mut(&mut self) -> &mut MetaVarEnv<'tree, D> { + &mut self.1 + } + /// # Safety + /// should only called for readopting nodes + pub(crate) const unsafe fn get_node_mut(&mut self) -> &mut Node<'tree, D> { + &mut self.0 + } +} + +impl NodeMatch<'_, D> { + pub fn replace_by>(&self, replacer: R) -> Edit { + let range = self.range(); + let position = range.start; + let deleted_length = range.len(); + let inserted_text = replacer.generate_replacement(self); + Edit:: { + position, + deleted_length, + inserted_text, + } + } + + #[doc(hidden)] + pub fn make_edit(&self, matcher: &M, replacer: &R) -> Edit + where + M: Matcher, + R: Replacer, + { + let range = replacer.get_replaced_range(self, matcher); + let inserted_text = replacer.generate_replacement(self); + Edit:: { + position: range.start, + deleted_length: range.len(), + inserted_text, + } + } +} + +impl<'tree, D: Doc> From> for NodeMatch<'tree, D> { + fn from(node: Node<'tree, D>) -> Self { + Self(node, MetaVarEnv::new()) + } +} + +/// `NodeMatch` is an immutable view to Node +impl<'tree, D: Doc> From> for Node<'tree, D> { + fn from(node_match: NodeMatch<'tree, D>) -> Self { + node_match.0 + } +} + +/// `NodeMatch` is an immutable view to Node +impl<'tree, D: Doc> Deref for NodeMatch<'tree, D> { + type Target = Node<'tree, D>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// `NodeMatch` is an immutable view to Node +impl<'tree, D: Doc> Borrow> for NodeMatch<'tree, D> { + fn borrow(&self) -> &Node<'tree, D> { + &self.0 + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::tree_sitter::{LanguageExt, StrDoc}; + + fn use_node(n: &Node>) -> String { + n.text().to_string() + } + + fn borrow_node<'a, D, B>(b: B) -> String + where + D: Doc + 'static, + B: Borrow>, + { + b.borrow().text().to_string() + } + + #[test] + fn test_node_match_as_node() { + let root = Tsx.ast_grep("var a = 1"); + let node = root.root(); + let src = node.text().to_string(); + let nm = NodeMatch::from(node); + let ret = use_node(&*nm); + assert_eq!(ret, src); + assert_eq!(use_node(&*nm), borrow_node(nm)); + } + + #[test] + fn test_node_env() { + let root = Tsx.ast_grep("var a = 1"); + let find = root.root().find("var $A = 1").expect("should find"); + let env = find.get_env(); + let node = env.get_match("A").expect("should find"); + assert_eq!(node.text(), "a"); + } + + #[test] + fn test_replace_by() { + let root = Tsx.ast_grep("var a = 1"); + let find = root.root().find("var $A = 1").expect("should find"); + let fixed = find.replace_by("var b = $A"); + assert_eq!(fixed.position, 0); + assert_eq!(fixed.deleted_length, 9); + assert_eq!(fixed.inserted_text, "var b = a".as_bytes()); + } +} diff --git a/crates/ast-engine/src/matchers/pattern.rs b/crates/ast-engine/src/matchers/pattern.rs new file mode 100644 index 0000000..bd6a704 --- /dev/null +++ b/crates/ast-engine/src/matchers/pattern.rs @@ -0,0 +1,601 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::kind::{ + KindMatcher, + kind_utils, +}; +use super::matcher::Matcher; +pub use super::types::{MatchStrictness, Pattern, PatternBuilder, PatternError, PatternNode}; +use crate::language::Language; +use crate::match_tree::{match_end_non_recursive, match_node_non_recursive}; +use crate::meta_var::{MetaVarEnv, MetaVariable}; +use crate::source::SgNode; +use crate::{Doc, Node, Root}; + +use bit_set::BitSet; +use std::borrow::Cow; +use thread_utils::RapidSet; + +impl PatternBuilder<'_> { + pub fn build(&self, parse: F) -> Result + where + F: FnOnce(&str) -> Result, + D: Doc, + { + let doc = parse(&self.src).map_err(PatternError::Parse)?; + let root = Root::doc(doc); + if let Some(selector) = self.selector { + self.contextual(&root, selector) + } else { + self.single(&root) + } + } + fn single(&self, root: &Root) -> Result { + let goal = root.root(); + if goal.children().len() == 0 { + return Err(PatternError::NoContent(self.src.to_string())); + } + if !is_single_node(&goal.inner) { + return Err(PatternError::MultipleNode(self.src.to_string())); + } + let node = Pattern::single_matcher(root); + Ok(Pattern::from(node)) + } + + fn contextual(&self, root: &Root, selector: &str) -> Result { + let goal = root.root(); + let kind_matcher = KindMatcher::try_new(selector, root.lang())?; + let Some(node) = goal.find(&kind_matcher) else { + return Err(PatternError::NoSelectorInContext { + context: self.src.to_string(), + selector: selector.into(), + }); + }; + Ok(Pattern { + root_kind: Some(node.kind_id()), + node: convert_node_to_pattern(node.get_node()), + strictness: MatchStrictness::Smart, + }) + } +} + +impl PatternNode { + // for skipping trivial nodes in goal after ellipsis + #[must_use] + pub const fn is_trivial(&self) -> bool { + match self { + Self::Terminal { is_named, .. } => !*is_named, + _ => false, + } + } + + #[inline] + #[must_use] + pub fn fixed_string(&self) -> Cow<'_, str> { + match &self { + Self::Terminal { text, .. } => Cow::Borrowed(text), + Self::MetaVar { .. } => Cow::Borrowed(""), + Self::Internal { children, .. } => children + .iter() + .map(|n| n.fixed_string()) + .fold(Cow::Borrowed(""), |longest, curr| { + if longest.len() >= curr.len() { + longest + } else { + curr + } + }), + } + } +} +impl<'r, D: Doc> From> for PatternNode { + fn from(node: Node<'r, D>) -> Self { + convert_node_to_pattern(&node) + } +} + +impl<'r, D: Doc> From> for Pattern { + fn from(node: Node<'r, D>) -> Self { + Self { + node: convert_node_to_pattern(&node), + root_kind: None, + strictness: MatchStrictness::Smart, + } + } +} + +fn convert_node_to_pattern(node: &Node<'_, D>) -> PatternNode { + if let Some(meta_var) = extract_var_from_node(node) { + PatternNode::MetaVar { meta_var } + } else if node.is_leaf() { + PatternNode::Terminal { + text: node.text().to_string(), + is_named: node.is_named(), + kind_id: node.kind_id(), + } + } else { + // Pre-allocate vector with estimated capacity to reduce allocations + let child_count = node.children().count(); + let mut children = Vec::with_capacity(child_count); + + for child in node.children() { + if !child.is_missing() { + children.push(PatternNode::from(child)); + } + } + + PatternNode::Internal { + kind_id: node.kind_id(), + children, + } + } +} + +fn extract_var_from_node(goal: &Node<'_, D>) -> Option { + let key = goal.text(); + goal.lang().extract_meta_var(&key) +} + +#[inline] +fn is_single_node<'r, N: SgNode<'r>>(n: &N) -> bool { + match n.children().len() { + 1 => true, + 2 => { + let c = n.child(1).expect("second child must exist"); + // some language will have weird empty syntax node at the end + // see golang's `$A = 0` pattern test case + c.is_missing() || c.kind().is_empty() + } + _ => false, + } +} +impl Pattern { + #[must_use] + pub const fn has_error(&self) -> bool { + let kind = match &self.node { + PatternNode::Terminal { kind_id, .. } | + PatternNode::Internal { kind_id, .. } => *kind_id, + PatternNode::MetaVar { .. } => match self.root_kind { + Some(k) => k, + None => return false, + }, + }; + kind_utils::is_error_kind(kind) + } + + #[must_use] + pub fn fixed_string(&self) -> Cow<'_, str> { + self.node.fixed_string() + } + + /// Get all defined variables in the pattern. + /// Used for validating rules and report undefined variables. + #[must_use] + pub fn defined_vars(&self) -> RapidSet<&str> { + let mut vars = RapidSet::default(); + collect_vars(&self.node, &mut vars); + vars + } +} + +fn meta_var_name(meta_var: &MetaVariable) -> Option<&str> { + use MetaVariable as MV; + match meta_var { + MV::Capture(name, _) | + MV::MultiCapture(name) => Some(name), + MV::Dropped(_) | + MV::Multiple => None, + } +} + +fn collect_vars<'p>(p: &'p PatternNode, vars: &mut RapidSet<&'p str>) { + match p { + PatternNode::MetaVar { meta_var, .. } => { + if let Some(name) = meta_var_name(meta_var) { + vars.insert(name); + } + } + PatternNode::Terminal { .. } => { + // collect nothing for terminal nodes! + } + PatternNode::Internal { children, .. } => { + for c in children { + collect_vars(c, vars); + } + } + } +} + +impl Pattern { + pub fn try_new(src: &str, lang: &L) -> Result { + let processed = lang.pre_process_pattern(src); + let builder = PatternBuilder { + selector: None, + src: processed, + }; + lang.build_pattern(&builder) + } + + pub fn new(src: &str, lang: &L) -> Self { + Self::try_new(src, lang).unwrap() + } + + #[must_use] + pub const fn with_strictness(mut self, strictness: MatchStrictness) -> Self { + self.strictness = strictness; + self + } + + pub fn contextual( + context: &str, + selector: &str, + lang: &L, + ) -> Result { + let processed = lang.pre_process_pattern(context); + let builder = PatternBuilder { + selector: Some(selector), + src: processed, + }; + lang.build_pattern(&builder) + } + fn single_matcher(root: &Root) -> Node<'_, D> { + // debug_assert!(matches!(self.style, PatternStyle::Single)); + let node = root.root(); + let mut inner = node.inner; + while is_single_node(&inner) { + inner = inner.child(0).unwrap(); + } + Node { inner, root } + } +} + +impl Matcher for Pattern { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + if let Some(k) = self.root_kind { + if node.kind_id() != k { + return None; + } + } + // do not pollute the env if pattern does not match + let mut may_write = Cow::Borrowed(env.as_ref()); + let node = match_node_non_recursive(self, node, &mut may_write)?; + if let Cow::Owned(map) = may_write { + // only change env when pattern matches + *env = Cow::Owned(map); + } + Some(node) + } + + fn potential_kinds(&self) -> Option { + let kind = match self.node { + PatternNode::Terminal { kind_id, .. } => kind_id, + PatternNode::MetaVar { .. } => self.root_kind?, + PatternNode::Internal { kind_id, .. } => { + if kind_utils::is_error_kind(kind_id) { + // error can match any kind + return None; + } + kind_id + } + }; + + let mut kinds = BitSet::new(); + kinds.insert(kind.into()); + Some(kinds) + } + + fn get_match_len(&self, node: Node<'_, D>) -> Option { + let start = node.range().start; + let end = match_end_non_recursive(self, &node)?; + Some(end - start) + } +} +impl std::fmt::Debug for PatternNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MetaVar { meta_var, .. } => write!(f, "{meta_var:?}"), + Self::Terminal { text, .. } => write!(f, "{text}"), + Self::Internal { children, .. } => write!(f, "{children:?}"), + } + } +} + +impl std::fmt::Debug for Pattern { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.node) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::matcher::MatcherExt; + use crate::meta_var::MetaVarEnv; + use crate::tree_sitter::StrDoc; + use thread_utils::RapidMap; + + fn pattern_node(s: &str) -> Root> { + Root::str(s, Tsx) + } + + fn test_match(s1: &str, s2: &str) { + let pattern = Pattern::new(s1, Tsx); + let cand = pattern_node(s2); + let cand = cand.root(); + assert!( + pattern.find_node(cand.clone()).is_some(), + "goal: {:?}, candidate: {}", + pattern, + cand.get_inner_node().to_sexp(), + ); + } + fn test_non_match(s1: &str, s2: &str) { + let pattern = Pattern::new(s1, Tsx); + let cand = pattern_node(s2); + let cand = cand.root(); + assert!( + pattern.find_node(cand.clone()).is_none(), + "goal: {:?}, candidate: {}", + pattern, + cand.get_inner_node().to_sexp(), + ); + } + + #[test] + fn test_meta_variable() { + test_match("const a = $VALUE", "const a = 123"); + test_match("const $VARIABLE = $VALUE", "const a = 123"); + test_match("const $VARIABLE = $VALUE", "const a = 123"); + } + + #[test] + fn test_whitespace() { + test_match("function t() { }", "function t() {}"); + test_match("function t() {}", "function t() { }"); + } + + fn match_env(goal_str: &str, cand: &str) -> RapidMap { + let pattern = Pattern::new(goal_str, Tsx); + let cand = pattern_node(cand); + let cand = cand.root(); + let nm = pattern.find_node(cand).unwrap(); + let mapped_env = RapidMap::from(nm.get_env().clone()); + mapped_env + } + + #[test] + fn test_meta_variable_env() { + let env = match_env("const a = $VALUE", "const a = 123"); + assert_eq!(env["VALUE"], "123"); + } + + #[test] + fn test_pattern_should_not_pollute_env() { + // gh issue #1164 + let pattern = Pattern::new("const $A = 114", Tsx); + let cand = pattern_node("const a = 514"); + let cand = cand.root().child(0).unwrap(); + let map = MetaVarEnv::new(); + let mut env = Cow::Borrowed(&map); + let nm = pattern.match_node_with_env(cand, &mut env); + assert!(nm.is_none()); + assert!(env.get_match("A").is_none()); + assert!(map.get_match("A").is_none()); + } + + #[test] + fn test_match_non_atomic() { + let env = match_env("const a = $VALUE", "const a = 5 + 3"); + assert_eq!(env["VALUE"], "5 + 3"); + } + + #[test] + fn test_class_assignment() { + test_match("class $C { $MEMBER = $VAL}", "class A {a = 123}"); + test_non_match("class $C { $MEMBER = $VAL; b = 123; }", "class A {a = 123}"); + // test_match("a = 123", "class A {a = 123}"); + test_non_match("a = 123", "class B {b = 123}"); + } + + #[test] + fn test_return() { + test_match("$A($B)", "return test(123)"); + } + + #[test] + fn test_contextual_pattern() { + let pattern = Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx) + .expect("test"); + let cand = pattern_node("class B { b = 123 }"); + assert!(pattern.find_node(cand.root()).is_some()); + let cand = pattern_node("let b = 123"); + assert!(pattern.find_node(cand.root()).is_none()); + } + + #[test] + fn test_contextual_match_with_env() { + let pattern = Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx) + .expect("test"); + let cand = pattern_node("class B { b = 123 }"); + let nm = pattern.find_node(cand.root()).expect("test"); + let env = nm.get_env(); + let env = RapidMap::from(env.clone()); + assert_eq!(env["F"], "b"); + assert_eq!(env["I"], "123"); + } + + #[test] + fn test_contextual_unmatch_with_env() { + let pattern = Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx) + .expect("test"); + let cand = pattern_node("let b = 123"); + let nm = pattern.find_node(cand.root()); + assert!(nm.is_none()); + } + + fn get_kind(kind_str: &str) -> usize { + Tsx.kind_to_id(kind_str).into() + } + + #[test] + fn test_pattern_potential_kinds() { + let pattern = Pattern::new("const a = 1", Tsx); + let kind = get_kind("lexical_declaration"); + let kinds = pattern.potential_kinds().expect("should have kinds"); + assert_eq!(kinds.len(), 1); + assert!(kinds.contains(kind)); + } + + #[test] + fn test_pattern_with_non_root_meta_var() { + let pattern = Pattern::new("const $A = $B", Tsx); + let kind = get_kind("lexical_declaration"); + let kinds = pattern.potential_kinds().expect("should have kinds"); + assert_eq!(kinds.len(), 1); + assert!(kinds.contains(kind)); + } + + #[test] + fn test_bare_wildcard() { + let pattern = Pattern::new("$A", Tsx); + // wildcard should match anything, so kinds should be None + assert!(pattern.potential_kinds().is_none()); + } + + #[test] + fn test_contextual_potential_kinds() { + let pattern = Pattern::contextual("class A { $F = $I }", "public_field_definition", Tsx) + .expect("test"); + let kind = get_kind("public_field_definition"); + let kinds = pattern.potential_kinds().expect("should have kinds"); + assert_eq!(kinds.len(), 1); + assert!(kinds.contains(kind)); + } + + #[test] + fn test_contextual_wildcard() { + let pattern = + Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test"); + let kind = get_kind("property_identifier"); + let kinds = pattern.potential_kinds().expect("should have kinds"); + assert_eq!(kinds.len(), 1); + assert!(kinds.contains(kind)); + } + + #[test] + #[ignore] + fn test_multi_node_pattern() { + let pattern = Pattern::new("a;b;c;", Tsx); + let kinds = pattern.potential_kinds().expect("should have kinds"); + assert_eq!(kinds.len(), 1); + test_match("a;b;c", "a;b;c;"); + } + + #[test] + #[ignore] + fn test_multi_node_meta_var() { + let env = match_env("a;$B;c", "a;b;c"); + assert_eq!(env["B"], "b"); + let env = match_env("a;$B;c", "a;1+2+3;c"); + assert_eq!(env["B"], "1+2+3"); + } + + #[test] + #[ignore] + fn test_pattern_size() { + assert_eq!(std::mem::size_of::(), 40); + } + + #[test] + fn test_error_kind() { + let ret = Pattern::contextual("a", "property_identifier", Tsx); + assert!(ret.is_err()); + let ret = Pattern::new("123+", Tsx); + assert!(ret.has_error()); + } + + #[test] + fn test_bare_wildcard_in_context() { + let pattern = + Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test"); + let cand = pattern_node("let b = 123"); + // it should not match + assert!(pattern.find_node(cand.root()).is_none()); + } + + #[test] + fn test_pattern_fixed_string() { + let pattern = Pattern::new("class A { $F }", Tsx); + assert_eq!(pattern.fixed_string(), "class"); + let pattern = + Pattern::contextual("class A { $F }", "property_identifier", Tsx).expect("test"); + assert!(pattern.fixed_string().is_empty()); + } + + #[test] + fn test_pattern_error() { + let pattern = Pattern::try_new("", Tsx); + assert!(matches!(pattern, Err(PatternError::NoContent(_)))); + let pattern = Pattern::try_new("12 3344", Tsx); + assert!(matches!(pattern, Err(PatternError::MultipleNode(_)))); + } + + #[test] + fn test_debug_pattern() { + let pattern = Pattern::new("var $A = 1", Tsx); + assert_eq!( + format!("{pattern:?}"), + "[var, [Capture(\"A\", true), =, 1]]" + ); + } + + fn defined_vars(s: &str) -> Vec { + let pattern = Pattern::new(s, Tsx); + let mut vars: Vec<_> = pattern + .defined_vars() + .into_iter() + .map(String::from) + .collect(); + vars.sort(); + vars + } + + #[test] + fn test_extract_meta_var_from_pattern() { + let vars = defined_vars("var $A = 1"); + assert_eq!(vars, ["A"]); + } + + #[test] + fn test_extract_complex_meta_var() { + let vars = defined_vars("function $FUNC($$$ARGS): $RET { $$$BODY }"); + assert_eq!(vars, ["ARGS", "BODY", "FUNC", "RET"]); + } + + #[test] + fn test_extract_duplicate_meta_var() { + let vars = defined_vars("var $A = $A"); + assert_eq!(vars, ["A"]); + } + + #[test] + fn test_contextual_pattern_vars() { + let pattern = + Pattern::contextual("
", "jsx_attribute", Tsx).expect("correct"); + assert_eq!(pattern.defined_vars(), ["A"].into_iter().collect()); + } + + #[test] + fn test_gh_1087() { + test_match("($P) => $F($P)", "(x) => bar(x)"); + } +} diff --git a/crates/ast-engine/src/matchers/text.rs b/crates/ast-engine/src/matchers/text.rs new file mode 100644 index 0000000..2111410 --- /dev/null +++ b/crates/ast-engine/src/matchers/text.rs @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::matcher::Matcher; +use crate::Doc; +use crate::Node; +use crate::meta_var::MetaVarEnv; + +use bit_set::BitSet; +use regex::{Error as RegexError, Regex}; +use thiserror::Error; + +use std::borrow::Cow; + +#[derive(Debug, Error)] +pub enum RegexMatcherError { + #[error("Parsing text matcher fails.")] + Regex(#[from] RegexError), +} + +#[derive(Clone, Debug)] +pub struct RegexMatcher { + regex: Regex, +} + +impl RegexMatcher { + pub fn try_new(text: &str) -> Result { + Ok(Self { + regex: Regex::new(text)?, + }) + } +} + +impl Matcher for RegexMatcher { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option> { + self.regex.is_match(&node.text()).then_some(node) + } + + fn potential_kinds(&self) -> Option { + None + } +} diff --git a/crates/ast-engine/src/matchers/types.rs b/crates/ast-engine/src/matchers/types.rs new file mode 100644 index 0000000..5299aeb --- /dev/null +++ b/crates/ast-engine/src/matchers/types.rs @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! Types for Pattern and Pattern matching. +//! +//! Definitions for the globally important pattern matching types. +//! Allows their use outside the pattern matching feature flags (unimplemented). + +use crate::Doc; +use crate::MetaVarEnv; +use crate::meta_var::MetaVariable; +use crate::node::Node; +use bit_set::BitSet; +use std::borrow::Cow; +use thiserror::Error; + +pub trait Matcher { + /// Returns the node why the input is matched or None if not matched. + /// The return value is usually input node itself, but it can be different node. + /// For example `Has` matcher can return the child or descendant node. + fn match_node_with_env<'tree, D: Doc>( + &self, + _node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option>; + + /// Returns a bitset for all possible target node kind ids. + /// Returns None if the matcher needs to try against all node kind. + fn potential_kinds(&self) -> Option { + None + } + + /// `get_match_len` will skip trailing anonymous child node to exclude punctuation. + // This is not included in NodeMatch since it is only used in replace + fn get_match_len(&self, _node: Node<'_, D>) -> Option { + None + } +} + +#[derive(Clone, Debug)] +pub enum MatchStrictness { + Cst, // all nodes are matched + Smart, // all nodes except source trivial nodes are matched. + Ast, // only ast nodes are matched + Relaxed, // ast-nodes excluding comments are matched + Signature, // ast-nodes excluding comments, without text +} + +#[derive(Clone)] +pub struct Pattern { + pub node: PatternNode, + pub(crate) root_kind: Option, + pub strictness: MatchStrictness, +} + +#[derive(Clone, Debug)] +pub struct PatternBuilder<'a> { + pub(crate) selector: Option<&'a str>, + pub(crate) src: Cow<'a, str>, +} + +#[derive(Clone)] +pub enum PatternNode { + MetaVar { + meta_var: MetaVariable, + }, + /// Node without children. + Terminal { + text: String, + is_named: bool, + kind_id: u16, + }, + /// Non-Terminal Syntax Nodes are called Internal + Internal { + kind_id: u16, + children: Vec, + }, +} + +#[derive(Debug, Error)] +pub enum PatternError { + #[error("Fails to parse the pattern query: `{0}`")] + Parse(String), + #[error("No AST root is detected. Please check the pattern source `{0}`.")] + NoContent(String), + #[error("Multiple AST nodes are detected. Please check the pattern source `{0}`.")] + MultipleNode(String), + #[error(transparent)] + #[cfg(feature = "matching")] + InvalidKind(#[from] super::kind::KindMatcherError), + #[error( + "Fails to create Contextual pattern: selector `{selector}` matches no node in the context `{context}`." + )] + NoSelectorInContext { context: String, selector: String }, +} diff --git a/crates/ast-engine/src/meta_var.rs b/crates/ast-engine/src/meta_var.rs new file mode 100644 index 0000000..eea2156 --- /dev/null +++ b/crates/ast-engine/src/meta_var.rs @@ -0,0 +1,446 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#[cfg(feature = "matching")] +use crate::match_tree::does_node_match_exactly; +#[cfg(feature = "matching")] +use crate::matcher::Matcher; +use crate::source::Content; +use crate::{Doc, Node}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::hash::BuildHasherDefault; +use thread_utils::{map_with_capacity, RapidInlineHasher, RapidMap}; + +use crate::replacer::formatted_slice; + +pub type MetaVariableID = String; + +pub type Underlying = Vec<<::Source as Content>::Underlying>; + +/// a dictionary that stores metavariable instantiation +/// const a = 123 matched with const a = $A will produce env: $A => 123 +#[derive(Clone)] +pub struct MetaVarEnv<'tree, D: Doc> { + single_matched: RapidMap>, + multi_matched: RapidMap>>, + transformed_var: RapidMap>, +} + +impl<'t, D: Doc> MetaVarEnv<'t, D> { + #[must_use] + pub fn new() -> Self { + Self { + single_matched: RapidMap::default(), + multi_matched: RapidMap::default(), + transformed_var: RapidMap::default(), + } + } + + pub fn insert(&mut self, id: &str, ret: Node<'t, D>) -> Option<&mut Self> { + if self.match_variable(id, &ret) { + self.single_matched.insert(id.to_string(), ret); + Some(self) + } else { + None + } + } + + pub fn insert_multi(&mut self, id: &str, ret: Vec>) -> Option<&mut Self> { + if self.match_multi_var(id, &ret) { + self.multi_matched.insert(id.to_string(), ret); + Some(self) + } else { + None + } + } + + /// Insert without cloning the key if it's already owned + pub fn insert_owned(&mut self, id: String, ret: Node<'t, D>) -> Option<&mut Self> { + if self.match_variable(&id, &ret) { + self.single_matched.insert(id, ret); + Some(self) + } else { + None + } + } + + /// Insert multi without cloning the key if it's already owned + pub fn insert_multi_owned(&mut self, id: String, ret: Vec>) -> Option<&mut Self> { + if self.match_multi_var(&id, &ret) { + self.multi_matched.insert(id, ret); + Some(self) + } else { + None + } + } + #[must_use] + pub fn get_match(&self, var: &str) -> Option<&'_ Node<'t, D>> { + self.single_matched.get(var) + } + #[must_use] + pub fn get_multiple_matches(&self, var: &str) -> Vec> { + self.multi_matched.get(var).cloned().unwrap_or_default() + } + + /// Returns a reference to multiple matches without cloning + #[must_use] + pub fn get_multiple_matches_ref(&self, var: &str) -> Option<&Vec>> { + self.multi_matched.get(var) + } + + pub fn add_label(&mut self, label: &str, node: Node<'t, D>) { + self.multi_matched + .entry(label.into()) + .or_default() + .push(node); + } + #[must_use] + pub fn get_labels(&self, label: &str) -> Option<&Vec>> { + self.multi_matched.get(label) + } + + #[cfg(feature = "matching")] + pub fn get_matched_variables(&self) -> impl Iterator + use<'_, 't, D> { + let single = self + .single_matched + .keys() + .map(|n| MetaVariable::Capture(n.clone(), false)); + let transformed = self + .transformed_var + .keys() + .map(|n| MetaVariable::Capture(n.clone(), false)); + let multi = self + .multi_matched + .keys() + .map(|n| MetaVariable::MultiCapture(n.clone())); + single.chain(multi).chain(transformed) + } + + fn match_variable(&self, id: &str, candidate: &Node<'t, D>) -> bool { + if let Some(m) = self.single_matched.get(id) { + return does_node_match_exactly(m, candidate); + } + true + } + #[cfg(feature = "matching")] + fn match_multi_var(&self, id: &str, cands: &[Node<'t, D>]) -> bool { + let Some(nodes) = self.multi_matched.get(id) else { + return true; + }; + let mut named_nodes = nodes.iter().filter(|n| n.is_named()); + let mut named_cands = cands.iter().filter(|n| n.is_named()); + loop { + if let Some(node) = named_nodes.next() { + let Some(cand) = named_cands.next() else { + // cand is done but node is not + break false; + }; + if !does_node_match_exactly(node, cand) { + break false; + } + } else if named_cands.next().is_some() { + // node is done but cand is not + break false; + } else { + // both None, matches + break true; + } + } + } + + #[cfg(feature = "matching")] + pub fn match_constraints( + &mut self, + var_matchers: &RapidMap, + ) -> bool { + let mut env = Cow::Borrowed(self); + for (var_id, candidate) in &self.single_matched { + if let Some(m) = var_matchers.get(var_id) { + if m.match_node_with_env(candidate.clone(), &mut env).is_none() { + return false; + } + } + } + if let Cow::Owned(env) = env { + *self = env; + } + true + } + + pub fn insert_transformation(&mut self, var: &MetaVariable, name: &str, slice: Underlying) { + let node = match var { + MetaVariable::Capture(v, _) => self.single_matched.get(v), + MetaVariable::MultiCapture(vs) => self.multi_matched.get(vs).and_then(|vs| vs.first()), + _ => None, + }; + let deindented = if let Some(v) = node { + formatted_slice(&slice, v.get_doc().get_source(), v.range().start).to_vec() + } else { + slice + }; + self.transformed_var.insert(name.to_string(), deindented); + } + #[must_use] + pub fn get_transformed(&self, var: &str) -> Option<&Underlying> { + self.transformed_var.get(var) + } + #[must_use] + pub fn get_var_bytes<'s>( + &'s self, + var: &MetaVariable, + ) -> Option<&'s [::Underlying]> { + get_var_bytes_impl(self, var) + } +} + +impl MetaVarEnv<'_, D> { + /// internal for readopt `NodeMatch` in pinned.rs + /// readopt node and env when sending them to other threads + pub(crate) fn visit_nodes(&mut self, mut f: F) + where + F: FnMut(&mut Node<'_, D>), + { + for n in self.single_matched.values_mut() { + f(n); + } + for ns in self.multi_matched.values_mut() { + for n in ns { + f(n); + } + } + } +} + +fn get_var_bytes_impl<'e, 't, C, D>( + env: &'e MetaVarEnv<'t, D>, + var: &MetaVariable, +) -> Option<&'e [C::Underlying]> +where + D: Doc + 't, + C: Content + 't, +{ + match var { + MetaVariable::Capture(n, _) => { + if let Some(node) = env.get_match(n) { + let bytes = node.get_doc().get_source().get_range(node.range()); + Some(bytes) + } else if let Some(bytes) = env.get_transformed(n) { + Some(bytes) + } else { + None + } + } + MetaVariable::MultiCapture(n) => { + let nodes = env.get_multiple_matches(n); + if nodes.is_empty() { + None + } else { + // NOTE: start_byte is not always index range of source's slice. + // e.g. start_byte is still byte_offset in utf_16 (napi). start_byte + // so we need to call source's get_range method + let start = nodes[0].range().start; + let end = nodes[nodes.len() - 1].range().end; + Some(nodes[0].get_doc().get_source().get_range(start..end)) + } + } + _ => None, + } +} + +impl Default for MetaVarEnv<'_, D> { + fn default() -> Self { + Self::new() + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MetaVariable { + /// $A for captured meta var + Capture(MetaVariableID, bool), + /// $_ for non-captured meta var + Dropped(bool), + /// $$$ for non-captured multi var + Multiple, + /// $$$A for captured ellipsis + MultiCapture(MetaVariableID), +} + +pub(crate) fn extract_meta_var(src: &str, meta_char: char) -> Option { + use MetaVariable::{Capture, Dropped, MultiCapture, Multiple}; + let ellipsis: String = std::iter::repeat_n(meta_char, 3).collect(); + if src == ellipsis { + return Some(Multiple); + } + if let Some(trimmed) = src.strip_prefix(&ellipsis) { + if !trimmed.chars().all(is_valid_meta_var_char) { + return None; + } + if trimmed.starts_with('_') { + return Some(Multiple); + } + return Some(MultiCapture(trimmed.to_owned())); + } + if !src.starts_with(meta_char) { + return None; + } + let trimmed = &src[meta_char.len_utf8()..]; + let (trimmed, named) = if let Some(t) = trimmed.strip_prefix(meta_char) { + (t, false) + } else { + (trimmed, true) + }; + if !trimmed.starts_with(is_valid_first_char) || // empty or started with number + !trimmed.chars().all(is_valid_meta_var_char) + // not in form of $A or $_ + { + return None; + } + if trimmed.starts_with('_') { + Some(Dropped(named)) + } else { + Some(Capture(trimmed.to_owned(), named)) + } +} + +#[inline] +const fn is_valid_first_char(c: char) -> bool { + matches!(c, 'A'..='Z' | '_') +} + +#[inline] +pub(crate) const fn is_valid_meta_var_char(c: char) -> bool { + is_valid_first_char(c) || c.is_ascii_digit() +} + +impl<'tree, D: Doc> From> for HashMap> +where + D::Source: Content, +{ + fn from(env: MetaVarEnv<'tree, D>) -> Self { + let mut ret: Self = map_with_capacity( + env.single_matched.len() + env.multi_matched.len() + env.transformed_var.len() + ); + for (id, node) in env.single_matched { + ret.insert(id, node.text().into()); + } + for (id, bytes) in env.transformed_var { + ret.insert(id, ::encode_bytes(&bytes).to_string()); + } + for (id, nodes) in env.multi_matched { + // Optimize string concatenation by pre-calculating capacity + if nodes.is_empty() { + ret.insert(id, "[]".to_string()); + continue; + } + + let estimated_capacity = nodes.len() * 16 + 10; // rough estimate + let mut result = String::with_capacity(estimated_capacity); + result.push('['); + + let mut first = true; + for node in &nodes { + if !first { + result.push_str(", "); + } + result.push_str(&node.text()); + first = false; + } + result.push(']'); + ret.insert(id, result); + } + ret + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Pattern; + use crate::language::Tsx; + use crate::tree_sitter::LanguageExt; + + fn extract_var(s: &str) -> Option { + extract_meta_var(s, '$') + } + #[test] + fn test_match_var() { + use MetaVariable::*; + assert_eq!(extract_var("$$$"), Some(Multiple)); + assert_eq!(extract_var("$ABC"), Some(Capture("ABC".into(), true))); + assert_eq!(extract_var("$$ABC"), Some(Capture("ABC".into(), false))); + assert_eq!(extract_var("$MATCH1"), Some(Capture("MATCH1".into(), true))); + assert_eq!(extract_var("$$$ABC"), Some(MultiCapture("ABC".into()))); + assert_eq!(extract_var("$_"), Some(Dropped(true))); + assert_eq!(extract_var("$_123"), Some(Dropped(true))); + assert_eq!(extract_var("$$_"), Some(Dropped(false))); + } + + #[test] + fn test_not_meta_var() { + assert_eq!(extract_var("$123"), None); + assert_eq!(extract_var("$"), None); + assert_eq!(extract_var("$$"), None); + assert_eq!(extract_var("abc"), None); + assert_eq!(extract_var("$abc"), None); + } + + fn match_constraints(pattern: &str, node: &str) -> bool { + let mut matchers = thread_utils::RapidMap::default(); + matchers.insert("A".to_string(), Pattern::new(pattern, Tsx)); + let mut env = MetaVarEnv::new(); + let root = Tsx.ast_grep(node); + let node = root.root().child(0).unwrap().child(0).unwrap(); + env.insert("A", node); + env.match_constraints(&matchers) + } + + #[test] + fn test_non_ascii_meta_var() { + let extract = |s| extract_meta_var(s, 'Β΅'); + use MetaVariable::*; + assert_eq!(extract("¡¡¡"), Some(Multiple)); + assert_eq!(extract("Β΅ABC"), Some(Capture("ABC".into(), true))); + assert_eq!(extract("¡¡ABC"), Some(Capture("ABC".into(), false))); + assert_eq!(extract("¡¡¡ABC"), Some(MultiCapture("ABC".into()))); + assert_eq!(extract("Β΅_"), Some(Dropped(true))); + assert_eq!(extract("abc"), None); + assert_eq!(extract("Β΅abc"), None); + } + + #[test] + fn test_match_constraints() { + assert!(match_constraints("a + b", "a + b")); + } + + #[test] + fn test_match_not_constraints() { + assert!(!match_constraints("a - b", "a + b")); + } + + #[test] + fn test_multi_var_match() { + let grep = Tsx.ast_grep("if (true) { a += 1; b += 1 } else { a += 1; b += 1 }"); + let node = grep.root(); + let found = node.find("if (true) { $$$A } else { $$$A }"); + assert!(found.is_some()); + let grep = Tsx.ast_grep("if (true) { a += 1 } else { b += 1 }"); + let node = grep.root(); + let not_found = node.find("if (true) { $$$A } else { $$$A }"); + assert!(not_found.is_none()); + } + + #[test] + fn test_multi_var_match_with_trailing() { + let grep = Tsx.ast_grep("if (true) { a += 1; } else { a += 1; b += 1 }"); + let node = grep.root(); + let not_found = node.find("if (true) { $$$A } else { $$$A }"); + assert!(not_found.is_none()); + let grep = Tsx.ast_grep("if (true) { a += 1; b += 1; } else { a += 1 }"); + let node = grep.root(); + let not_found = node.find("if (true) { $$$A } else { $$$A }"); + assert!(not_found.is_none()); + } +} diff --git a/crates/ast-engine/src/node.rs b/crates/ast-engine/src/node.rs new file mode 100644 index 0000000..187309e --- /dev/null +++ b/crates/ast-engine/src/node.rs @@ -0,0 +1,580 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::Doc; +use crate::Language; +#[cfg(feature = "matching")] +use crate::matcher::{Matcher, MatcherExt, NodeMatch}; +use crate::replacer::Replacer; +use crate::source::{Content, Edit as E, SgNode}; + +type Edit = E<::Source>; + +use std::borrow::Cow; + +/// Represents a position in the source code. +/// +/// The line and column are zero-based, character offsets. +/// It is different from tree-sitter's position which is zero-based `byte` offsets. +/// Note, accessing `column` is O(n) operation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Position { + /// zero-based line offset. Text encoding does not matter. + line: usize, + /// zero-based BYTE offset instead of character offset + byte_column: usize, + /// byte offset of this position + byte_offset: usize, +} + +impl Position { + #[must_use] pub const fn new(line: usize, byte_column: usize, byte_offset: usize) -> Self { + Self { + line, + byte_column, + byte_offset, + } + } + #[must_use] pub const fn line(&self) -> usize { + self.line + } + /// Returns the column in terms of characters. + /// Note: node does not have to be a node of matching position. + pub fn column(&self, node: &Node<'_, D>) -> usize { + let source = node.get_doc().get_source(); + source.get_char_column(self.byte_column, self.byte_offset) + } + #[must_use] pub const fn byte_point(&self) -> (usize, usize) { + (self.line, self.byte_column) + } +} + +/// Represents [`tree_sitter::Tree`] and owns source string +/// Note: Root is generic against [`Language`](crate::language::Language) +#[derive(Clone, Debug)] +pub struct Root { + pub(crate) doc: D, +} + +impl Root { + pub const fn doc(doc: D) -> Self { + Self { doc } + } + + pub fn lang(&self) -> &D::Lang { + self.doc.get_lang() + } + /// The root node represents the entire source + pub fn root(&self) -> Node<'_, D> { + Node { + inner: self.doc.root_node(), + root: self, + } + } + + // extract non generic implementation to reduce code size + pub fn edit(&mut self, edit: &Edit) -> Result<&mut Self, String> { + self.doc.do_edit(edit)?; + Ok(self) + } + + pub fn replace>( + &mut self, + pattern: M, + replacer: R, + ) -> Result { + let root = self.root(); + if let Some(edit) = root.replace(pattern, replacer) { + drop(root); // rust cannot auto drop root if D is not specified + self.edit(&edit)?; + Ok(true) + } else { + Ok(false) + } + } + + /// Adopt the `tree_sitter` as the descendant of the root and return the wrapped sg Node. + /// It assumes `inner` is the under the root and will panic at dev build if wrong node is used. + pub fn adopt<'r>(&'r self, inner: D::Node<'r>) -> Node<'r, D> { + debug_assert!(self.check_lineage(&inner)); + Node { inner, root: self } + } + + fn check_lineage(&self, inner: &D::Node<'_>) -> bool { + let mut node = inner.clone(); + while let Some(n) = node.parent() { + node = n; + } + node.node_id() == self.doc.root_node().node_id() + } + + /// P.S. I am your father. + #[doc(hidden)] + pub unsafe fn readopt<'a: 'b, 'b>(&'a self, node: &mut Node<'b, D>) { + debug_assert!(self.check_lineage(&node.inner)); + node.root = self; + } +} + +// why we need one more content? https://github.com/ast-grep/ast-grep/issues/1951 +/// 'r represents root lifetime +#[derive(Clone, Debug)] +pub struct Node<'r, D: Doc> { + pub(crate) inner: D::Node<'r>, + pub(crate) root: &'r Root, +} +pub type KindId = u16; + +/// APIs for Node inspection +impl<'r, D: Doc> Node<'r, D> { + pub const fn get_doc(&self) -> &'r D { + &self.root.doc + } + pub fn node_id(&self) -> usize { + self.inner.node_id() + } + pub fn is_leaf(&self) -> bool { + self.inner.is_leaf() + } + /// if has no named children. + /// N.B. it is different from `is_named` && `is_leaf` + // see https://github.com/ast-grep/ast-grep/issues/276 + pub fn is_named_leaf(&self) -> bool { + self.inner.is_named_leaf() + } + pub fn is_error(&self) -> bool { + self.inner.is_error() + } + pub fn kind(&self) -> Cow<'_, str> { + self.inner.kind() + } + pub fn kind_id(&self) -> KindId { + self.inner.kind_id() + } + + pub fn is_named(&self) -> bool { + self.inner.is_named() + } + pub fn is_missing(&self) -> bool { + self.inner.is_missing() + } + + /// byte offsets of start and end. + pub fn range(&self) -> std::ops::Range { + self.inner.range() + } + + /// Nodes' start position in terms of zero-based rows and columns. + pub fn start_pos(&self) -> Position { + self.inner.start_pos() + } + + /// Nodes' end position in terms of rows and columns. + pub fn end_pos(&self) -> Position { + self.inner.end_pos() + } + + pub fn text(&self) -> Cow<'r, str> { + self.root.doc.get_node_text(&self.inner) + } + + pub fn lang(&self) -> &'r D::Lang { + self.root.lang() + } + + /// the underlying tree-sitter Node + pub fn get_inner_node(&self) -> D::Node<'r> { + self.inner.clone() + } + + pub const fn root(&self) -> &'r Root { + self.root + } +} + +/** + * Corresponds to inside/has/precedes/follows + */ +impl Node<'_, D> { + pub fn matches(&self, m: M) -> bool { + m.match_node(self.clone()).is_some() + } + + pub fn inside(&self, m: M) -> bool { + self.ancestors().find_map(|n| m.match_node(n)).is_some() + } + + pub fn has(&self, m: M) -> bool { + self.dfs().skip(1).find_map(|n| m.match_node(n)).is_some() + } + + pub fn precedes(&self, m: M) -> bool { + self.next_all().find_map(|n| m.match_node(n)).is_some() + } + + pub fn follows(&self, m: M) -> bool { + self.prev_all().find_map(|n| m.match_node(n)).is_some() + } +} + +/// tree traversal API +impl<'r, D: Doc> Node<'r, D> { + #[must_use] + pub fn parent(&self) -> Option { + let inner = self.inner.parent()?; + Some(Node { + inner, + root: self.root, + }) + } + + pub fn children(&self) -> impl ExactSizeIterator> + '_ { + self.inner.children().map(|inner| Node { + inner, + root: self.root, + }) + } + + #[must_use] + pub fn child(&self, nth: usize) -> Option { + let inner = self.inner.child(nth)?; + Some(Node { + inner, + root: self.root, + }) + } + + pub fn field(&self, name: &str) -> Option { + let inner = self.inner.field(name)?; + Some(Node { + inner, + root: self.root, + }) + } + + pub fn child_by_field_id(&self, field_id: u16) -> Option { + let inner = self.inner.child_by_field_id(field_id)?; + Some(Node { + inner, + root: self.root, + }) + } + + pub fn field_children(&self, name: &str) -> impl Iterator> + '_ { + let field_id = self.lang().field_to_id(name); + self.inner.field_children(field_id).map(|inner| Node { + inner, + root: self.root, + }) + } + + /// Returns all ancestors nodes of `self`. + /// Using cursor is overkill here because adjust cursor is too expensive. + pub fn ancestors(&self) -> impl Iterator> + '_ { + let root = self.root.doc.root_node(); + self.inner.ancestors(root).map(|inner| Node { + inner, + root: self.root, + }) + } + #[must_use] + pub fn next(&self) -> Option { + let inner = self.inner.next()?; + Some(Node { + inner, + root: self.root, + }) + } + + /// Returns all sibling nodes next to `self`. + // NOTE: Need go to parent first, then move to current node by byte offset. + // This is because tree_sitter cursor is scoped to the starting node. + // See https://github.com/tree-sitter/tree-sitter/issues/567 + pub fn next_all(&self) -> impl Iterator> + '_ { + self.inner.next_all().map(|inner| Node { + inner, + root: self.root, + }) + } + + #[must_use] + pub fn prev(&self) -> Option { + let inner = self.inner.prev()?; + Some(Node { + inner, + root: self.root, + }) + } + + pub fn prev_all(&self) -> impl Iterator> + '_ { + self.inner.prev_all().map(|inner| Node { + inner, + root: self.root, + }) + } + + pub fn dfs<'s>(&'s self) -> impl Iterator> + 's { + self.inner.dfs().map(|inner| Node { + inner, + root: self.root, + }) + } + + #[must_use] + pub fn find(&self, pat: M) -> Option> { + pat.find_node(self.clone()) + } + + pub fn find_all<'s, M: Matcher + 's>( + &'s self, + pat: M, + ) -> impl Iterator> + 's { + let kinds = pat.potential_kinds(); + self.dfs().filter_map(move |cand| { + if let Some(k) = &kinds { + if !k.contains(cand.kind_id().into()) { + return None; + } + } + pat.match_node(cand) + }) + } +} + +/// Tree manipulation API +impl Node<'_, D> { + pub fn replace>(&self, matcher: M, replacer: R) -> Option> { + let matched = matcher.find_node(self.clone())?; + let edit = matched.make_edit(&matcher, &replacer); + Some(edit) + } + + pub fn after(&self) -> Edit { + todo!() + } + pub fn before(&self) -> Edit { + todo!() + } + pub fn append(&self) -> Edit { + todo!() + } + pub fn prepend(&self) -> Edit { + todo!() + } + + /// Empty children. Remove all child node + pub fn empty(&self) -> Option> { + let mut children = self.children().peekable(); + let start = children.peek()?.range().start; + let end = children.last()?.range().end; + Some(Edit:: { + position: start, + deleted_length: end - start, + inserted_text: Vec::new(), + }) + } + + /// Remove the node itself + pub fn remove(&self) -> Edit { + let range = self.range(); + Edit:: { + position: range.start, + deleted_length: range.end - range.start, + inserted_text: Vec::new(), + } + } +} + +#[cfg(test)] +mod test { + use crate::language::{Language, Tsx}; + use crate::tree_sitter::LanguageExt; + #[test] + fn test_is_leaf() { + let root = Tsx.ast_grep("let a = 123"); + let node = root.root(); + assert!(!node.is_leaf()); + } + + #[test] + fn test_children() { + let root = Tsx.ast_grep("let a = 123"); + let node = root.root(); + let children: Vec<_> = node.children().collect(); + assert_eq!(children.len(), 1); + let texts: Vec<_> = children[0] + .children() + .map(|c| c.text().to_string()) + .collect(); + assert_eq!(texts, vec!["let", "a = 123"]); + } + #[test] + fn test_empty() { + let root = Tsx.ast_grep("let a = 123"); + let node = root.root(); + let edit = node.empty().unwrap(); + assert_eq!(edit.inserted_text.len(), 0); + assert_eq!(edit.deleted_length, 11); + assert_eq!(edit.position, 0); + } + + #[test] + fn test_field_children() { + let root = Tsx.ast_grep("let a = 123"); + let node = root.root().find("let a = $A").unwrap(); + let children: Vec<_> = node.field_children("kind").collect(); + assert_eq!(children.len(), 1); + assert_eq!(children[0].text(), "let"); + } + + const MULTI_LINE: &str = " +if (a) { + test(1) +} else { + x +} +"; + + #[test] + fn test_display_context() { + // src, matcher, lead, trail + let cases = [ + ["i()", "i()", "", ""], + ["i()", "i", "", "()"], + [MULTI_LINE, "test", " ", "(1)"], + ]; + // display context should not panic + for [src, matcher, lead, trail] in cases { + let root = Tsx.ast_grep(src); + let node = root.root().find(matcher).expect("should match"); + let display = node.display_context(0, 0); + assert_eq!(display.leading, lead); + assert_eq!(display.trailing, trail); + } + } + + #[test] + fn test_multi_line_context() { + let cases = [ + ["i()", "i()", "", ""], + [MULTI_LINE, "test", "if (a) {\n ", "(1)\n} else {"], + ]; + // display context should not panic + for [src, matcher, lead, trail] in cases { + let root = Tsx.ast_grep(src); + let node = root.root().find(matcher).expect("should match"); + let display = node.display_context(1, 1); + assert_eq!(display.leading, lead); + assert_eq!(display.trailing, trail); + } + } + + #[test] + fn test_replace_all_nested() { + let root = Tsx.ast_grep("Some(Some(1))"); + let node = root.root(); + let edits = node.replace_all("Some($A)", "$A"); + assert_eq!(edits.len(), 1); + assert_eq!(edits[0].inserted_text, "Some(1)".as_bytes()); + } + + #[test] + fn test_replace_all_multiple_sorted() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2)"); + let node = root.root(); + let edits = node.replace_all("Some($A)", "$A"); + // edits must be sorted by position + assert_eq!(edits.len(), 2); + assert_eq!(edits[0].inserted_text, "Some(1)".as_bytes()); + assert_eq!(edits[1].inserted_text, "2".as_bytes()); + } + + #[test] + fn test_inside() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2)"); + let root = root.root(); + let node = root.find("Some(1)").expect("should exist"); + assert!(node.inside("Some($A)")); + } + #[test] + fn test_has() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2)"); + let root = root.root(); + let node = root.find("Some($A)").expect("should exist"); + assert!(node.has("Some(1)")); + } + #[test] + fn precedes() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2);"); + let root = root.root(); + let node = root.find("Some($A);").expect("should exist"); + assert!(node.precedes("Some(2);")); + } + #[test] + fn follows() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2);"); + let root = root.root(); + let node = root.find("Some(2);").expect("should exist"); + assert!(node.follows("Some(Some(1));")); + } + + #[test] + fn test_field() { + let root = Tsx.ast_grep("class A{}"); + let root = root.root(); + let node = root.find("class $C {}").expect("should exist"); + assert!(node.field("name").is_some()); + assert!(node.field("none").is_none()); + } + #[test] + fn test_child_by_field_id() { + let root = Tsx.ast_grep("class A{}"); + let root = root.root(); + let node = root.find("class $C {}").expect("should exist"); + let id = Tsx.field_to_id("name").unwrap(); + assert!(node.child_by_field_id(id).is_some()); + assert!(node.child_by_field_id(id + 1).is_none()); + } + + #[test] + fn test_remove() { + let root = Tsx.ast_grep("Some(Some(1)); Some(2);"); + let root = root.root(); + let node = root.find("Some(2);").expect("should exist"); + let edit = node.remove(); + assert_eq!(edit.position, 15); + assert_eq!(edit.deleted_length, 8); + } + + #[test] + fn test_ascii_pos() { + let root = Tsx.ast_grep("a"); + let root = root.root(); + let node = root.find("$A").expect("should exist"); + assert_eq!(node.start_pos().line(), 0); + assert_eq!(node.start_pos().column(&*node), 0); + assert_eq!(node.end_pos().line(), 0); + assert_eq!(node.end_pos().column(&*node), 1); + } + + #[test] + fn test_unicode_pos() { + let root = Tsx.ast_grep("πŸ¦€"); + let root = root.root(); + let node = root.find("$A").expect("should exist"); + assert_eq!(node.start_pos().line(), 0); + assert_eq!(node.start_pos().column(&*node), 0); + assert_eq!(node.end_pos().line(), 0); + assert_eq!(node.end_pos().column(&*node), 1); + let root = Tsx.ast_grep("\n πŸ¦€πŸ¦€"); + let root = root.root(); + let node = root.find("$A").expect("should exist"); + assert_eq!(node.start_pos().line(), 1); + assert_eq!(node.start_pos().column(&*node), 2); + assert_eq!(node.end_pos().line(), 1); + assert_eq!(node.end_pos().column(&*node), 4); + } +} diff --git a/crates/ast-engine/src/ops.rs b/crates/ast-engine/src/ops.rs new file mode 100644 index 0000000..575a49b --- /dev/null +++ b/crates/ast-engine/src/ops.rs @@ -0,0 +1,567 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::matcher::{MatchAll, MatchNone, Matcher}; +use crate::meta_var::MetaVarEnv; +use crate::{Doc, Node}; +use bit_set::BitSet; +use std::borrow::Cow; + +pub struct And { + pattern1: P1, + pattern2: P2, +} + +impl Matcher for And +where + P1: Matcher, + P2: Matcher, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + // keep the original env intact until both arms match + let mut new_env = Cow::Borrowed(env.as_ref()); + let node = self.pattern1.match_node_with_env(node, &mut new_env)?; + let ret = self.pattern2.match_node_with_env(node, &mut new_env)?; + // both succeed – commit the combined env + *env = Cow::Owned(new_env.into_owned()); + Some(ret) + } + + fn potential_kinds(&self) -> Option { + let set1 = self.pattern1.potential_kinds(); + let set2 = self.pattern2.potential_kinds(); + // if both constituent have Some(bitset), intersect them + // otherwise returns either of the non-null set + match (&set1, &set2) { + (Some(s1), Some(s2)) => Some(s1.intersection(s2).collect()), + _ => set1.xor(set2), + } + } +} + +// we pre-compute and cache potential_kinds. So patterns should not be mutated. +// Box<[P]> is used here for immutability so that kinds will never be invalidated. +#[derive(Clone, Debug)] +pub struct All { + patterns: Box<[P]>, + kinds: Option, +} + +impl All

{ + pub fn new>(patterns: PS) -> Self { + let patterns: Box<[P]> = patterns.into_iter().collect(); + let kinds = Self::compute_kinds(&patterns); + Self { patterns, kinds } + } + + fn compute_kinds(patterns: &[P]) -> Option { + let mut set: Option = None; + for pattern in patterns { + let Some(n) = pattern.potential_kinds() else { + continue; + }; + if let Some(set) = set.as_mut() { + set.intersect_with(&n); + } else { + set = Some(n); + } + } + set + } + + #[must_use] + pub const fn inner(&self) -> &[P] { + &self.patterns + } +} + +impl Matcher for All

{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + if let Some(kinds) = &self.kinds { + if !kinds.contains(node.kind_id().into()) { + return None; + } + } + let mut new_env = Cow::Borrowed(env.as_ref()); + let all_satisfied = self + .patterns + .iter() + .all(|p| p.match_node_with_env(node.clone(), &mut new_env).is_some()); + if all_satisfied { + *env = Cow::Owned(new_env.into_owned()); + Some(node) + } else { + None + } + } + + fn potential_kinds(&self) -> Option { + self.kinds.clone() + } +} + +// Box<[P]> for immutability and potential_kinds cache correctness +#[derive(Clone, Debug)] +pub struct Any

{ + patterns: Box<[P]>, + kinds: Option, +} + +impl Any

{ + pub fn new>(patterns: PS) -> Self { + let patterns: Box<[P]> = patterns.into_iter().collect(); + let kinds = Self::compute_kinds(&patterns); + Self { patterns, kinds } + } + + fn compute_kinds(patterns: &[P]) -> Option { + let mut set = BitSet::new(); + for pattern in patterns { + let n = pattern.potential_kinds()?; + set.union_with(&n); + } + Some(set) + } + + #[must_use] + pub const fn inner(&self) -> &[P] { + &self.patterns + } +} + +impl Matcher for Any { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + if let Some(kinds) = &self.kinds { + if !kinds.contains(node.kind_id().into()) { + return None; + } + } + let mut new_env = Cow::Borrowed(env.as_ref()); + let found = self.patterns.iter().find_map(|p| { + new_env = Cow::Borrowed(env.as_ref()); + p.match_node_with_env(node.clone(), &mut new_env) + }); + if found.is_some() { + *env = Cow::Owned(new_env.into_owned()); + Some(node) + } else { + None + } + } + + fn potential_kinds(&self) -> Option { + self.kinds.clone() + } +} + +#[derive(Clone, Debug)] +pub struct Or { + pattern1: P1, + pattern2: P2, +} + +impl Matcher for Or +where + P1: Matcher, + P2: Matcher, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let mut new_env = Cow::Borrowed(env.as_ref()); + if let Some(ret) = self + .pattern1 + .match_node_with_env(node.clone(), &mut new_env) + { + *env = Cow::Owned(new_env.into_owned()); + Some(ret) + } else { + self.pattern2.match_node_with_env(node, env) + } + } + + fn potential_kinds(&self) -> Option { + let mut set1 = self.pattern1.potential_kinds()?; + let set2 = self.pattern2.potential_kinds()?; + set1.union_with(&set2); + Some(set1) + } +} + +#[derive(Clone, Debug)] +pub struct Not { + not: M, +} + +impl Not { + pub const fn new(not: M) -> Self { + Self { not } + } + + pub const fn inner(&self) -> &M { + &self.not + } +} +impl

Matcher for Not

+where + P: Matcher, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + self.not + .match_node_with_env(node.clone(), env) + .xor(Some(node)) + } +} + +#[derive(Clone, Debug)] +pub struct Op { + inner: M, +} + +impl Matcher for Op +where + M: Matcher, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + self.inner.match_node_with_env(node, env) + } + + fn potential_kinds(&self) -> Option { + self.inner.potential_kinds() + } +} + +/* +pub struct Predicate { + func: F, +} + +impl Matcher for Predicate +where + L: Language, + F: for<'tree> Fn(&Node<'tree, StrDoc>) -> bool, +{ + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut MetaVarEnv<'tree, D>, + ) -> Option> { + (self.func)(&node).then_some(node) + } +} +*/ + +/* +// we don't need specify M for static method +impl Op { + pub fn func(func: F) -> Predicate + where + F: for<'tree> Fn(&Node<'tree, StrDoc>) -> bool, + { + Predicate { func } + } +} +*/ + +impl Op { + pub const fn not(pattern: M) -> Not { + Not { not: pattern } + } +} + +impl Op { + pub const fn every(pattern: M) -> Op> { + Op { + inner: And { + pattern1: pattern, + pattern2: MatchAll, + }, + } + } + pub const fn either(pattern: M) -> Op> { + Op { + inner: Or { + pattern1: pattern, + pattern2: MatchNone, + }, + } + } + + pub fn all>(patterns: MS) -> All { + All::new(patterns) + } + + pub fn any>(patterns: MS) -> Any { + Any::new(patterns) + } + + pub const fn new(matcher: M) -> Self { + Self { inner: matcher } + } +} + +type NestedAnd = And, O>; +impl Op> { + pub fn and(self, other: O) -> Op> { + Op { + inner: And { + pattern1: self.inner, + pattern2: other, + }, + } + } +} + +type NestedOr = Or, O>; +impl Op> { + pub fn or(self, other: O) -> Op> { + Op { + inner: Or { + pattern1: self.inner, + pattern2: other, + }, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::Root; + use crate::language::Tsx; + use crate::matcher::MatcherExt; + use crate::meta_var::MetaVarEnv; + + fn test_find(matcher: &impl Matcher, code: &str) { + let node = Root::str(code, Tsx); + assert!(matcher.find_node(node.root()).is_some()); + } + fn test_not_find(matcher: &impl Matcher, code: &str) { + let node = Root::str(code, Tsx); + assert!(matcher.find_node(node.root()).is_none()); + } + fn find_all(matcher: impl Matcher, code: &str) -> Vec { + let node = Root::str(code, Tsx); + node.root() + .find_all(matcher) + .map(|n| n.text().to_string()) + .collect() + } + + #[test] + fn test_or() { + let matcher = Or { + pattern1: "let a = 1", + pattern2: "const b = 2", + }; + test_find(&matcher, "let a = 1"); + test_find(&matcher, "const b = 2"); + test_not_find(&matcher, "let a = 2"); + test_not_find(&matcher, "const a = 1"); + test_not_find(&matcher, "let b = 2"); + test_not_find(&matcher, "const b = 1"); + } + + #[test] + fn test_not() { + let matcher = Not { not: "let a = 1" }; + test_find(&matcher, "const b = 2"); + } + + #[test] + fn test_and() { + let matcher = And { + pattern1: "let a = $_", + pattern2: Not { not: "let a = 123" }, + }; + test_find(&matcher, "let a = 233"); + test_find(&matcher, "let a = 456"); + test_not_find(&matcher, "let a = 123"); + } + + #[test] + fn test_api_and() { + let matcher = Op::every("let a = $_").and(Op::not("let a = 123")); + test_find(&matcher, "let a = 233"); + test_find(&matcher, "let a = 456"); + test_not_find(&matcher, "let a = 123"); + } + + #[test] + fn test_api_or() { + let matcher = Op::either("let a = 1").or("const b = 2"); + test_find(&matcher, "let a = 1"); + test_find(&matcher, "const b = 2"); + test_not_find(&matcher, "let a = 2"); + test_not_find(&matcher, "const a = 1"); + test_not_find(&matcher, "let b = 2"); + test_not_find(&matcher, "const b = 1"); + } + #[test] + fn test_multiple_match() { + let sequential = find_all("$A + b", "let f = () => a + b; let ff = () => c + b"); + assert_eq!(sequential.len(), 2); + let nested = find_all( + "function $A() { $$$ }", + "function a() { function b() { b } }", + ); + assert_eq!(nested.len(), 2); + } + + #[test] + fn test_multiple_match_order() { + let ret = find_all( + "$A + b", + "let f = () => () => () => a + b; let ff = () => c + b", + ); + assert_eq!(ret, ["a + b", "c + b"], "should match source code order"); + } + + /* + #[test] + fn test_api_func() { + let matcher = Op::func(|n| n.text().contains("114514")); + test_find(&matcher, "let a = 114514"); + test_not_find(&matcher, "let a = 1919810"); + } + */ + use crate::Pattern; + trait TsxMatcher { + fn t(self) -> Pattern; + } + impl TsxMatcher for &str { + fn t(self) -> Pattern { + Pattern::new(self, Tsx) + } + } + + #[test] + fn test_and_kinds() { + // intersect None kinds + let matcher = Op::every("let a = $_".t()).and(Op::not("let a = 123".t())); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + let matcher = Op::every(Op::not("let a = $_".t())).and("let a = 123".t()); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // intersect Same kinds + let matcher = Op::every("let a = $_".t()).and("let b = 123".t()); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // intersect different kinds + let matcher = Op::every("let a = 1".t()).and("console.log(1)".t()); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(0)); + // two None kinds + let matcher = Op::every(Op::not("let a = $_".t())).and(Op::not("let a = 123".t())); + assert_eq!(matcher.potential_kinds(), None); + } + + #[test] + fn test_or_kinds() { + // union None kinds + let matcher = Op::either("let a = $_".t()).or(Op::not("let a = 123".t())); + assert_eq!(matcher.potential_kinds(), None); + let matcher = Op::either(Op::not("let a = $_".t())).or("let a = 123".t()); + assert_eq!(matcher.potential_kinds(), None); + // union Same kinds + let matcher = Op::either("let a = $_".t()).or("let b = 123".t()); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // union different kinds + let matcher = Op::either("let a = 1".t()).or("console.log(1)".t()); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(2)); + // two None kinds + let matcher = Op::either(Op::not("let a = $_".t())).or(Op::not("let a = 123".t())); + assert_eq!(matcher.potential_kinds(), None); + } + + #[test] + fn test_all_kinds() { + // intersect None kinds + let matcher = Op::all(["let a = $_".t(), "$A".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + let matcher = Op::all(["$A".t(), "let a = $_".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // intersect Same kinds + let matcher = Op::all(["let a = $_".t(), "let b = 123".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // intersect different kinds + let matcher = Op::all(["let a = 1".t(), "console.log(1)".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(0)); + // two None kinds + let matcher = Op::all(["$A".t(), "$B".t()]); + assert_eq!(matcher.potential_kinds(), None); + } + + #[test] + fn test_any_kinds() { + // union None kinds + let matcher = Op::any(["let a = $_".t(), "$A".t()]); + assert_eq!(matcher.potential_kinds(), None); + let matcher = Op::any(["$A".t(), "let a = $_".t()]); + assert_eq!(matcher.potential_kinds(), None); + // union Same kinds + let matcher = Op::any(["let a = $_".t(), "let b = 123".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(1)); + // union different kinds + let matcher = Op::any(["let a = 1".t(), "console.log(1)".t()]); + assert_eq!(matcher.potential_kinds().map(|v| v.len()), Some(2)); + // two None kinds + let matcher = Op::any(["$A".t(), "$B".t()]); + assert_eq!(matcher.potential_kinds(), None); + } + + #[test] + fn test_or_revert_env() { + let matcher = Op::either(Op::every("foo($A)".t()).and("impossible".t())).or("foo($B)".t()); + let code = Root::str("foo(123)", Tsx); + let matches = code.root().find(matcher).expect("should found"); + assert!(matches.get_env().get_match("A").is_none()); + assert_eq!(matches.get_env().get_match("B").unwrap().text(), "123"); + } + + #[test] + fn test_any_revert_env() { + let matcher = Op::any([ + Op::all(["foo($A)".t(), "impossible".t()]), + Op::all(["foo($B)".t()]), + ]); + let code = Root::str("foo(123)", Tsx); + let matches = code.root().find(matcher).expect("should found"); + assert!(matches.get_env().get_match("A").is_none()); + assert_eq!(matches.get_env().get_match("B").unwrap().text(), "123"); + } + + // gh #1225 + #[test] + fn test_all_revert_env() { + let matcher = Op::all(["$A(123)".t(), "$B(456)".t()]); + let code = Root::str("foo(123)", Tsx); + let node = code.root().find("foo($C)").expect("should exist"); + let node = node.get_node().clone(); + let mut env = Cow::Owned(MetaVarEnv::new()); + assert!(matcher.match_node_with_env(node, &mut env).is_none()); + assert!(env.get_match("A").is_none()); + } +} diff --git a/crates/ast-engine/src/pinned.rs b/crates/ast-engine/src/pinned.rs new file mode 100644 index 0000000..88c4be3 --- /dev/null +++ b/crates/ast-engine/src/pinned.rs @@ -0,0 +1,170 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::Doc; +use crate::NodeMatch; +use crate::node::{Node, Root}; + +// ast-grep Node contains a reference to Root. It implies that +// node can be used only when the Root is valid and not dropped. +// By default, tree-sitter Node<'r> is scoped by ast Root's lifetime +// That is, Node can be only used when root is on the call stack (RAII) +// It is usually sufficient but for following scenario the brwchck is too conservative: +// 1. passing Root and Node across threads +// 2. passing Root and Node across FFI boundary (from Rust to napi/pyo3) +// +// This resembles self-referencing pattern and we can use solution similar to PinBox. +// Actually, tree-sitter's Node reference is already pointing to a heap address. +// N.B. it is not documented but can be inferred from the source code and concurrency doc paragraph. +// https://github.com/tree-sitter/tree-sitter/blob/20924fa4cdeb10d82ac308481e39bf8519334e55/lib/src/tree.c#L9-L20 +// https://github.com/tree-sitter/tree-sitter/blob/20924fa4cdeb10d82ac308481e39bf8519334e55/lib/src/tree.c#L37-L39 +// https://tree-sitter.github.io/tree-sitter/using-parsers#concurrency +// +// So **as long as Root is not dropped, the Tree will not be freed. And Node will be valid.** +// +// PinnedNodeData provides a systematical way to keep Root live and `T` can be anything containing valid Nodes. +// Nodes' lifetime is 'static, meaning the Node is not borrow checked instead of living throughout the program. +// There are two ways to use PinnedNodeData +// 1. use it by borrowing. PinnedNodeData guarantees Root is alive and Node in T is valid. +// Notable example is sending Node across threads. +// 2. take its ownership. Users should take extra care to keep Root alive. +// Notable example is sending Root to JavaScript/Python heap. +#[doc(hidden)] +pub struct PinnedNodeData { + pin: Root, + data: T, +} + +impl PinnedNodeData { + #[allow(clippy::deref_addrof)] + pub fn new(pin: Root, func: F) -> Self + where + F: FnOnce(&'static Root) -> T, + { + // TODO: explain why unsafe works here and what guarantee it needs + let reference = unsafe { &*(&raw const pin) as &'static Root }; + let data = func(reference); + Self { pin, data } + } +} + +impl PinnedNodeData +where + T: NodeData, +{ + #[allow(clippy::deref_addrof)] // the lifetimes need to be static + pub fn get_data(&mut self) -> &T::Data { + let pin = unsafe { &*(&raw const self.pin) as &'static Root }; + self.data.visit_nodes(|n| unsafe { pin.readopt(n) }); + self.data.get_data() + } + pub fn into_raw(self) -> (Root, T) { + (self.pin, self.data) + } +} + +/// # Safety +/// TODO: explain unsafe trait +pub unsafe trait NodeData { + type Data; + fn get_data(&self) -> &Self::Data; + fn visit_nodes(&mut self, f: F) + where + F: FnMut(&mut Node<'_, D>); +} + +unsafe impl NodeData for Node<'static, D> { + type Data = Self; + fn get_data(&self) -> &Self::Data { + self + } + fn visit_nodes(&mut self, mut f: F) + where + F: FnMut(&mut Node<'_, D>), + { + f(self); + } +} + +unsafe impl NodeData for NodeMatch<'static, D> { + type Data = Self; + fn get_data(&self) -> &Self::Data { + self + } + fn visit_nodes(&mut self, mut f: F) + where + F: FnMut(&mut Node<'_, D>), + { + // update the matched Node + f(unsafe { self.get_node_mut() }); + // update the meta variable captured + let env = self.get_env_mut(); + env.visit_nodes(f); + } +} + +unsafe impl NodeData for Vec> { + type Data = Self; + fn get_data(&self) -> &Self::Data { + self + } + fn visit_nodes(&mut self, mut f: F) + where + F: FnMut(&mut Node<'_, D>), + { + for n in self { + n.visit_nodes(&mut f); + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use crate::node::Root; + use crate::tree_sitter::StrDoc; + + fn return_from_func() -> PinnedNodeData, Node<'static, StrDoc>> { + let root = Root::str("let a = 123", Tsx); + PinnedNodeData::new(root, |r| r.root().child(0).unwrap().child(1).unwrap()) + } + + #[test] + fn test_borrow() { + let mut retained = return_from_func(); + let b = retained.get_data(); + assert_eq!(b.text(), "a = 123"); + assert!(matches!(b.lang(), Tsx)); + } + + #[test] + #[ignore] + fn test_node_match() { + todo!() + } + + fn return_vec() -> PinnedNodeData, Vec>>> { + let root = Root::str("let a = 123", Tsx); + PinnedNodeData::new(root, |r| { + r.root() + .child(0) + .unwrap() + .children() + .map(NodeMatch::from) + .collect() + }) + } + + #[test] + fn test_vec_node() { + let mut pinned = return_vec(); + let nodes = pinned.get_data(); + assert!(!nodes.is_empty()); + assert_eq!(nodes[0].text(), "let"); + assert_eq!(nodes[1].text(), "a = 123"); + } +} diff --git a/crates/ast-engine/src/replacer.rs b/crates/ast-engine/src/replacer.rs new file mode 100644 index 0000000..62cb467 --- /dev/null +++ b/crates/ast-engine/src/replacer.rs @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::matcher::Matcher; +use crate::meta_var::{MetaVariableID, Underlying, is_valid_meta_var_char}; +use crate::{Doc, Node, NodeMatch, Root}; +use std::ops::Range; + +pub(crate) use indent::formatted_slice; + +use crate::source::Edit as E; +type Edit = E<::Source>; + +mod indent; +mod structural; +mod template; + +pub use crate::source::Content; +pub use template::{TemplateFix, TemplateFixError}; + +/// Replace meta variable in the replacer string +pub trait Replacer { + fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying; + fn get_replaced_range(&self, nm: &NodeMatch<'_, D>, matcher: impl Matcher) -> Range { + let range = nm.range(); + if let Some(len) = matcher.get_match_len(nm.get_node().clone()) { + range.start..range.start + len + } else { + range + } + } +} + +impl Replacer for str { + fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying { + template::gen_replacement(self, nm) + } +} + +impl Replacer for Root { + fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying { + structural::gen_replacement(self, nm) + } +} + +impl Replacer for &T +where + D: Doc, + T: Replacer + ?Sized, +{ + fn generate_replacement(&self, nm: &NodeMatch) -> Underlying { + (**self).generate_replacement(nm) + } +} + +impl Replacer for Node<'_, D> { + fn generate_replacement(&self, _nm: &NodeMatch<'_, D>) -> Underlying { + let range = self.range(); + self.root.doc.get_source().get_range(range).to_vec() + } +} + +#[derive(Debug, Clone)] +enum MetaVarExtract { + /// $A for captured meta var + Single(MetaVariableID), + /// $$$A for captured ellipsis + Multiple(MetaVariableID), + Transformed(MetaVariableID), +} + +impl MetaVarExtract { + fn used_var(&self) -> &str { + match self { + Self::Single(s) | + Self::Multiple(s) | + Self::Transformed(s) => s, + } + } +} + +fn split_first_meta_var( + src: &str, + meta_char: char, + transform: &[MetaVariableID], +) -> Option<(MetaVarExtract, usize)> { + debug_assert!(src.starts_with(meta_char)); + let mut i = 0; + let mut skipped = 0; + let is_multi = loop { + i += 1; + skipped += meta_char.len_utf8(); + if i == 3 { + break true; + } + if !src[skipped..].starts_with(meta_char) { + break false; + } + }; + // no Anonymous meta var allowed, so _ is not allowed + let i = src[skipped..] + .find(|c: char| !is_valid_meta_var_char(c)) + .unwrap_or(src.len() - skipped); + // no name found + if i == 0 { + return None; + } + let name = src[skipped..skipped + i].to_string(); + let var = if is_multi { + MetaVarExtract::Multiple(name) + } else if transform.contains(&name) { + MetaVarExtract::Transformed(name) + } else { + MetaVarExtract::Single(name) + }; + Some((var, skipped + i)) +} diff --git a/crates/ast-engine/src/replacer/indent.rs b/crates/ast-engine/src/replacer/indent.rs new file mode 100644 index 0000000..ec8bf9a --- /dev/null +++ b/crates/ast-engine/src/replacer/indent.rs @@ -0,0 +1,434 @@ +#![allow(clippy::doc_overindented_list_items)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +/** + This module is for indentation-sensitive replacement. + + Ideally, structural search and replacement should all be based on AST. + But this means our changed AST need to be pretty-printed by structural rules, + which we don't have enough resource to support. An indentation solution is used. + + The algorithm is quite complicated, uncomprehensive, sluggish and buggy. + But let's walk through it by example. + + consider this code + ```ignore + if (true) { + a( + 1 + + 2 + + 3 + ) + } + ``` + + and this pattern and replacement + + ```ignore + // pattern + a($B) + // replacement + c( + $B + ) + ``` + + We need to compute the relative indentation of the captured meta-var. + When we insert the meta-var into replacement, keep the relative indent intact, + while also respecting the replacement indent. + Finally, the whole replacement should replace the matched node + in a manner that maintains the indentation of the source. + + We need to consider multiple indentations. + Key concepts here: + * meta-var node: in this case `$B` in pattern/replacement, or `1+2+3` in source. + * matched node: in this case `a($B)` in pattern, `a(1 + 2 + 3)` in source + * meta-var source indentation: `$B` matches `1+2+3`, the first line's indentation in source code is 4. + * meta-var replacement indentation: in this case 2 + * matched node source indentation: in this case 2 + + ## Extract Meta-var with de-indent + 1. Initial meta-var node B text: + The meta-var source indentation for `$B` is 4. + However, meta-var node does not have the first line indentation. + ```ignore + 1 + + 2 + + 3 + ``` + 2. Deindent meta-var node B, except first line: + De-indenting all lines following the first line by 4 spaces gives us this relative code layout. + + ```ignore + 1 + + 2 + + 3 + ``` + + ## Insert meta-var into replacement with re-indent + + 3. Re-indent by meta-var replacement indentation. + meta-var node $B occurs in replace with first line indentation of 2. + We need to re-indent the meta-var code before replacement, except the first line + ```ignore + 1 + + 2 + + 3 + ``` + + 4. Insert meta-var code in to replacement + ```ignore + c( + 1 + + 2 + + 3 + ) + ``` + + ## Insert replacement into source with re-indent + + 5. Re-indent the replaced template code except first line + The whole matched node first line indentation is 2. + We need to reindent the replacement code by 2, except the first line. + ```ignore + c( + 1 + + 2 + + 3 + ) + ``` + + 6. Inserted replacement code to original tree + + ```ignore + if (true) { + c( + 1 + + 2 + + 3 + ) + } + ``` + + The steps 3,4 and steps 5,6 are similar. We can define a `replace_with_indent` to it. + Following the same path, we can define a `extract_with_deindent` for steps 1,2 +*/ +use crate::source::Content; +use std::borrow::Cow; +use std::cmp::Ordering; +use std::ops::Range; + +/// We assume `NEW_LINE`, `TAB`, `SPACE` is only one code unit. +/// This is sufficiently true for utf8, utf16 and char. +fn get_new_line() -> C::Underlying { + C::decode_str("\n")[0].clone() +} +fn get_space() -> C::Underlying { + C::decode_str(" ")[0].clone() +} + +const MAX_LOOK_AHEAD: usize = 512; + +/// Represents how we de-indent matched meta var. +pub enum DeindentedExtract<'a, C: Content> { + /// If meta-var is only one line, no need to de-indent/re-indent + SingleLine(&'a [C::Underlying]), + /// meta-var's has multiple lines, may need re-indent + MultiLine(&'a [C::Underlying], usize), +} + +/// Returns [`DeindentedExtract`] for later de-indent/re-indent. +pub fn extract_with_deindent( + content: &C, + range: Range, +) -> DeindentedExtract<'_, C> { + let extract_slice = content.get_range(range.clone()); + // no need to compute indentation for single line + if !extract_slice.contains(&get_new_line::()) { + return DeindentedExtract::SingleLine(extract_slice); + } + let indent = get_indent_at_offset::(content.get_range(0..range.start)); + DeindentedExtract::MultiLine(extract_slice, indent) +} + +#[allow(dead_code)] +fn deindent_slice<'a, C: Content>( + slice: &'a [C::Underlying], + content: &'a C, + start: usize, +) -> DeindentedExtract<'a, C> { + if !slice.contains(&get_new_line::()) { + return DeindentedExtract::SingleLine(slice); + } + let indent = get_indent_at_offset::(content.get_range(0..start)); + DeindentedExtract::MultiLine(slice, indent) +} + +pub fn formatted_slice<'a, C: Content>( + slice: &'a [C::Underlying], + content: &'a C, + start: usize, +) -> Cow<'a, [C::Underlying]> { + if !slice.contains(&get_new_line::()) { + return Cow::Borrowed(slice); + } + Cow::Owned(indent_lines::(0, &DeindentedExtract::MultiLine(slice, get_indent_at_offset::(content.get_range(0..start)))).into_owned()) +} + +pub fn indent_lines<'a, C: Content>( + indent: usize, + extract: &'a DeindentedExtract<'a, C>, +) -> Cow<'a, [C::Underlying]> { + use DeindentedExtract::{MultiLine, SingleLine}; + let (lines, original_indent) = match extract { + SingleLine(line) => return Cow::Borrowed(line), + MultiLine(lines, ind) => (lines, ind), + }; + match original_indent.cmp(&indent) { + // if old and new indent match, just return old lines + Ordering::Equal => Cow::Borrowed(lines), + // need strip old indent + Ordering::Greater => Cow::Owned(remove_indent::(original_indent - indent, lines)), + // need add missing indent + Ordering::Less => Cow::Owned(indent_lines_impl::( + indent - original_indent, + lines.split(|b| *b == get_new_line::()), + )), + } +} + +fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec +where + C: Content + 'a, + Lines: Iterator, +{ + let mut ret = vec![]; + let space = get_space::(); + let leading: Vec<_> = std::iter::repeat_n(space, indent).collect(); + // first line never got indent + if let Some(line) = lines.next() { + ret.extend(line.iter().cloned()); + } + let new_line = get_new_line::(); + for line in lines { + ret.push(new_line.clone()); + ret.extend(leading.clone()); + ret.extend(line.iter().cloned()); + } + ret +} + +/// returns 0 if no indent is found before the offset +/// either truly no indent exists, or the offset is in a long line +pub fn get_indent_at_offset(src: &[C::Underlying]) -> usize { + let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD; + + let mut indent = 0; + let new_line = get_new_line::(); + let space = get_space::(); + // TODO: support TAB. only whitespace is supported now + for c in src[lookahead..].iter().rev() { + if *c == new_line { + return indent; + } + if *c == space { + indent += 1; + } else { + indent = 0; + } + } + // lookahead == 0 means we have indentation at first line. + if lookahead == 0 && indent != 0 { + indent + } else { + 0 + } +} + +// NOTE: we assume input is well indented. +// following line's should have fewer indentation than initial line +fn remove_indent(indent: usize, src: &[C::Underlying]) -> Vec { + let indentation: Vec<_> = std::iter::repeat_n(get_space::(), indent) + .collect(); + let new_line = get_new_line::(); + let lines: Vec<_> = src + .split(|b| *b == new_line) + .map(|line| match line.strip_prefix(&*indentation) { + Some(stripped) => stripped, + None => line, + }) + .collect(); + lines.join(&new_line).clone() +} + +#[cfg(test)] +mod test { + use super::*; + + fn test_deindent(source: &str, expected: &str, offset: usize) { + let source = source.to_string(); + let expected = expected.trim(); + let start = source[offset..] + .chars() + .take_while(|n| n.is_whitespace()) + .count() + + offset; + let trailing_white = source + .chars() + .rev() + .take_while(|n| n.is_whitespace()) + .count(); + let end = source.chars().count() - trailing_white; + let extracted = extract_with_deindent(&source, start..end); + let result_bytes = indent_lines::(0, &extracted); + let actual = std::str::from_utf8(&result_bytes).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn test_simple_deindent() { + let src = r" + def test(): + pass"; + let expected = r" +def test(): + pass"; + test_deindent(src, expected, 0); + } + + #[test] + fn test_first_line_indent_deindent() { + // note this indentation has no newline + let src = r" def test(): + pass"; + let expected = r" +def test(): + pass"; + test_deindent(src, expected, 0); + } + + #[test] + fn test_space_in_middle_deindent() { + let src = r" +a = lambda: + pass"; + let expected = r" +lambda: + pass"; + test_deindent(src, expected, 4); + } + + #[test] + fn test_middle_deindent() { + let src = r" + a = lambda: + pass"; + let expected = r" +lambda: + pass"; + test_deindent(src, expected, 6); + } + + #[test] + fn test_nested_deindent() { + let src = r" +def outer(): + def test(): + pass"; + let expected = r" +def test(): + pass"; + test_deindent(src, expected, 13); + } + + #[test] + fn test_no_deindent() { + let src = r" +def test(): + pass +"; + test_deindent(src, src, 0); + } + + #[test] + fn test_malformed_deindent() { + let src = r" + def test(): +pass +"; + let expected = r" +def test(): +pass +"; + test_deindent(src, expected, 0); + } + + #[test] + fn test_long_line_no_deindent() { + let src = format!("{}abc\n def", " ".repeat(MAX_LOOK_AHEAD + 1)); + test_deindent(&src, &src, 0); + } + + fn test_replace_with_indent(target: &str, start: usize, inserted: &str) -> String { + let target = target.to_string(); + let replace_lines = DeindentedExtract::MultiLine(inserted.as_bytes(), 0); + let indent = get_indent_at_offset::(&target.as_bytes()[..start]); + let ret = indent_lines::(indent, &replace_lines); + String::from_utf8(ret.to_vec()).unwrap() + } + + #[test] + fn test_simple_replace() { + let target = ""; + let inserted = "def abc(): pass"; + let actual = test_replace_with_indent(target, 0, inserted); + assert_eq!(actual, inserted); + let inserted = "def abc():\n pass"; + let actual = test_replace_with_indent(target, 0, inserted); + assert_eq!(actual, inserted); + } + + #[test] + fn test_indent_replace() { + let target = " "; + let inserted = "def abc(): pass"; + let actual = test_replace_with_indent(target, 2, inserted); + assert_eq!(actual, "def abc(): pass"); + let inserted = "def abc():\n pass"; + let actual = test_replace_with_indent(target, 2, inserted); + assert_eq!(actual, "def abc():\n pass"); + let target = " "; // 4 spaces, but insert at 2 + let actual = test_replace_with_indent(target, 2, inserted); + assert_eq!(actual, "def abc():\n pass"); + let target = " "; // 4 spaces, insert at 4 + let actual = test_replace_with_indent(target, 4, inserted); + assert_eq!(actual, "def abc():\n pass"); + } + + #[test] + fn test_leading_text_replace() { + let target = "a = "; + let inserted = "def abc(): pass"; + let actual = test_replace_with_indent(target, 4, inserted); + assert_eq!(actual, "def abc(): pass"); + let inserted = "def abc():\n pass"; + let actual = test_replace_with_indent(target, 4, inserted); + assert_eq!(actual, "def abc():\n pass"); + } + + #[test] + fn test_leading_text_indent_replace() { + let target = " a = "; + let inserted = "def abc(): pass"; + let actual = test_replace_with_indent(target, 6, inserted); + assert_eq!(actual, "def abc(): pass"); + let inserted = "def abc():\n pass"; + let actual = test_replace_with_indent(target, 6, inserted); + assert_eq!(actual, "def abc():\n pass"); + } +} diff --git a/crates/ast-engine/src/replacer/structural.rs b/crates/ast-engine/src/replacer/structural.rs new file mode 100644 index 0000000..0907eb5 --- /dev/null +++ b/crates/ast-engine/src/replacer/structural.rs @@ -0,0 +1,217 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::{Edit, Underlying}; +use crate::language::Language; +use crate::meta_var::MetaVarEnv; +use crate::source::{Content, SgNode}; +use crate::{Doc, Node, NodeMatch, Root}; + +pub fn gen_replacement(root: &Root, nm: &NodeMatch) -> Underlying { + let edits = collect_edits(root, nm.get_env(), nm.lang()); + merge_edits_to_vec(edits, root) +} + +fn collect_edits(root: &Root, env: &MetaVarEnv, lang: &D::Lang) -> Vec> { + let mut node = root.root(); + let root_id = node.node_id(); + let mut edits = vec![]; + + // this is a post-order DFS that stops traversal when the node matches + 'outer: loop { + if let Some(text) = get_meta_var_replacement(&node, env, lang) { + let range = node.range(); + let position = range.start; + let length = range.len(); + edits.push(Edit:: { + position, + deleted_length: length, + inserted_text: text, + }); + } else if let Some(first_child) = node.child(0) { + // traverse down to child + node = first_child; + continue; + } else if node.inner.is_missing() { + // TODO: better handling missing node + if let Some(sibling) = node.next() { + node = sibling; + continue; + } + break; + } + // traverse up to parent until getting to root + loop { + // come back to the root node, terminating dfs + if node.node_id() == root_id { + break 'outer; + } + if let Some(sibling) = node.next() { + node = sibling; + break; + } + node = node.parent().unwrap(); + } + } + // add the missing one + edits.push(Edit:: { + position: root.root().range().end, + deleted_length: 0, + inserted_text: vec![], + }); + edits +} + +fn merge_edits_to_vec(edits: Vec>, root: &Root) -> Underlying { + let mut ret = vec![]; + let mut start = 0; + for edit in edits { + debug_assert!(start <= edit.position, "Edit must be ordered!"); + ret.extend( + root.doc + .get_source() + .get_range(start..edit.position) + .iter() + .cloned(), + ); + ret.extend(edit.inserted_text.iter().cloned()); + start = edit.position + edit.deleted_length; + } + ret +} + +fn get_meta_var_replacement( + node: &Node, + env: &MetaVarEnv, + lang: &D::Lang, +) -> Option> { + if !node.is_named_leaf() { + return None; + } + let meta_var = lang.extract_meta_var(&node.text())?; + let replaced = env.get_var_bytes(&meta_var)?; + Some(replaced.to_vec()) +} + +#[cfg(test)] +mod test { + use crate::language::Tsx; + use crate::meta_var::MetaVarEnv; + use crate::{NodeMatch, Root, replacer::Replacer, tree_sitter::LanguageExt}; + use thread_utils::RapidMap; + + fn test_pattern_replace(replacer: &str, vars: &[(&str, &str)], expected: &str) { + let mut env = MetaVarEnv::new(); + let roots: Vec<_> = vars.iter().map(|(v, p)| (v, Tsx.ast_grep(p))).collect(); + for (var, root) in &roots { + env.insert(var, root.root()); + } + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let replacer = Root::str(replacer, Tsx); + let replaced = replacer.generate_replacement(&node_match); + let replaced = String::from_utf8_lossy(&replaced); + assert_eq!( + replaced, + expected, + "wrong replacement {replaced} {expected} {:?}", + RapidMap::from(env) + ); + } + + #[test] + fn test_no_env() { + test_pattern_replace("let a = 123", &[], "let a = 123"); + test_pattern_replace( + "console.log('hello world'); let b = 123;", + &[], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_single_env() { + test_pattern_replace("let a = $A", &[("A", "123")], "let a = 123"); + test_pattern_replace( + "console.log($HW); let b = 123;", + &[("HW", "'hello world'")], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_multiple_env() { + test_pattern_replace("let $V = $A", &[("A", "123"), ("V", "a")], "let a = 123"); + test_pattern_replace( + "console.log($HW); let $B = 123;", + &[("HW", "'hello world'"), ("B", "b")], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_multiple_occurrences() { + test_pattern_replace("let $A = $A", &[("A", "a")], "let a = a"); + test_pattern_replace("var $A = () => $A", &[("A", "a")], "var a = () => a"); + test_pattern_replace( + "const $A = () => { console.log($B); $A(); };", + &[("B", "'hello world'"), ("A", "a")], + "const a = () => { console.log('hello world'); a(); };", + ); + } + + fn test_ellipsis_replace(replacer: &str, vars: &[(&str, &str)], expected: &str) { + let mut env = MetaVarEnv::new(); + let roots: Vec<_> = vars.iter().map(|(v, p)| (v, Tsx.ast_grep(p))).collect(); + for (var, root) in &roots { + env.insert_multi(var, root.root().children().collect()); + } + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let replacer = Root::str(replacer, Tsx); + let replaced = replacer.generate_replacement(&node_match); + let replaced = String::from_utf8_lossy(&replaced); + assert_eq!( + replaced, + expected, + "wrong replacement {replaced} {expected} {:?}", + RapidMap::from(env) + ); + } + + #[test] + fn test_ellipsis_meta_var() { + test_ellipsis_replace( + "let a = () => { $$$B }", + &[("B", "alert('works!')")], + "let a = () => { alert('works!') }", + ); + test_ellipsis_replace( + "let a = () => { $$$B }", + &[("B", "alert('works!');console.log(123)")], + "let a = () => { alert('works!');console.log(123) }", + ); + } + + #[test] + fn test_multi_ellipsis() { + test_ellipsis_replace( + "import {$$$A, B, $$$C} from 'a'", + &[("A", "A"), ("C", "C")], + "import {A, B, C} from 'a'", + ); + } + + #[test] + fn test_replace_in_string() { + test_pattern_replace("'$A'", &[("A", "123")], "'123'"); + } + + #[test] + fn test_nested_matching_replace() { + // TODO + } +} diff --git a/crates/ast-engine/src/replacer/template.rs b/crates/ast-engine/src/replacer/template.rs new file mode 100644 index 0000000..d7718fc --- /dev/null +++ b/crates/ast-engine/src/replacer/template.rs @@ -0,0 +1,368 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines}; +use super::{MetaVarExtract, Replacer, split_first_meta_var}; +use crate::NodeMatch; +use crate::language::Language; +use crate::meta_var::{MetaVarEnv, Underlying}; +use crate::source::{Content, Doc}; + +use thiserror::Error; + +use std::borrow::Cow; +use thread_utils::{RapidSet, get_set}; + +#[derive(Debug, Clone)] +pub enum TemplateFix { + // no meta_var, pure text + Textual(String), + WithMetaVar(Template), +} + +#[derive(Debug, Error)] +pub enum TemplateFixError {} + +impl TemplateFix { + pub fn try_new(template: &str, lang: &L) -> Result { + Ok(create_template(template, lang.meta_var_char(), &[])) + } + + pub fn with_transform(tpl: &str, lang: &L, trans: &[String]) -> Self { + create_template(tpl, lang.meta_var_char(), trans) + } + + #[must_use] + pub fn used_vars(&self) -> RapidSet<&str> { + let template = match self { + Self::WithMetaVar(t) => t, + Self::Textual(_) => return get_set(), + }; + template.vars.iter().map(|v| v.0.used_var()).collect() + } +} + +impl Replacer for TemplateFix { + fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying { + let leading = nm.get_doc().get_source().get_range(0..nm.range().start); + let indent = get_indent_at_offset::(leading); + let bytes = replace_fixer(self, nm.get_env()); + let replaced = DeindentedExtract::MultiLine(&bytes, 0); + indent_lines::(indent, &replaced).to_vec() + } +} + +type Indent = usize; + +#[derive(Debug, Clone)] +pub struct Template { + fragments: Vec, + vars: Vec<(MetaVarExtract, Indent)>, +} + +fn create_template(tmpl: &str, mv_char: char, transforms: &[String]) -> TemplateFix { + let mut fragments = vec![]; + let mut vars = vec![]; + let mut offset = 0; + let mut len = 0; + while let Some(i) = tmpl[len + offset..].find(mv_char) { + if let Some((meta_var, skipped)) = + split_first_meta_var(&tmpl[len + offset + i..], mv_char, transforms) + { + fragments.push(tmpl[len..len + offset + i].to_string()); + // NB we have to count ident of the full string + let indent = get_indent_at_offset::(&tmpl.as_bytes()[..len + offset + i]); + vars.push((meta_var, indent)); + len += skipped + offset + i; + offset = 0; + continue; + } + debug_assert!(len + offset + i < tmpl.len()); + // offset = 0, i = 0, + // 0 1 2 + // $ a $ + offset = offset + i + 1; + } + if fragments.is_empty() { + TemplateFix::Textual(tmpl[len..].to_string()) + } else { + fragments.push(tmpl[len..].to_string()); + TemplateFix::WithMetaVar(Template { fragments, vars }) + } +} + +fn replace_fixer(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underlying { + let template = match fixer { + TemplateFix::Textual(n) => return D::Source::decode_str(n).to_vec(), + TemplateFix::WithMetaVar(t) => t, + }; + let mut ret = vec![]; + let mut frags = template.fragments.iter(); + let vars = template.vars.iter(); + if let Some(frag) = frags.next() { + ret.extend_from_slice(&D::Source::decode_str(frag)); + } + for ((var, indent), frag) in vars.zip(frags) { + if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) { + ret.extend_from_slice(&bytes); + } + ret.extend_from_slice(&D::Source::decode_str(frag)); + } + ret +} + +fn maybe_get_var<'e, 't, C, D>( + env: &'e MetaVarEnv<'t, D>, + var: &MetaVarExtract, + indent: usize, +) -> Option> +where + C: Content + 'e, + D: Doc, +{ + let (source, range) = match var { + MetaVarExtract::Transformed(name) => { + // transformed source does not have range, directly return bytes + let source = env.get_transformed(name)?; + let de_intended = DeindentedExtract::MultiLine(source, 0); + let bytes = indent_lines::(indent, &de_intended); + return Some(Cow::Owned(bytes.into())); + } + MetaVarExtract::Single(name) => { + let replaced = env.get_match(name)?; + let source = replaced.get_doc().get_source(); + let range = replaced.range(); + (source, range) + } + MetaVarExtract::Multiple(name) => { + let nodes = env.get_multiple_matches(name); + if nodes.is_empty() { + return None; + } + // NOTE: start_byte is not always index range of source's slice. + // e.g. start_byte is still byte_offset in utf_16 (napi). start_byte + // so we need to call source's get_range method + let start = nodes[0].range().start; + let end = nodes[nodes.len() - 1].range().end; + let source = nodes[0].get_doc().get_source(); + (source, start..end) + } + }; + let extracted = extract_with_deindent(source, range); + let bytes = indent_lines::(indent, &extracted); + Some(Cow::Owned(bytes.into())) +} + +// replace meta_var in template string, e.g. "Hello $NAME" -> "Hello World" +pub fn gen_replacement(template: &str, nm: &NodeMatch<'_, D>) -> Underlying { + let fixer = create_template(template, nm.lang().meta_var_char(), &[]); + fixer.generate_replacement(nm) +} + +#[cfg(test)] +mod test { + + use super::*; + use crate::Pattern; + use crate::language::Tsx; + use crate::matcher::NodeMatch; + use crate::meta_var::{MetaVarEnv, MetaVariable}; + use crate::tree_sitter::LanguageExt; + use thread_utils::RapidMap; + + #[test] + fn test_example() { + let src = r" +if (true) { + a( + 1 + + 2 + + 3 + ) +}"; + let pattern = "a($B)"; + let template = r"c( + $B +)"; + let mut src = Tsx.ast_grep(src); + let pattern = Pattern::new(pattern, Tsx); + let success = src.replace(pattern, template).expect("should replace"); + assert!(success); + let expect = r"if (true) { + c( + 1 + + 2 + + 3 + ) +}"; + assert_eq!(src.root().text(), expect); + } + + fn test_str_replace(replacer: &str, vars: &[(&str, &str)], expected: &str) { + let mut env = MetaVarEnv::new(); + let roots: Vec<_> = vars.iter().map(|(v, p)| (v, Tsx.ast_grep(p))).collect(); + for (var, root) in &roots { + env.insert(var, root.root()); + } + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let replaced = replacer.generate_replacement(&node_match); + let replaced = String::from_utf8_lossy(&replaced); + assert_eq!( + replaced, + expected, + "wrong replacement {replaced} {expected} {:?}", + RapidMap::from(env) + ); + } + + #[test] + fn test_no_env() { + test_str_replace("let a = 123", &[], "let a = 123"); + test_str_replace( + "console.log('hello world'); let b = 123;", + &[], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_single_env() { + test_str_replace("let a = $A", &[("A", "123")], "let a = 123"); + test_str_replace( + "console.log($HW); let b = 123;", + &[("HW", "'hello world'")], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_multiple_env() { + test_str_replace("let $V = $A", &[("A", "123"), ("V", "a")], "let a = 123"); + test_str_replace( + "console.log($HW); let $B = 123;", + &[("HW", "'hello world'"), ("B", "b")], + "console.log('hello world'); let b = 123;", + ); + } + + #[test] + fn test_multiple_occurrences() { + test_str_replace("let $A = $A", &[("A", "a")], "let a = a"); + test_str_replace("var $A = () => $A", &[("A", "a")], "var a = () => a"); + test_str_replace( + "const $A = () => { console.log($B); $A(); };", + &[("B", "'hello world'"), ("A", "a")], + "const a = () => { console.log('hello world'); a(); };", + ); + } + + fn test_ellipsis_replace(replacer: &str, vars: &[(&str, &str)], expected: &str) { + let mut env = MetaVarEnv::new(); + let roots: Vec<_> = vars.iter().map(|(v, p)| (v, Tsx.ast_grep(p))).collect(); + for (var, root) in &roots { + env.insert_multi(var, root.root().children().collect()); + } + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let replaced = replacer.generate_replacement(&node_match); + let replaced = String::from_utf8_lossy(&replaced); + assert_eq!( + replaced, + expected, + "wrong replacement {replaced} {expected} {:?}", + RapidMap::from(env) + ); + } + + #[test] + fn test_ellipsis_meta_var() { + test_ellipsis_replace( + "let a = () => { $$$B }", + &[("B", "alert('works!')")], + "let a = () => { alert('works!') }", + ); + test_ellipsis_replace( + "let a = () => { $$$B }", + &[("B", "alert('works!');console.log(123)")], + "let a = () => { alert('works!');console.log(123) }", + ); + } + + #[test] + fn test_multi_ellipsis() { + test_ellipsis_replace( + "import {$$$A, B, $$$C} from 'a'", + &[("A", "A"), ("C", "C")], + "import {A, B, C} from 'a'", + ); + } + + #[test] + fn test_replace_in_string() { + test_str_replace("'$A'", &[("A", "123")], "'123'"); + } + + fn test_template_replace(template: &str, vars: &[(&str, &str)], expected: &str) { + let mut env = MetaVarEnv::new(); + let roots: Vec<_> = vars.iter().map(|(v, p)| (v, Tsx.ast_grep(p))).collect(); + for (var, root) in &roots { + env.insert(var, root.root()); + } + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let bytes = template.generate_replacement(&node_match); + let ret = String::from_utf8(bytes).expect("replacement must be valid utf-8"); + assert_eq!(expected, ret); + } + + #[test] + fn test_template() { + test_template_replace("Hello $A", &[("A", "World")], "Hello World"); + test_template_replace("$B $A", &[("A", "World"), ("B", "Hello")], "Hello World"); + } + + #[test] + fn test_template_vars() { + let tf = TemplateFix::try_new("$A $B $C", &Tsx).expect("ok"); + assert_eq!(tf.used_vars(), ["A", "B", "C"].into_iter().collect()); + let tf = TemplateFix::try_new("$a$B$C", &Tsx).expect("ok"); + assert_eq!(tf.used_vars(), ["B", "C"].into_iter().collect()); + let tf = TemplateFix::try_new("$a$B$C", &Tsx).expect("ok"); + assert_eq!(tf.used_vars(), ["B", "C"].into_iter().collect()); + } + + // GH #641 + #[test] + fn test_multi_row_replace() { + test_template_replace( + "$A = $B", + &[("A", "x"), ("B", "[\n 1\n]")], + "x = [\n 1\n]", + ); + } + + #[test] + fn test_replace_rewriter() { + let tf = TemplateFix::with_transform("if (a)\n $A", &Tsx, &["A".to_string()]); + let mut env = MetaVarEnv::new(); + env.insert_transformation( + &MetaVariable::Multiple, + "A", + "if (b)\n foo".bytes().collect(), + ); + let dummy = Tsx.ast_grep("dummy"); + let node_match = NodeMatch::new(dummy.root(), env.clone()); + let bytes = tf.generate_replacement(&node_match); + let ret = String::from_utf8(bytes).expect("replacement must be valid utf-8"); + assert_eq!("if (a)\n if (b)\n foo", ret); + } + + #[test] + fn test_nested_matching_replace() { + // TODO impossible, we don't support nested replacement + } +} diff --git a/crates/ast-engine/src/source.rs b/crates/ast-engine/src/source.rs new file mode 100644 index 0000000..cfdd742 --- /dev/null +++ b/crates/ast-engine/src/source.rs @@ -0,0 +1,176 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! This module defines the `Doc` and `Content` traits to abstract away source code encoding issues. +//! +//! ast-grep supports three kinds of encoding: utf-8 for CLI, utf-16 for nodeJS napi and `Vec` for wasm. +//! Different encoding will produce different tree-sitter Node's range and position. +//! +//! The `Content` trait is defined to abstract different encoding. +//! It is used as associated type bound `Source` in the `Doc` trait. +//! Its associated type `Underlying` represents the underlying type of the content, e.g. `Vec`, `Vec`. +//! +//! `Doc` is a trait that defines a document that can be parsed by Tree-sitter. +//! It has a `Source` associated type bounded by `Content` that represents the source code of the document, +//! and a `Lang` associated type that represents the language of the document. + +use crate::{Position, language::Language, node::KindId}; +use std::borrow::Cow; +use std::ops::Range; + +// https://github.com/tree-sitter/tree-sitter/blob/e4e5ffe517ca2c668689b24cb17c51b8c6db0790/cli/src/parse.rs +#[derive(Debug, Clone)] +pub struct Edit { + pub position: usize, + pub deleted_length: usize, + pub inserted_text: Vec, +} + +/// NOTE: Some method names are the same as tree-sitter's methods. +/// Fully Qualified Syntax may needed +pub trait SgNode<'r>: Clone { + fn parent(&self) -> Option; + fn children(&self) -> impl ExactSizeIterator; + fn kind(&self) -> Cow<'_, str>; + fn kind_id(&self) -> KindId; + fn node_id(&self) -> usize; + fn range(&self) -> std::ops::Range; + fn start_pos(&self) -> Position; + fn end_pos(&self) -> Position; + + // default implementation + #[allow(clippy::needless_collect)] + fn ancestors(&self, _root: Self) -> impl Iterator { + let mut ancestors = vec![]; + let mut current = self.clone(); + while let Some(parent) = current.parent() { + ancestors.push(parent.clone()); + current = parent; + } + ancestors.reverse(); + ancestors.into_iter() + } + fn dfs(&self) -> impl Iterator { + let mut stack = vec![self.clone()]; + std::iter::from_fn(move || { + if let Some(node) = stack.pop() { + let children: Vec<_> = node.children().collect(); + stack.extend(children.into_iter().rev()); + Some(node) + } else { + None + } + }) + } + fn child(&self, nth: usize) -> Option { + self.children().nth(nth) + } + fn next(&self) -> Option { + let parent = self.parent()?; + let mut children = parent.children(); + while let Some(child) = children.next() { + if child.node_id() == self.node_id() { + return children.next(); + } + } + None + } + fn prev(&self) -> Option { + let parent = self.parent()?; + let children = parent.children(); + let mut prev = None; + for child in children { + if child.node_id() == self.node_id() { + return prev; + } + prev = Some(child); + } + None + } + fn next_all(&self) -> impl Iterator { + let mut next = self.next(); + std::iter::from_fn(move || { + let n = next.clone()?; + next = n.next(); + Some(n) + }) + } + fn prev_all(&self) -> impl Iterator { + let mut prev = self.prev(); + std::iter::from_fn(move || { + let n = prev.clone()?; + prev = n.prev(); + Some(n) + }) + } + fn is_named(&self) -> bool { + true + } + /// N.B. it is different from `is_named` && `is_leaf` + /// if a node has no named children. + fn is_named_leaf(&self) -> bool { + self.is_leaf() + } + fn is_leaf(&self) -> bool { + self.children().count() == 0 + } + + // missing node is a tree-sitter specific concept + fn is_missing(&self) -> bool { + false + } + fn is_error(&self) -> bool { + false + } + + fn field(&self, name: &str) -> Option; + fn field_children(&self, field_id: Option) -> impl Iterator; + fn child_by_field_id(&self, field_id: u16) -> Option; +} + +pub trait Doc: Clone + 'static { + type Source: Content; + type Lang: Language; + type Node<'r>: SgNode<'r>; + fn get_lang(&self) -> &Self::Lang; + fn get_source(&self) -> &Self::Source; + fn do_edit(&mut self, edit: &Edit) -> Result<(), String>; + fn root_node(&self) -> Self::Node<'_>; + fn get_node_text<'a>(&'a self, node: &Self::Node<'a>) -> Cow<'a, str>; +} + +pub trait Content: Sized { + type Underlying: Clone + PartialEq; + fn get_range(&self, range: Range) -> &[Self::Underlying]; + /// Used for string replacement. We need this for + /// indentation and deindentation. + fn decode_str(src: &str) -> Cow<'_, [Self::Underlying]>; + /// Used for string replacement. We need this for + /// transformation. + fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<'_, str>; + /// Get the character column at the given position + fn get_char_column(&self, column: usize, offset: usize) -> usize; +} + +impl Content for String { + type Underlying = u8; + fn get_range(&self, range: Range) -> &[Self::Underlying] { + &self.as_bytes()[range] + } + fn decode_str(src: &str) -> Cow<'_, [Self::Underlying]> { + Cow::Borrowed(src.as_bytes()) + } + fn encode_bytes(bytes: &[Self::Underlying]) -> Cow<'_, str> { + Self::from_utf8_lossy(bytes) + } + + /// This is an O(n) operation optimized with SIMD. SIMD allows efficient processing + /// of unusually long lines. Modest improvements for standard code lines (~100 chars) + fn get_char_column(&self, _col: usize, offset: usize) -> usize { + // Use SIMD-optimized version from utils crate + thread_utils::get_char_column_simd(self, offset) + } +} diff --git a/crates/ast-engine/src/tree_sitter/mod.rs b/crates/ast-engine/src/tree_sitter/mod.rs new file mode 100644 index 0000000..2652bdb --- /dev/null +++ b/crates/ast-engine/src/tree_sitter/mod.rs @@ -0,0 +1,540 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +pub mod traversal; + +use crate::node::Root; +use crate::replacer::Replacer; +use crate::source::{Content, Doc, Edit, SgNode}; +use crate::{AstGrep, Matcher}; +use crate::{Language, Position, node::KindId}; +use std::borrow::Cow; +use std::num::NonZero; +use thiserror::Error; +use thread_utils::RapidMap; +pub use traversal::{TsPre, Visitor}; +pub use tree_sitter::Language as TSLanguage; +use tree_sitter::{InputEdit, LanguageError, Node, Parser, Point, Tree}; +pub use tree_sitter::{Point as TSPoint, Range as TSRange}; + +/// Represents tree-sitter related error +#[derive(Debug, Error)] +pub enum TSParseError { + #[error("incompatible `Language` is assigned to a `Parser`.")] + Language(#[from] LanguageError), + /// A general error when tree sitter fails to parse in time. It can be caused by + /// the following reasons but tree-sitter does not provide error detail. + /// * The timeout set with [`Parser::set_timeout_micros`] expired + /// * The cancellation flag set with [`Parser::set_cancellation_flag`] was flipped + /// * The parser has not yet had a language assigned with [`Parser::set_language`] + #[error("general error when tree-sitter fails to parse.")] + TreeUnavailable, +} + +#[inline] +fn parse_lang( + parse_fn: impl Fn(&mut Parser) -> Option, + ts_lang: &TSLanguage, +) -> Result { + let mut parser = Parser::new(); + parser.set_language(ts_lang)?; + if let Some(tree) = parse_fn(&mut parser) { + Ok(tree) + } else { + Err(TSParseError::TreeUnavailable) + } +} + +#[derive(Clone, Debug)] +pub struct StrDoc { + pub src: String, + pub lang: L, + pub tree: Tree, +} + +impl StrDoc { + pub fn try_new(src: &str, lang: L) -> Result { + let src = src.to_string(); + let ts_lang = lang.get_ts_language(); + let tree = + parse_lang(|p| p.parse(src.as_bytes(), None), &ts_lang).map_err(|e| e.to_string())?; + Ok(Self { src, lang, tree }) + } + pub fn new(src: &str, lang: L) -> Self { + Self::try_new(src, lang).expect("Parser tree error") + } + fn parse(&self, old_tree: Option<&Tree>) -> Result { + let source = self.get_source(); + let lang = self.get_lang().get_ts_language(); + parse_lang(|p| p.parse(source.as_bytes(), old_tree), &lang) + } +} + +impl Doc for StrDoc { + type Source = String; + type Lang = L; + type Node<'r> = Node<'r>; + fn get_lang(&self) -> &Self::Lang { + &self.lang + } + fn get_source(&self) -> &Self::Source { + &self.src + } + fn do_edit(&mut self, edit: &Edit) -> Result<(), String> { + let source = &mut self.src; + perform_edit(&mut self.tree, source, edit); + self.tree = self.parse(Some(&self.tree)).map_err(|e| e.to_string())?; + Ok(()) + } + fn root_node(&self) -> Node<'_> { + self.tree.root_node() + } + fn get_node_text<'a>(&'a self, node: &Self::Node<'a>) -> Cow<'a, str> { + Cow::Borrowed( + node.utf8_text(self.src.as_bytes()) + .expect("invalid source text encoding"), + ) + } +} + +struct NodeWalker<'tree> { + cursor: tree_sitter::TreeCursor<'tree>, + count: usize, +} + +impl<'tree> Iterator for NodeWalker<'tree> { + type Item = Node<'tree>; + fn next(&mut self) -> Option { + if self.count == 0 { + return None; + } + let ret = Some(self.cursor.node()); + self.cursor.goto_next_sibling(); + self.count -= 1; + ret + } +} + +impl ExactSizeIterator for NodeWalker<'_> { + fn len(&self) -> usize { + self.count + } +} + +impl<'r> SgNode<'r> for Node<'r> { + fn parent(&self) -> Option { + Node::parent(self) + } + fn ancestors(&self, root: Self) -> impl Iterator { + let mut ancestor = Some(root); + let self_id = self.id(); + std::iter::from_fn(move || { + let inner = ancestor.take()?; + if inner.id() == self_id { + return None; + } + ancestor = inner.child_with_descendant(*self); + Some(inner) + }) + // We must iterate up the tree to preserve backwards compatibility + .collect::>() + .into_iter() + .rev() + } + fn dfs(&self) -> impl Iterator { + TsPre::new(self) + } + fn child(&self, nth: usize) -> Option { + // TODO remove cast after migrating to tree-sitter + Node::child(self, nth) + } + fn children(&self) -> impl ExactSizeIterator { + let mut cursor = self.walk(); + cursor.goto_first_child(); + NodeWalker { + cursor, + count: self.child_count(), + } + } + fn child_by_field_id(&self, field_id: u16) -> Option { + Node::child_by_field_id(self, field_id) + } + fn next(&self) -> Option { + self.next_sibling() + } + fn prev(&self) -> Option { + self.prev_sibling() + } + fn next_all(&self) -> impl Iterator { + // if root is none, use self as fallback to return a type-stable Iterator + let node = self.parent().unwrap_or(*self); + let mut cursor = node.walk(); + cursor.goto_first_child_for_byte(self.start_byte()); + std::iter::from_fn(move || { + if cursor.goto_next_sibling() { + Some(cursor.node()) + } else { + None + } + }) + } + fn prev_all(&self) -> impl Iterator { + // if root is none, use self as fallback to return a type-stable Iterator + let node = self.parent().unwrap_or(*self); + let mut cursor = node.walk(); + cursor.goto_first_child_for_byte(self.start_byte()); + std::iter::from_fn(move || { + if cursor.goto_previous_sibling() { + Some(cursor.node()) + } else { + None + } + }) + } + fn is_named(&self) -> bool { + Node::is_named(self) + } + /// N.B. it is different from `is_named` && `is_leaf` + /// if a `Node` has no named children. + fn is_named_leaf(&self) -> bool { + self.named_child_count() == 0 + } + fn is_leaf(&self) -> bool { + self.child_count() == 0 + } + fn kind(&self) -> Cow<'_, str> { + Cow::Borrowed(Node::kind(self)) + } + fn kind_id(&self) -> KindId { + Node::kind_id(self) + } + fn node_id(&self) -> usize { + self.id() + } + fn range(&self) -> std::ops::Range { + self.start_byte()..self.end_byte() + } + fn start_pos(&self) -> Position { + let pos = self.start_position(); + let byte = self.start_byte(); + Position::new(pos.row, pos.column, byte) + } + fn end_pos(&self) -> Position { + let pos = self.end_position(); + let byte = self.end_byte(); + Position::new(pos.row, pos.column, byte) + } + // missing node is a tree-sitter specific concept + fn is_missing(&self) -> bool { + Node::is_missing(self) + } + fn is_error(&self) -> bool { + Node::is_error(self) + } + + fn field(&self, name: &str) -> Option { + self.child_by_field_name(name) + } + fn field_children(&self, field_id: Option) -> impl Iterator { + let field_id = field_id.and_then(NonZero::new); + let mut cursor = self.walk(); + cursor.goto_first_child(); + // if field_id is not found, iteration is done + let mut done = field_id.is_none(); + + std::iter::from_fn(move || { + if done { + return None; + } + while cursor.field_id() != field_id { + if !cursor.goto_next_sibling() { + return None; + } + } + let ret = cursor.node(); + if !cursor.goto_next_sibling() { + done = true; + } + Some(ret) + }) + } +} + +pub fn perform_edit(tree: &mut Tree, input: &mut S, edit: &Edit) -> InputEdit { + let edit = input.accept_edit(edit); + tree.edit(&edit); + edit +} + +/// tree-sitter specific language trait +pub trait LanguageExt: Language { + /// Create an [`AstGrep`] instance for the language + fn ast_grep>(&self, source: S) -> AstGrep> { + AstGrep::new(source, self.clone()) + } + + /// tree sitter language to parse the source + fn get_ts_language(&self) -> TSLanguage; + + fn injectable_languages(&self) -> Option<&'static [&'static str]> { + None + } + + /// get injected language regions in the root document. e.g. get `JavaScripts` in HTML + /// it will return a list of tuples of (language, regions). + /// The first item is the embedded region language, e.g. javascript + /// The second item is a list of regions in `tree_sitter`. + /// [also see](https://tree-sitter.github.io/tree-sitter/using-parsers#multi-language-documents) + fn extract_injections( + &self, + _root: crate::Node>, + ) -> RapidMap> { + RapidMap::default() + } +} + +fn position_for_offset(input: &[u8], offset: usize) -> Point { + debug_assert!(offset <= input.len()); + let (mut row, mut col) = (0, 0); + for c in &input[0..offset] { + if *c as char == '\n' { + row += 1; + col = 0; + } else { + col += 1; + } + } + Point::new(row, col) +} + +impl AstGrep> { + pub fn new>(src: S, lang: L) -> Self { + Self::str(src.as_ref(), lang) + } + + pub fn source(&self) -> &str { + self.doc.get_source().as_str() + } + + pub fn generate(self) -> String { + self.doc.src + } +} + +pub trait ContentExt: Content { + fn accept_edit(&mut self, edit: &Edit) -> InputEdit; +} +impl ContentExt for String { + fn accept_edit(&mut self, edit: &Edit) -> InputEdit { + let start_byte = edit.position; + let old_end_byte = edit.position + edit.deleted_length; + let new_end_byte = edit.position + edit.inserted_text.len(); + let input = unsafe { self.as_mut_vec() }; + let start_position = position_for_offset(input, start_byte); + let old_end_position = position_for_offset(input, old_end_byte); + input.splice(start_byte..old_end_byte, edit.inserted_text.clone()); + let new_end_position = position_for_offset(input, new_end_byte); + InputEdit { + start_byte, + old_end_byte, + new_end_byte, + start_position, + old_end_position, + new_end_position, + } + } +} + +impl Root> { + pub fn str(src: &str, lang: L) -> Self { + Self::try_new(src, lang).expect("should parse") + } + pub fn try_new(src: &str, lang: L) -> Result { + let doc = StrDoc::try_new(src, lang)?; + Ok(Self { doc }) + } + pub fn get_text(&self) -> &str { + &self.doc.src + } + + pub fn get_injections Option>(&self, get_lang: F) -> Vec { + let root = self.root(); + let range = self.lang().extract_injections(root); + range + .into_iter() + .filter_map(|(lang, ranges)| { + let lang = get_lang(&lang)?; + let source = self.doc.get_source(); + let mut parser = Parser::new(); + parser.set_included_ranges(&ranges).ok()?; + parser.set_language(&lang.get_ts_language()).ok()?; + let tree = parser.parse(source, None)?; + Some(Self { + doc: StrDoc { + src: self.doc.src.clone(), + lang, + tree, + }, + }) + }) + .collect() + } +} + +pub struct DisplayContext<'r> { + /// content for the matched node + pub matched: Cow<'r, str>, + /// content before the matched node + pub leading: &'r str, + /// content after the matched node + pub trailing: &'r str, + /// zero-based start line of the context + pub start_line: usize, +} + +/// these methods are only for `StrDoc` +impl<'r, L: LanguageExt> crate::Node<'r, StrDoc> { + #[doc(hidden)] + #[must_use] + pub fn display_context(&self, before: usize, after: usize) -> DisplayContext<'r> { + let source = self.root.doc.get_source().as_str(); + let bytes = source.as_bytes(); + let start = self.inner.start_byte(); + let end = self.inner.end_byte(); + let (mut leading, mut trailing) = (start, end); + let mut lines_before = before + 1; + while leading > 0 { + if bytes[leading - 1] == b'\n' { + lines_before -= 1; + if lines_before == 0 { + break; + } + } + leading -= 1; + } + let mut lines_after = after + 1; + // tree-sitter will append line ending to source so trailing can be out of bound + trailing = trailing.min(bytes.len()); + while trailing < bytes.len() { + if bytes[trailing] == b'\n' { + lines_after -= 1; + if lines_after == 0 { + break; + } + } + trailing += 1; + } + // lines_before means we matched all context, offset is `before` itself + let offset = if lines_before == 0 { + before + } else { + // otherwise, there are fewer than `before` line in src, compute the actual line + before + 1 - lines_before + }; + DisplayContext { + matched: self.text(), + leading: &source[leading..start], + trailing: &source[end..trailing], + start_line: self.start_pos().line() - offset, + } + } + + pub fn replace_all>>( + &self, + matcher: M, + replacer: R, + ) -> Vec> { + // TODO: support nested matches like Some(Some(1)) with pattern Some($A) + Visitor::new(&matcher) + .reentrant(false) + .visit(self.clone()) + .map(|matched| matched.make_edit(&matcher, &replacer)) + .collect() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use tree_sitter::Point; + + fn parse(src: &str) -> Result { + parse_lang(|p| p.parse(src, None), Tsx.get_ts_language()) + } + + #[test] + fn test_tree_sitter() -> Result<(), TSParseError> { + let tree = parse("var a = 1234")?; + let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "program"); + assert_eq!(root_node.start_position().column, 0); + assert_eq!(root_node.end_position().column, 12); + assert_eq!( + root_node.to_sexp(), + "(program (variable_declaration (variable_declarator name: (identifier) value: (number))))" + ); + Ok(()) + } + + #[test] + fn test_object_literal() -> Result<(), TSParseError> { + let tree = parse("{a: $X}")?; + let root_node = tree.root_node(); + // wow this is not label. technically it is wrong but practically it is better LOL + assert_eq!( + root_node.to_sexp(), + "(program (expression_statement (object (pair key: (property_identifier) value: (identifier)))))" + ); + Ok(()) + } + + #[test] + fn test_string() -> Result<(), TSParseError> { + let tree = parse("'$A'")?; + let root_node = tree.root_node(); + assert_eq!( + root_node.to_sexp(), + "(program (expression_statement (string (string_fragment))))" + ); + Ok(()) + } + + #[test] + fn test_row_col() -> Result<(), TSParseError> { + let tree = parse("πŸ˜„")?; + let root = tree.root_node(); + assert_eq!(root.start_position(), Point::new(0, 0)); + // NOTE: Point in tree-sitter is counted in bytes instead of char + assert_eq!(root.end_position(), Point::new(0, 4)); + Ok(()) + } + + #[test] + fn test_edit() -> Result<(), TSParseError> { + let mut src = "a + b".to_string(); + let mut tree = parse(&src)?; + let _ = perform_edit( + &mut tree, + &mut src, + &Edit { + position: 1, + deleted_length: 0, + inserted_text: " * b".into(), + }, + ); + let tree2 = parse_lang(|p| p.parse(&src, Some(&tree)), Tsx.get_ts_language())?; + assert_eq!( + tree.root_node().to_sexp(), + "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + ); + assert_eq!( + tree2.root_node().to_sexp(), + "(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))" + ); + Ok(()) + } +} diff --git a/crates/ast-engine/src/tree_sitter/traversal.rs b/crates/ast-engine/src/tree_sitter/traversal.rs new file mode 100644 index 0000000..bb87549 --- /dev/null +++ b/crates/ast-engine/src/tree_sitter/traversal.rs @@ -0,0 +1,610 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! # Traverse Node AST +//! +//! ast-grep supports common tree traversal algorithms, including +//! * Pre order traversal +//! * Post order traversal +//! * Level order traversal +//! +//! Note tree traversal can also be used with Matcher. A traversal with Matcher will +//! produce a [`NodeMatch`] sequence where all items satisfies the Matcher. +//! +//! It is also possible to specify the reentrancy of a traversal. +//! That is, we can control whether a matching node should be visited when it is nested within another match. +//! For example, suppose we want to find all usages of calling `foo` in the source `foo(foo())`. +//! The code has two matching calls and we can configure a traversal +//! to report only the inner one, only the outer one or both. +//! +//! Pre and Post order traversals in this module are implemented using tree-sitter's cursor API without extra heap allocation. +//! It is recommended to use traversal instead of tree recursion to avoid stack overflow and memory overhead. +//! Level order is also included for completeness and should be used sparingly. + +use super::StrDoc; +use crate::matcher::{Matcher, MatcherExt}; +use crate::tree_sitter::LanguageExt; +use crate::{Doc, Node, NodeMatch, Root}; + +use tree_sitter as ts; + +use std::collections::VecDeque; +use std::marker::PhantomData; + +pub struct Visitor { + /// Whether a node will match if it contains or is contained in another match. + reentrant: bool, + /// Whether visit named node only + named_only: bool, + /// optional matcher to filter nodes + matcher: M, + /// The algorithm to traverse the tree, can be pre/post/level order + algorithm: PhantomData, +} + +impl Visitor { + pub const fn new(matcher: M) -> Self { + Self { + reentrant: true, + named_only: false, + matcher, + algorithm: PhantomData, + } + } +} + +impl Visitor { + pub fn algorithm(self) -> Visitor { + Visitor { + reentrant: self.reentrant, + named_only: self.named_only, + matcher: self.matcher, + algorithm: PhantomData, + } + } + + #[must_use] + pub fn reentrant(self, reentrant: bool) -> Self { + Self { reentrant, ..self } + } + + #[must_use] + pub fn named_only(self, named_only: bool) -> Self { + Self { named_only, ..self } + } +} + +impl Visitor +where + A: Algorithm, +{ + pub fn visit( + self, + node: Node<'_, StrDoc>, + ) -> Visit<'_, StrDoc, A::Traversal<'_, L>, M> + where + M: Matcher, + { + let traversal = A::traverse(node); + Visit { + reentrant: self.reentrant, + named: self.named_only, + matcher: self.matcher, + traversal, + lang: PhantomData, + } + } +} + +pub struct Visit<'t, D, T, M> { + reentrant: bool, + named: bool, + matcher: M, + traversal: T, + lang: PhantomData<&'t D>, +} +impl<'t, D, T, M> Visit<'t, D, T, M> +where + D: Doc + 't, + T: Traversal<'t, D>, + M: Matcher, +{ + #[inline] + fn mark_match(&mut self, depth: Option) { + if !self.reentrant { + self.traversal.calibrate_for_match(depth); + } + } +} + +impl<'t, D, T, M> Iterator for Visit<'t, D, T, M> +where + D: Doc + 't, + T: Traversal<'t, D>, + M: Matcher, +{ + type Item = NodeMatch<'t, D>; + fn next(&mut self) -> Option { + loop { + let match_depth = self.traversal.get_current_depth(); + let node = self.traversal.next()?; + let pass_named = !self.named || node.is_named(); + if let Some(node_match) = pass_named.then(|| self.matcher.match_node(node)).flatten() { + self.mark_match(Some(match_depth)); + return Some(node_match); + } + self.mark_match(None); + + } + } +} + +pub trait Algorithm { + type Traversal<'t, L: LanguageExt>: Traversal<'t, StrDoc>; + fn traverse(node: Node<'_, StrDoc>) -> Self::Traversal<'_, L>; +} + +pub struct PreOrder; +impl Algorithm for PreOrder { + type Traversal<'t, L: LanguageExt> = Pre<'t, L>; + fn traverse(node: Node<'_, StrDoc>) -> Self::Traversal<'_, L> { + Pre::new(&node) + } +} +pub struct PostOrder; +impl Algorithm for PostOrder { + type Traversal<'t, L: LanguageExt> = Post<'t, L>; + fn traverse(node: Node<'_, StrDoc>) -> Self::Traversal<'_, L> { + Post::new(&node) + } +} + +/// Traversal can iterate over node by using traversal algorithm. +/// +/// The `next` method should only handle normal, reentrant iteration. +/// If reentrancy is not desired, traversal should mutate cursor in `calibrate_for_match`. +/// Visit will maintain the matched node depth so traversal does not need to use extra field. +pub trait Traversal<'t, D: Doc + 't>: Iterator> { + /// Calibrate cursor position to skip overlapping matches. + /// node depth will be passed if matched, otherwise None. + fn calibrate_for_match(&mut self, depth: Option); + /// Returns the current depth of cursor depth. + /// Cursor depth is incremented by 1 when moving from parent to child. + /// Cursor depth at Root node is 0. + fn get_current_depth(&self) -> usize; +} + +/// Represents a pre-order traversal +pub struct TsPre<'tree> { + cursor: ts::TreeCursor<'tree>, + // record the starting node, if we return back to starting point + // we should terminate the dfs. + start_id: Option, + current_depth: usize, +} + +impl<'tree> TsPre<'tree> { + #[must_use] pub fn new(node: &ts::Node<'tree>) -> Self { + Self { + cursor: node.walk(), + start_id: Some(node.id()), + current_depth: 0, + } + } + fn step_down(&mut self) -> bool { + if self.cursor.goto_first_child() { + self.current_depth += 1; + true + } else { + false + } + } + + // retrace back to ancestors and find next node to explore + fn trace_up(&mut self, start: usize) { + let cursor = &mut self.cursor; + while cursor.node().id() != start { + // try visit sibling nodes + if cursor.goto_next_sibling() { + return; + } + self.current_depth -= 1; + // go back to parent node + if !cursor.goto_parent() { + // it should never fail here. However, tree-sitter has bad parsing bugs + // stop to avoid panic. https://github.com/ast-grep/ast-grep/issues/713 + break; + } + } + // terminate traversal here + self.start_id = None; + } +} + +/// Amortized time complexity is O(NlgN), depending on branching factor. +impl<'tree> Iterator for TsPre<'tree> { + type Item = ts::Node<'tree>; + // 1. Yield the node itself + // 2. Try visit the child node until no child available + // 3. Try visit next sibling after going back to parent + // 4. Repeat step 3 until returning to the starting node + fn next(&mut self) -> Option { + // start_id will always be Some until the dfs terminates + let start = self.start_id?; + let cursor = &mut self.cursor; + let inner = cursor.node(); // get current node + let ret = Some(inner); + // try going to children first + if self.step_down() { + return ret; + } + // if no child available, go to ancestor nodes + // until we get to the starting point + self.trace_up(start); + ret + } +} + +pub struct Pre<'tree, L: LanguageExt> { + root: &'tree Root>, + inner: TsPre<'tree>, +} +impl<'tree, L: LanguageExt> Iterator for Pre<'tree, L> { + type Item = Node<'tree, StrDoc>; + fn next(&mut self) -> Option { + let inner = self.inner.next()?; + Some(self.root.adopt(inner)) + } +} + +impl<'t, L: LanguageExt> Pre<'t, L> { + #[must_use] pub fn new(node: &Node<'t, StrDoc>) -> Self { + let inner = TsPre::new(&node.inner); + Self { + root: node.root, + inner, + } + } +} + +impl<'t, L: LanguageExt> Traversal<'t, StrDoc> for Pre<'t, L> { + fn calibrate_for_match(&mut self, depth: Option) { + // not entering the node, ignore + let Some(depth) = depth else { + return; + }; + // if already entering sibling or traced up, ignore + if self.inner.current_depth <= depth { + return; + } + debug_assert!(self.inner.current_depth > depth); + if let Some(start) = self.inner.start_id { + // revert the step down + self.inner.cursor.goto_parent(); + self.inner.trace_up(start); + } + } + + #[inline] + fn get_current_depth(&self) -> usize { + self.inner.current_depth + } +} + +/// Represents a post-order traversal +pub struct Post<'tree, L: LanguageExt> { + cursor: ts::TreeCursor<'tree>, + root: &'tree Root>, + start_id: Option, + current_depth: usize, + match_depth: usize, +} + +/// Amortized time complexity is O(NlgN), depending on branching factor. +impl<'tree, L: LanguageExt> Post<'tree, L> { + #[must_use] pub fn new(node: &Node<'tree, StrDoc>) -> Self { + let mut ret = Self { + cursor: node.inner.walk(), + root: node.root, + start_id: Some(node.inner.id()), + current_depth: 0, + match_depth: 0, + }; + ret.trace_down(); + ret + } + fn trace_down(&mut self) { + while self.cursor.goto_first_child() { + self.current_depth += 1; + } + } + fn step_up(&mut self) { + self.current_depth -= 1; + self.cursor.goto_parent(); + } +} + +/// Amortized time complexity is O(NlgN), depending on branching factor. +impl<'tree, L: LanguageExt> Iterator for Post<'tree, L> { + type Item = Node<'tree, StrDoc>; + fn next(&mut self) -> Option { + // start_id will always be Some until the dfs terminates + let start = self.start_id?; + let cursor = &mut self.cursor; + let node = self.root.adopt(cursor.node()); + // return to start + if node.inner.id() == start { + self.start_id = None; + } else if cursor.goto_next_sibling() { + // try visit sibling + self.trace_down(); + } else { + self.step_up(); + } + Some(node) + } +} + +impl<'t, L: LanguageExt> Traversal<'t, StrDoc> for Post<'t, L> { + fn calibrate_for_match(&mut self, depth: Option) { + if let Some(depth) = depth { + // Later matches' depth should always be greater than former matches. + // because we bump match_depth in `step_up` during traversal. + debug_assert!(depth >= self.match_depth); + self.match_depth = depth; + return; + } + // found new nodes to explore in trace_down, skip calibration. + if self.current_depth >= self.match_depth { + return; + } + let Some(start) = self.start_id else { + return; + }; + while self.cursor.node().id() != start { + self.match_depth = self.current_depth; + if self.cursor.goto_next_sibling() { + // try visit sibling + self.trace_down(); + return; + } + self.step_up(); + } + // terminate because all ancestors are skipped + self.start_id = None; + } + + #[inline] + fn get_current_depth(&self) -> usize { + self.current_depth + } +} + +/// Represents a level-order traversal. +/// +/// It is implemented with [`VecDeque`] since quadratic backtracking is too time consuming. +/// Though level-order is not used as frequently as other DFS traversals, +/// traversing a big AST with level-order should be done with caution since it might increase the memory usage. +pub struct Level<'tree, L: LanguageExt> { + deque: VecDeque>, + cursor: ts::TreeCursor<'tree>, + root: &'tree Root>, +} + +impl<'tree, L: LanguageExt> Level<'tree, L> { + #[must_use] pub fn new(node: &Node<'tree, StrDoc>) -> Self { + let mut deque = VecDeque::new(); + deque.push_back(node.inner); + let cursor = node.inner.walk(); + Self { + deque, + cursor, + root: node.root, + } + } +} + +/// Time complexity is O(N). Space complexity is O(N) +impl<'tree, L: LanguageExt> Iterator for Level<'tree, L> { + type Item = Node<'tree, StrDoc>; + fn next(&mut self) -> Option { + let inner = self.deque.pop_front()?; + let children = inner.children(&mut self.cursor); + self.deque.extend(children); + Some(self.root.adopt(inner)) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::language::Tsx; + use std::ops::Range; + + // recursive pre order as baseline + fn pre_order(node: Node>) -> Vec> { + let mut ret = vec![node.range()]; + ret.extend(node.children().flat_map(pre_order)); + ret + } + + // recursion baseline + fn post_order(node: Node>) -> Vec> { + let mut ret: Vec<_> = node.children().flat_map(post_order).collect(); + ret.push(node.range()); + ret + } + + fn pre_order_equivalent(source: &str) { + let grep = Tsx.ast_grep(source); + let node = grep.root(); + let iterative: Vec<_> = Pre::new(&node).map(|n| n.range()).collect(); + let recursive = pre_order(node); + assert_eq!(iterative, recursive); + } + + fn post_order_equivalent(source: &str) { + let grep = Tsx.ast_grep(source); + let node = grep.root(); + let iterative: Vec<_> = Post::new(&node).map(|n| n.range()).collect(); + let recursive = post_order(node); + assert_eq!(iterative, recursive); + } + + const CASES: &[&str] = &[ + "console.log('hello world')", + "let a = (a, b, c)", + "function test() { let a = 1; let b = 2; a === b}", + "[[[[[[]]]]], 1 , 2 ,3]", + "class A { test() { class B {} } }", + ]; + + #[test] + fn tes_pre_order() { + for case in CASES { + pre_order_equivalent(case); + } + } + + #[test] + fn test_post_order() { + for case in CASES { + post_order_equivalent(case); + } + } + + #[test] + fn test_different_order() { + for case in CASES { + let grep = Tsx.ast_grep(case); + let node = grep.root(); + let pre: Vec<_> = Pre::new(&node).map(|n| n.range()).collect(); + let post: Vec<_> = Post::new(&node).map(|n| n.range()).collect(); + let level: Vec<_> = Level::new(&node).map(|n| n.range()).collect(); + assert_ne!(pre, post); + assert_ne!(pre, level); + assert_ne!(post, level); + } + } + + #[test] + fn test_fused_traversal() { + for case in CASES { + let grep = Tsx.ast_grep(case); + let node = grep.root(); + let mut pre = Pre::new(&node); + let mut post = Post::new(&node); + while pre.next().is_some() {} + while post.next().is_some() {} + assert!(pre.next().is_none()); + assert!(pre.next().is_none()); + assert!(post.next().is_none()); + assert!(post.next().is_none()); + } + } + + #[test] + fn test_non_root_traverse() { + let grep = Tsx.ast_grep("let a = 123; let b = 123;"); + let node = grep.root(); + let pre: Vec<_> = Pre::new(&node).map(|n| n.range()).collect(); + let post: Vec<_> = Post::new(&node).map(|n| n.range()).collect(); + let node2 = node.child(0).unwrap(); + let pre2: Vec<_> = Pre::new(&node2).map(|n| n.range()).collect(); + let post2: Vec<_> = Post::new(&node2).map(|n| n.range()).collect(); + // traversal should stop at node + assert_ne!(pre, pre2); + assert_ne!(post, post2); + // child traversal should be a part of parent traversal + assert!(pre[1..].starts_with(&pre2)); + assert!(post.starts_with(&post2)); + } + + fn pre_order_with_matcher(node: Node>, matcher: &str) -> Vec> { + if node.matches(matcher) { + vec![node.range()] + } else { + node.children() + .flat_map(|n| pre_order_with_matcher(n, matcher)) + .collect() + } + } + + fn post_order_with_matcher(node: Node>, matcher: &str) -> Vec> { + let mut ret: Vec<_> = node + .children() + .flat_map(|n| post_order_with_matcher(n, matcher)) + .collect(); + if ret.is_empty() && node.matches(matcher) { + ret.push(node.range()); + } + ret + } + + const MATCHER_CASES: &[&str] = &[ + "Some(123)", + "Some(1, 2, Some(2))", + "NoMatch", + "NoMatch(Some(123))", + "Some(1, Some(2), Some(3))", + "Some(1, Some(2), Some(Some(3)))", + ]; + + #[test] + fn test_pre_order_visitor() { + let matcher = "Some($$$)"; + for case in MATCHER_CASES { + let grep = Tsx.ast_grep(case); + let node = grep.root(); + let recur = pre_order_with_matcher(grep.root(), matcher); + let visit: Vec<_> = Visitor::new(matcher) + .reentrant(false) + .visit(node) + .map(|n| n.range()) + .collect(); + assert_eq!(recur, visit); + } + } + #[test] + fn test_post_order_visitor() { + let matcher = "Some($$$)"; + for case in MATCHER_CASES { + let grep = Tsx.ast_grep(case); + let node = grep.root(); + let recur = post_order_with_matcher(grep.root(), matcher); + let visit: Vec<_> = Visitor::new(matcher) + .algorithm::() + .reentrant(false) + .visit(node) + .map(|n| n.range()) + .collect(); + assert_eq!(recur, visit); + } + } + + // match a leaf node will trace_up the cursor + #[test] + fn test_traversal_leaf() { + let matcher = "true"; + let case = "((((true))));true"; + let grep = Tsx.ast_grep(case); + let recur = pre_order_with_matcher(grep.root(), matcher); + let visit: Vec<_> = Visitor::new(matcher) + .reentrant(false) + .visit(grep.root()) + .map(|n| n.range()) + .collect(); + assert_eq!(recur, visit); + let recur = post_order_with_matcher(grep.root(), matcher); + let visit: Vec<_> = Visitor::new(matcher) + .algorithm::() + .reentrant(false) + .visit(grep.root()) + .map(|n| n.range()) + .collect(); + assert_eq!(recur, visit); + } +} diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml new file mode 100644 index 0000000..6df700e --- /dev/null +++ b/crates/language/Cargo.toml @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 + +[package] +name = "thread-language" +description = "Language definitions and parsers for Thread" +keywords = ["ast", "pattern", "codemod", "search", "rewrite", "languages"] +license = "AGPL-3.0-or-later AND MIT" +readme = "README.md" +categories = ["ast", "pattern", "codemod", "search", "rewrite",] +version = "0.1.0" +authors = [ + "Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>", + "Knitli Inc ", + "Adam Poulemanos for Knitli ", +] +edition.workspace = true +repository.workspace = true +rust-version.workspace = true +include.workspace = true + +[build-dependencies] +cc = "1.2.30" + +[dependencies] +thread-ast-engine = { workspace = true, features = ["parsing"] } +thread-utils = { workspace = true, default-features = false, features = [ + "hashers", +] } +ignore.workspace = true +serde.workspace = true +tree-sitter.workspace = true + +tree-sitter-bash = { version = "0.25.0", optional = true } +tree-sitter-c = { version = "0.24.1", optional = true } +tree-sitter-cpp = { version = "0.23.4", optional = true } +tree-sitter-c-sharp = { version = "0.23.1", optional = true } +tree-sitter-css = { version = "0.23.2", optional = true } +tree-sitter-elixir = { version = "0.3.4", optional = true } +tree-sitter-go = { version = "0.23.4", optional = true } +tree-sitter-haskell = { version = "0.23.1", optional = true } +tree-sitter-html = { version = "0.23.2", optional = true } +tree-sitter-java = { version = "0.23.5", optional = true } +tree-sitter-javascript = { version = "0.23.1", optional = true } +tree-sitter-json = { version = "0.24.8", optional = true } +tree-sitter-kotlin = { version = "0.4.0", optional = true, package = "tree-sitter-kotlin-sg" } +tree-sitter-lua = { version = "0.2.0", optional = true } +tree-sitter-php = { version = "0.23.11", optional = true } +tree-sitter-python = { version = "0.23.6", optional = true } +tree-sitter-ruby = { version = "0.23.1", optional = true } +tree-sitter-rust = { version = "0.24.0", optional = true } +tree-sitter-scala = { version = "0.24.0", optional = true } +tree-sitter-swift = { version = "0.7.1", optional = true } +tree-sitter-typescript = { version = "0.23.2", optional = true } +tree-sitter-yaml = { version = "0.7.1", optional = true } + +[features] + +builtin-parser = [ + "tree-sitter-bash", + "tree-sitter-c", + "tree-sitter-cpp", + "tree-sitter-c-sharp", + "tree-sitter-css", + "tree-sitter-elixir", + "tree-sitter-go", + "tree-sitter-haskell", + "tree-sitter-html", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-json", + "tree-sitter-kotlin", + "tree-sitter-lua", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-scala", + "tree-sitter-swift", + "tree-sitter-typescript", + "tree-sitter-yaml", +] +napi-lang = [ + "tree-sitter-css", + "tree-sitter-html", + "tree-sitter-javascript", + "tree-sitter-typescript", +] +profiling = [] +default = ["builtin-parser"] + +[dev-dependencies] +criterion = { version = "0.6", features = ["html_reports"] } +thread-ast-engine = { workspace = true, features = ["matching", "parsing"] } + +[[bench]] +name = "performance" +harness = false diff --git a/crates/language/LICENSE-AGPL-3.0-or-later b/crates/language/LICENSE-AGPL-3.0-or-later new file mode 100644 index 0000000..1b62c0f --- /dev/null +++ b/crates/language/LICENSE-AGPL-3.0-or-later @@ -0,0 +1,662 @@ +# GNU AFFERO GENERAL PUBLIC LICENSE + + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/crates/language/LICENSE-MIT b/crates/language/LICENSE-MIT new file mode 100644 index 0000000..e3a8a65 --- /dev/null +++ b/crates/language/LICENSE-MIT @@ -0,0 +1,30 @@ + + +# MIT License + +Copyright (c) 2022 Herrington Darkholme + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +## This crate was created from forked code + +The above license and copyright applies to any code before the fork. Any changes since Ast-Grep v.0.38.7 are separately licensed. + +- See [LICENSE-AGPL-3.0-or-later](LICENSE-AGPL-3.0-or-later) +- For a description of the fork and what it includes, visit the [Thread repo](https://github.com/knitli/thread/tree/main/VENDORED.md) diff --git a/crates/language/README.md b/crates/language/README.md new file mode 100644 index 0000000..2e23dca --- /dev/null +++ b/crates/language/README.md @@ -0,0 +1,6 @@ + diff --git a/crates/language/VENDORED.md b/crates/language/VENDORED.md new file mode 100644 index 0000000..c9c1882 --- /dev/null +++ b/crates/language/VENDORED.md @@ -0,0 +1,69 @@ + +# Our Fork of Ast-Grep + +We forked most of the excellent [Ast-Grep][AG] codebase to create Thread. We originally tried using Ast-Grep as a library, but ran into limitations. The `core` module is intended to work as a library, but our plans for Thread required finer control over features at build-time. + +While Thread includes a CLI (and that’s likely your first encounter with it), our CLI is just the tip of the iceberg. The real focus is on service-oriented architecture for cloud and automation use. + +**We forked at Ast-Grep v0.38.7**. See [the original repo at that version](https://github.com/ast-grep/ast-grep/tree/0.38.7) for reference. + +--- + +## Why We Forked + +We tried multiple approaches to integrating Ast-Grep, from working with it as a library with a complex feature-gating scheme, to vendoring and dividing four crates into granular components (14 crates!). That latter one was overkill, and was probably us jumping the shark early :shark:⛷️. + +We settled on a middle ground. We forked `core`, `config`, and `language`, and will continue to use `dynamic` and others as dependencies as needed. We also did our best to make as few changes as possible -- mostly focusing on separating features with gating, and abstracting some core elements to better fit our service oriented approach. + +Our changes are mostly structuralβ€”we needed finer-grained control over organization, minimal cold start times, and clean separation between services. + +### Where the Fork Lives + +* [`thread-ast-engine`](https://github.com/knitli/thread/tree/main/crates/ast-engine): Fork of `ast-grep-core`. We separated its features into `parsing`, and `matching` features so that we could better control their usage in our services. +* [`thread-rule-engine`](https://github.com/knitli/thread/tree/main/crates/rule-engine): Fork of `ast-grep-config`. We isolated rule management, parsing, and validation functionality, and made changes to separate the logic from the assumption of a config file, allowing us more flexibility to implement rule-based operations in different environments. +* [`thread-language`](https://github.com/knitli/thread/tree/main/crates/language): We changed very little here, we needed the languages publicly exposed to feature gate each one separately. We also plan to add different languages more suitable for our needs. + +We admittedly didn't have this conversation with the Ast-Grep contributors, which we will once the dust settles a bit and we can divert attention from delivering an MVP. Our changes are intentionally reversible, and we'd like to find a way to return to using the core crates and contributing there (but that may not be realistic with different goals between the projects). + +### Licensing + +**Original Ast-Grep code** is MIT-licensed (see the `LICENSE-MIT` file in each crate). +**Our changes and anything Thread-specific** are licensed under the [AGPL v3.0](https://github.com/knitli/thread/blob/main/LICENSE.md). + +* If you want pure MIT, use Ast-Grep directly, or cherry-pick the original code. The relationships are: + + * `thread-ast-engine` β†’ `ast-grep-core` + * `thread-rule-engine` β†’ `ast-grep-config` + * `thread-language` β†’ `ast-grep-language` + +* Using our fork means AGPL; sharing required. If you want to treat your code based on Thread like :ring: Gollum :ring:, [contact us for a commercial license](mailto:licensing@knit.li), and you can keep your *precious*. +* Our project meets the [Reuse Specification](https://reuse.software/). Every file in the project is marked in its header with license information, or with an accompanying `.license` file. Code from `Ast-Grep` will be marked `AGPL-3.0-or-later AND MIT` (this isn't an `or` where you can choose between them). + +> Technically, you *can* only use the unchanged Ast-Grep bits under MITβ€”but you’d need to do the diffing yourself, and you’ll miss out on Thread-specific improvements (not sure why you would do that instead of just forking Ast-Grep...). AGPL means our changes (and anyone else’s) will always be open source. + +--- + +## We're Going to Contribute to Ast-Grep, too + +Most of Thread's Ast-Grep codebase is unchanged for now, and where we identify bugs or areas for improvement, we'll submit them upstream under Ast-Grep's MIT license. Similarly, we'll monitor changes to Ast-Grep and incorporate fixes and improvements into Thread. + +## So Are You Going to Try to Keep the Changes Minimal Forever? + +Probably not. Our first commitment is making Thread as great as we can, even if we diverge from Ast-Grep. We'd love to see the projects grow together, but they may not always align perfectly. Ast-Grep has its own roadmap and priorities, and we have ours. Thread is not Ast-Grep; it is just built on top of it. + +## Why Ast-Grep? + +Ast-Grep makes [Tree-sitter][ts] actually usable for code search/replace. We built on it because it solved the hard partsβ€”especially CST-wranglingβ€”so we could focus on new stuff, not rebuilding the same wheel.[^1] + +> For reasons lost to time, everyone in this ecosystem calls their [CSTs][csts] β€œASTs.” Maybe it’s like the first rule of Tree-sitter Club: we all pretend they’re ASTs :fist:. + +[^1]: If our initial attempts at integrating Ast-Grep represent how we would reinvent the wheel, we probably would have made our version square and in 15 parts, assembly required. + +[AG]: https://github.com/ast-grep/ast-grep +[ts]: https://github.com/tree-sitter/tree-sitter +[csts]: https://en.wikipedia.org/wiki/Concrete_syntax_tree diff --git a/crates/language/benches/performance.rs b/crates/language/benches/performance.rs new file mode 100644 index 0000000..139ec1f --- /dev/null +++ b/crates/language/benches/performance.rs @@ -0,0 +1,145 @@ +// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use criterion::{Criterion, criterion_group, criterion_main}; +use std::hint::black_box; +use std::path::Path; +use std::str::FromStr; +use thread_language::*; + +fn bench_pre_process_pattern(c: &mut Criterion) { + let patterns = [ + "$VAR", + "function $NAME($ARGS) { $BODY }", + "class $CLASS extends $PARENT { $METHODS }", + "import $MODULE from '$PATH'", + "const $VAR = $VALUE;", + "if ($CONDITION) { $THEN } else { $ELSE }", + "$$$A", // Anonymous multiple + "no dollars here", // No processing needed + ]; + + c.bench_function("pre_process_pattern", |b| { + b.iter(|| { + for pattern in &patterns { + let lang = thread_language::Python; + black_box(lang.pre_process_pattern(black_box(pattern))); + } + }) + }); +} + +fn bench_from_str(c: &mut Criterion) { + let languages = [ + "rust", + "rs", + "javascript", + "js", + "typescript", + "ts", + "python", + "py", + "java", + "cpp", + "c", + "go", + "html", + "css", + ]; + + c.bench_function("from_str", |b| { + b.iter(|| { + for lang_str in &languages { + black_box(SupportLang::from_str(black_box(lang_str)).ok()); + } + }) + }); +} + +fn bench_from_extension(c: &mut Criterion) { + let files = [ + "main.rs", + "app.js", + "index.ts", + "script.tsx", + "main.py", + "App.java", + "main.cpp", + "main.c", + "main.go", + "index.html", + "style.css", + "config.json", + "data.yaml", + "rare.scala", + ]; + + c.bench_function("from_extension", |b| { + b.iter(|| { + for file in &files { + let path = Path::new(black_box(file)); + black_box(SupportLang::from_path(path)); + } + }) + }); +} + +fn bench_language_loading(c: &mut Criterion) { + c.bench_function("language_loading", |b| { + b.iter(|| { + // Test cached language loading + for _ in 0..10 { + black_box(thread_language::parsers::language_rust()); + black_box(thread_language::parsers::language_javascript()); + black_box(thread_language::parsers::language_python()); + } + }) + }); +} + +fn bench_html_injection(c: &mut Criterion) { + let html_content = r#" + + + + + + + + + + + "#; + + c.bench_function("html_injection_extraction", |b| { + b.iter(|| { + let root = Html.ast_grep(black_box(html_content)); + black_box(Html.extract_injections(root.root())); + }) + }); +} + +criterion_group!( + benches, + bench_pre_process_pattern, + bench_from_str, + bench_from_extension, + bench_language_loading, + bench_html_injection +); +criterion_main!(benches); diff --git a/crates/language/src/bash.rs b/crates/language/src/bash.rs new file mode 100644 index 0000000..dbef12f --- /dev/null +++ b/crates/language/src/bash.rs @@ -0,0 +1,46 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Bash); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Bash); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Bash) +} + +#[test] +fn test_bash_pattern() { + test_match("123", "123"); + test_match("echo $A", "echo test"); + // TODO + // test_match("echo { $A }", "echo {1..10}"); + test_match("echo $abc", "echo $abc"); +} + +#[test] +fn test_bash_pattern_no_match() { + test_non_match("echo $abc", "echo test"); + test_non_match("echo $abc", "echo $ABC"); +} + +#[test] +fn test_bash_replace() { + // TODO: change the replacer to log $A + let ret = test_replace("echo 123", "echo $A", "log 123"); + assert_eq!(ret, "log 123"); +} diff --git a/crates/language/src/cpp.rs b/crates/language/src/cpp.rs new file mode 100644 index 0000000..169bcc4 --- /dev/null +++ b/crates/language/src/cpp.rs @@ -0,0 +1,51 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Cpp); +} + +#[test] +fn test_cpp_pattern() { + test_match("$A->b()", "expr->b()"); + test_match("if (a) { $$$VERYLONGNAME }", "if (a) { a;b;c; }"); + test_match("expr->$B()", "expr->b()"); + test_match("ns::ns2::$F()", "ns::ns2::func()"); + test_match("template ", "template "); + test_match("if constexpr ($C) {}", "if constexpr (13+5==18) {}"); + test_match( + "template typename std::enable_if<$C, T>::type;", + "template typename std::enable_if::value, T>::type;", + ); + test_match("if ($A)", "if (a | b) abc;"); + // see https://github.com/ast-grep/ast-grep/issues/1791 + test_match("struct $A: $B", "struct A: B {}"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Cpp) +} + +#[test] +fn test_cpp_replace() { + let ret = test_replace("expr->b()", "$A->b()", "func($A)->b()"); + assert_eq!(ret, "func(expr)->b()"); + let ret = test_replace("if (a) { a;b;c; }", "if (a) { $$$A }", "$$$A"); + assert_eq!(ret, "a;b;c;"); + // https://stackoverflow.com/questions/78663351 + let ret = test_replace( + "if (a) { a;b;c; }", + "if (a) { $$$VERYLONGNAME }", + "$$$VERYLONGNAME", + ); + assert_eq!(ret, "a;b;c;"); +} diff --git a/crates/language/src/csharp.rs b/crates/language/src/csharp.rs new file mode 100644 index 0000000..ca88a74 --- /dev/null +++ b/crates/language/src/csharp.rs @@ -0,0 +1,33 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, CSharp); +} + +#[test] +fn test_c_sharp_pattern() { + let target = "if (table == null) ThrowHelper.ThrowArgumentNullException(nameof(table));"; + test_match("int $A = 0;", "int nint = 0;"); + test_match("ThrowHelper.ThrowArgumentNullException($_)", target); + test_match("ThrowHelper.$_", target); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, CSharp) +} + +#[test] +fn test_c_sharp_replace() { + let ret = test_replace("int @int = 0;", "int $A = 0", "bool @bool = true"); + assert_eq!(ret, "bool @bool = true;"); +} diff --git a/crates/language/src/css.rs b/crates/language/src/css.rs new file mode 100644 index 0000000..3aa5552 --- /dev/null +++ b/crates/language/src/css.rs @@ -0,0 +1,36 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Css); +} + +#[test] +fn test_css_pattern() { + test_match("$A { color: red; }", ".a { color: red; }"); + test_match(".a { color: $COLOR; }", ".a { color: red; }"); + test_match(".a { $PROP: red; }", ".a { color: red; }"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Css) +} + +#[test] +fn test_css_replace() { + let ret = test_replace( + ".a {color: red; }", + ".a { color: $COLOR}", + ".a {background: $COLOR}", + ); + assert_eq!(ret, ".a {background: red}"); +} diff --git a/crates/language/src/elixir.rs b/crates/language/src/elixir.rs new file mode 100644 index 0000000..9a4d705 --- /dev/null +++ b/crates/language/src/elixir.rs @@ -0,0 +1,98 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Elixir); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Elixir); +} + +#[test] +fn test_elixir_str() { + test_match("IO.puts(\"$A\")", "IO.puts(\"123\")"); + test_match("IO.puts($A)", "IO.puts(123)"); + test_non_match("IO.puts(123)", "IO.puts(456)"); + test_non_match("\"123\"", "\"456\""); +} + +#[test] +fn test_elixir_pattern() { + test_match("$A", ":ok"); + test_match("$A != nil", "a != nil"); + test_match( + r#" + def $FUNC($$$ARGS) when $GUARDS do + $$$BODY + end + "#, + r#" + def add(a, b) when is_integer(a) and is_integer(b) do + a + b + end + "#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Elixir) +} + +#[test] +fn test_elixir_replace() { + let ret = test_replace( + "Stream.map([1, 2, 3], fn x -> x * 2 end)", + "Stream.map($$$ARGS)", + "Enum.map($$$ARGS)", + ); + assert_eq!(ret, "Enum.map([1, 2, 3], fn x -> x * 2 end)"); + + let ret = test_replace( + ":budgie = hd([:budgie, :cat, :dog])", + "$FIRST = hd($LIST)", + "[$FIRST | _] = $LIST", + ); + assert_eq!(ret, "[:budgie | _] = [:budgie, :cat, :dog]"); + + let ret = test_replace( + "opts[:hostname] || \"localhost\"", + "opts[$KEY] || $DEFAULT", + "Keyword.get(opts, $KEY, $DEFAULT)", + ); + assert_eq!(ret, "Keyword.get(opts, :hostname, \"localhost\")"); + + let ret = test_replace( + "Module.function(:a, :b)", + "Module.function($ARG1, $ARG2)", + "Module.function($ARG2, $ARG1)", + ); + assert_eq!(ret, "Module.function(:b, :a)"); + + let ret = test_replace( + "Greeter.greet(:hello, \"human\")", + "Greeter.greet($ARG1, $ARG2)", + "Greeter.greet($ARG1, name: $ARG2)", + ); + assert_eq!(ret, "Greeter.greet(:hello, name: \"human\")"); + + let ret = test_replace( + "for x <- [\"budgie\", \"cat\", \"dog\"], do: String.to_atom(x)", + "for $I <- $LIST, do: $MODULE.$FUNCTION($I)", + "Enum.map($LIST, fn $I -> $MODULE.$FUNCTION($I) end)", + ); + assert_eq!( + ret, + "Enum.map([\"budgie\", \"cat\", \"dog\"], fn x -> String.to_atom(x) end)" + ); +} diff --git a/crates/language/src/go.rs b/crates/language/src/go.rs new file mode 100644 index 0000000..9f50a46 --- /dev/null +++ b/crates/language/src/go.rs @@ -0,0 +1,66 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Go); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Go); +} + +#[test] +fn test_go_str() { + test_match("print($A)", "print(123)"); + test_match("print('123')", "print('123')"); + test_non_match("print('123')", "print('456')"); + test_non_match("'123'", "'456'"); +} + +#[test] +fn test_go_pattern() { + test_match("$A = 0", "a = 0"); + test_match( + r#"func $A($$$) $B { $$$ }"#, + r#" +func plus(a int, b int) int { + return a + b +}"#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Go) +} + +#[test] +fn test_go_replace() { + let ret = test_replace( + r#" +func intSeq() { + defer func() { + i++ + }() +}"#, + r#"defer func() { +$$$BODY }()"#, + r#"func b() { $$$BODY }"#, + ); + assert_eq!( + ret, + r#" +func intSeq() { + func b() { i++ } +}"# + ); +} diff --git a/crates/language/src/haskell.rs b/crates/language/src/haskell.rs new file mode 100644 index 0000000..b28e041 --- /dev/null +++ b/crates/language/src/haskell.rs @@ -0,0 +1,72 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Haskell); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Haskell); +} + +#[test] +fn test_haskell_str() { + test_match("return $A", "return 3"); + test_match(r#""abc""#, r#""abc""#); + test_match("$A $B", "f x"); + test_match("$A ($B $C)", "f (x y)"); + test_match("let $A = $B in $A + $A", "let x = 3 in x + x"); + test_non_match("$A $B", "f"); + test_non_match("$A + $A", "3 + 4"); + test_non_match("$A ($B $C)", "f x y"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Haskell) +} + +#[test] +fn test_haskell_replace() { + let ret = test_replace( + r#" +fibonacci :: [Int] +fibonacci = + 1 : 1 : zipWith (+) fibonacci (tail fibonacci) +"#, + r#"$F = $$$BODY"#, + r#"$F = undefined"#, + ); + assert_eq!( + ret, + r#" +fibonacci :: [Int] +fibonacci = undefined +"# + ); + + let ret = test_replace( + r#" +flip :: (a -> b -> c) -> b -> a -> c +flip f x y = f y x +"#, + r#"$F :: $A -> $B"#, + r#"$F :: ($B) -> $A"#, + ); + assert_eq!( + ret, + r#" +flip :: (b -> a -> c) -> (a -> b -> c) +flip f x y = f y x +"# + ); +} diff --git a/crates/language/src/html.rs b/crates/language/src/html.rs new file mode 100644 index 0000000..ad04c4f --- /dev/null +++ b/crates/language/src/html.rs @@ -0,0 +1,214 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::pre_process_pattern; +use thread_ast_engine::Language; +use thread_ast_engine::matcher::{Pattern, PatternBuilder, PatternError}; +use thread_ast_engine::tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange}; +use thread_ast_engine::{Doc, Node, matcher::KindMatcher}; +use thread_utils::RapidMap; + +// tree-sitter-html uses locale dependent iswalnum for tagName +// https://github.com/tree-sitter/tree-sitter-html/blob/b5d9758e22b4d3d25704b72526670759a9e4d195/src/scanner.c#L194 +#[derive(Clone, Copy, Debug)] +pub struct Html; +impl Language for Html { + fn expando_char(&self) -> char { + 'z' + } + fn pre_process_pattern<'q>(&self, query: &'q str) -> std::borrow::Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + fn kind_to_id(&self, kind: &str) -> u16 { + crate::parsers::language_html().id_for_node_kind(kind, true) + } + fn field_to_id(&self, field: &str) -> Option { + crate::parsers::language_html() + .field_id_for_name(field) + .map(|f| f.get()) + } + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, *self)) + } +} +impl LanguageExt for Html { + fn get_ts_language(&self) -> TSLanguage { + crate::parsers::language_html() + } + fn injectable_languages(&self) -> Option<&'static [&'static str]> { + Some(&["css", "js", "ts", "tsx", "scss", "less", "stylus", "coffee"]) + } + fn extract_injections( + &self, + root: Node>, + ) -> RapidMap> { + let lang = root.lang(); + let mut map = RapidMap::default(); + + // Pre-allocate common language vectors to avoid repeated allocations + let mut js_ranges = Vec::new(); + let mut css_ranges = Vec::new(); + let mut other_ranges: RapidMap> = RapidMap::default(); + + // Process script elements + let script_matcher = KindMatcher::new("script_element", lang); + for script in root.find_all(script_matcher) { + if let Some(content) = script.children().find(|c| c.kind() == "raw_text") { + let range = node_to_range(&content); + + // Fast path for common languages + match find_lang(&script) { + Some(lang_name) => { + if lang_name == "js" || lang_name == "javascript" { + js_ranges.push(range); + } else { + other_ranges + .entry(lang_name) + .or_default() + .push(range); + } + } + None => js_ranges.push(range), // Default to JavaScript + } + } + } + + // Process style elements + let style_matcher = KindMatcher::new("style_element", lang); + for style in root.find_all(style_matcher) { + if let Some(content) = style.children().find(|c| c.kind() == "raw_text") { + let range = node_to_range(&content); + + // Fast path for CSS (most common) + match find_lang(&style) { + Some(lang_name) => { + if lang_name == "css" { + css_ranges.push(range); + } else { + other_ranges + .entry(lang_name) + .or_default() + .push(range); + } + } + None => css_ranges.push(range), // Default to CSS + } + } + } + + // Only insert non-empty vectors to reduce map size + if !js_ranges.is_empty() { + map.insert("js".to_string(), js_ranges); + } + if !css_ranges.is_empty() { + map.insert("css".to_string(), css_ranges); + } + + // Merge other languages + for (lang_name, ranges) in other_ranges { + if !ranges.is_empty() { + map.insert(lang_name, ranges); + } + } + + map + } +} + +fn find_lang(node: &Node) -> Option { + let html = node.lang(); + let attr_matcher = KindMatcher::new("attribute", html); + let name_matcher = KindMatcher::new("attribute_name", html); + let val_matcher = KindMatcher::new("attribute_value", html); + node.find_all(attr_matcher).find_map(|attr| { + let name = attr.find(&name_matcher)?; + if name.text() != "lang" { + return None; + } + let val = attr.find(&val_matcher)?; + Some(val.text().to_string()) + }) +} + +fn node_to_range(node: &Node) -> TSRange { + let r = node.range(); + let start = node.start_pos(); + let sp = start.byte_point(); + let sp = tree_sitter::Point::new(sp.0, sp.1); + let end = node.end_pos(); + let ep = end.byte_point(); + let ep = tree_sitter::Point::new(ep.0, ep.1); + TSRange { + start_byte: r.start, + end_byte: r.end, + start_point: sp, + end_point: ep, + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Html); + } + + fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Html); + } + + #[test] + fn test_html_match() { + test_match("", ""); + test_match("<$TAG>", ""); + test_match("<$TAG class='foo'>$$$", "

"); + test_match("
$$$
", "
123
"); + test_non_match("<$TAG class='foo'>$$$", "
"); + test_non_match("
$$$
", "
123
"); + } + + fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Html) + } + + #[test] + fn test_html_replace() { + let ret = test_replace( + r#"
bar
"#, + r#"<$TAG class='foo'>$$$B"#, + r#"<$TAG class='$$$B'>foo"#, + ); + assert_eq!(ret, r#"
foo
"#); + } + + fn extract(src: &str) -> RapidMap> { + let root = Html.ast_grep(src); + Html.extract_injections(root.root()) + } + + #[test] + fn test_html_extraction() { + let map = extract(""); + assert!(map.contains_key("css")); + assert!(map.contains_key("js")); + assert_eq!(map["css"].len(), 1); + assert_eq!(map["js"].len(), 1); + } + + #[test] + fn test_explicit_lang() { + let map = extract( + "", + ); + assert!(map.contains_key("ts")); + assert_eq!(map["ts"].len(), 2); + assert_eq!(map["scss"].len(), 2); + } +} diff --git a/crates/language/src/json.rs b/crates/language/src/json.rs new file mode 100644 index 0000000..3fa5bc7 --- /dev/null +++ b/crates/language/src/json.rs @@ -0,0 +1,45 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Json); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Json); +} + +#[test] +fn test_json_str() { + test_match("123", "123"); + test_match("{\"d\": 123}", "{\"d\": 123}"); + test_non_match("344", "123"); + test_non_match("{\"key\": 123}", "{}"); +} + +#[test] +fn test_json_pattern() { + test_match("$A", "123"); + test_match(r#"[$A]"#, r#"[123]"#); + test_match(r#"{ $$$ }"#, r#"{"abc": 123}"#); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Json) +} + +#[test] +fn test_json_replace() { + let ret = test_replace(r#"{ "a": 123 }"#, r#"123"#, r#"456"#); + assert_eq!(ret, r#"{ "a": 456 }"#); +} diff --git a/crates/language/src/kotlin.rs b/crates/language/src/kotlin.rs new file mode 100644 index 0000000..e49d357 --- /dev/null +++ b/crates/language/src/kotlin.rs @@ -0,0 +1,61 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Kotlin); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Kotlin); +} + +#[test] +fn test_kotlin_str() { + test_match("println($A)", "println(123)"); + test_match("println('123')", "println('123')"); + test_non_match("println('123')", "println('456')"); + test_non_match("'123'", "'456'"); +} + +#[test] +fn test_kotlin_pattern() { + test_match("$A = 0", "a = 0"); + test_match( + r#"fun $A($$$): $B { $$$ }"#, + r#" +fun plus(a: Int, b: Int): Int { + return a + b +}"#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Kotlin) +} + +#[test] +fn test_kotlin_replace() { + let ret = test_replace( + r#" +fun plus(a: Int, b: Int): Int { + return a + b +}"#, + r#"fun $F($$$): $R { $$$BODY }"#, + r#"fun $F() { $$$BODY }"#, + ); + assert_eq!( + ret, + r#" +fun plus() { return a + b }"# + ); +} diff --git a/crates/language/src/lib.rs b/crates/language/src/lib.rs new file mode 100644 index 0000000..1dc93b8 --- /dev/null +++ b/crates/language/src/lib.rs @@ -0,0 +1,674 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! This module defines the supported programming languages for ast-grep. +//! +//! It provides a set of customized languages with expando_char / pre_process_pattern, +//! and a set of stub languages without preprocessing. +//! A rule of thumb: if your language does not accept identifiers like `$VAR`. +//! You need use `impl_lang_expando!` macro and a standalone file for testing. +//! Otherwise, you can define it as a stub language using `impl_lang!`. +//! To see the full list of languages, visit `` +pub mod parsers; + +mod bash; +mod cpp; +mod csharp; +mod css; +mod elixir; +mod go; +mod haskell; +mod html; +mod json; +mod kotlin; +mod lua; +mod php; +#[cfg(feature = "profiling")] +pub mod profiling; +mod python; +mod ruby; +mod rust; +mod scala; +mod swift; +mod yaml; + +pub use html::Html; +use thread_ast_engine::{Pattern, PatternBuilder, PatternError}; + +use ignore::types::{Types, TypesBuilder}; +use serde::de::Visitor; +use serde::{Deserialize, Deserializer, Serialize, de}; +use std::borrow::Cow; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::path::Path; +use std::str::FromStr; +use thread_ast_engine::Node; +use thread_ast_engine::meta_var::MetaVariable; +use thread_ast_engine::tree_sitter::{StrDoc, TSLanguage, TSRange}; +use thread_utils::RapidMap; + +pub use thread_ast_engine::language::Language; +pub use thread_ast_engine::tree_sitter::LanguageExt; + +/// this macro implements bare-bone methods for a language +macro_rules! impl_lang { + ($lang: ident, $func: ident) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language() + .id_for_node_kind(kind, /*named*/ true) + } + fn field_to_id(&self, field: &str) -> Option { + self.get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, self.clone())) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + parsers::$func().into() + } + } + }; +} + +fn pre_process_pattern(expando: char, query: &str) -> std::borrow::Cow<'_, str> { + // Fast path: check if any processing is needed + let has_dollar = query.as_bytes().contains(&b'$'); + if !has_dollar { + return std::borrow::Cow::Borrowed(query); + } + + // Count exact size needed to avoid reallocations + let mut size_needed = 0; + let mut needs_processing = false; + let mut dollar_count = 0; + + for c in query.chars() { + if c == '$' { + dollar_count += 1; + } else { + let need_replace = matches!(c, 'A'..='Z' | '_') || dollar_count == 3; + if need_replace && dollar_count > 0 { + needs_processing = true; + } + size_needed += dollar_count + 1; + dollar_count = 0; + } + } + size_needed += dollar_count; + + // If no replacement needed, return borrowed + if !needs_processing { + return std::borrow::Cow::Borrowed(query); + } + + // Pre-allocate exact size and process in-place + let mut ret = String::with_capacity(size_needed); + dollar_count = 0; + + for c in query.chars() { + if c == '$' { + dollar_count += 1; + continue; + } + let need_replace = matches!(c, 'A'..='Z' | '_') || dollar_count == 3; + let sigil = if need_replace { expando } else { '$' }; + + // Push dollars directly without iterator allocation + for _ in 0..dollar_count { + ret.push(sigil); + } + dollar_count = 0; + ret.push(c); + } + + // Handle trailing dollars + let sigil = if dollar_count == 3 { expando } else { '$' }; + for _ in 0..dollar_count { + ret.push(sigil); + } + + std::borrow::Cow::Owned(ret) +} + +/// this macro will implement expando_char and pre_process_pattern +/// use this if your language does not accept $ as valid identifier char +macro_rules! impl_lang_expando { + ($lang: ident, $func: ident, $char: expr) => { + #[derive(Clone, Copy, Debug)] + pub struct $lang; + impl Language for $lang { + fn kind_to_id(&self, kind: &str) -> u16 { + self.get_ts_language() + .id_for_node_kind(kind, /*named*/ true) + } + fn field_to_id(&self, field: &str) -> Option { + self.get_ts_language() + .field_id_for_name(field) + .map(|f| f.get()) + } + fn expando_char(&self) -> char { + $char + } + fn pre_process_pattern<'q>(&self, query: &'q str) -> std::borrow::Cow<'q, str> { + pre_process_pattern(self.expando_char(), query) + } + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, self.clone())) + } + } + impl LanguageExt for $lang { + fn get_ts_language(&self) -> TSLanguage { + $crate::parsers::$func().into() + } + } + }; +} + +pub trait Alias: Display { + const ALIAS: &'static [&'static str]; +} + +/// Implements the `ALIAS` associated constant for the given lang, which is +/// then used to define the `alias` const fn and a `Deserialize` impl. +macro_rules! impl_alias { + ($lang:ident => $as:expr) => { + impl Alias for $lang { + const ALIAS: &'static [&'static str] = $as; + } + + impl fmt::Display for $lang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } + } + + impl<'de> Deserialize<'de> for $lang { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let vis = AliasVisitor { + aliases: Self::ALIAS, + }; + deserializer.deserialize_str(vis)?; + Ok($lang) + } + } + + impl From<$lang> for SupportLang { + fn from(_: $lang) -> Self { + Self::$lang + } + } + }; +} +/// Generates as convenience conversions between the lang types +/// and `SupportedType`. +macro_rules! impl_aliases { + ($($lang:ident => $as:expr),* $(,)?) => { + $(impl_alias!($lang => $as);)* + const fn alias(lang: SupportLang) -> &'static [&'static str] { + match lang { + $(SupportLang::$lang => $lang::ALIAS),* + } + } + }; +} + +/* Customized Language with expando_char / pre_process_pattern */ +// https://en.cppreference.com/w/cpp/language/identifiers +// Due to some issues in the tree-sitter parser, it is not possible to use +// unicode literals in identifiers for C/C++ parsers +impl_lang_expando!(C, language_c, 'Β΅'); +impl_lang_expando!(Cpp, language_cpp, 'Β΅'); +// https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/language-specification/lexical-structure#643-identifiers +// all letter number is accepted +// https://www.compart.com/en/unicode/category/Nl +impl_lang_expando!(CSharp, language_c_sharp, 'Β΅'); +// https://www.w3.org/TR/CSS21/grammar.html#scanner +impl_lang_expando!(Css, language_css, '_'); +// https://github.com/elixir-lang/tree-sitter-elixir/blob/a2861e88a730287a60c11ea9299c033c7d076e30/grammar.js#L245 +impl_lang_expando!(Elixir, language_elixir, 'Β΅'); +// we can use any Unicode code point categorized as "Letter" +// https://go.dev/ref/spec#letter +impl_lang_expando!(Go, language_go, 'Β΅'); +// GHC supports Unicode syntax per +// https://ghc.gitlab.haskell.org/ghc/doc/users_guide/exts/unicode_syntax.html +// and the tree-sitter-haskell grammar parses it too. +impl_lang_expando!(Haskell, language_haskell, 'Β΅'); +// https://github.com/fwcd/tree-sitter-kotlin/pull/93 +impl_lang_expando!(Kotlin, language_kotlin, 'Β΅'); +// PHP accepts unicode to be used as some name not var name though +impl_lang_expando!(Php, language_php, 'Β΅'); +// we can use any char in unicode range [:XID_Start:] +// https://docs.python.org/3/reference/lexical_analysis.html#identifiers +// see also [PEP 3131](https://peps.python.org/pep-3131/) for further details. +impl_lang_expando!(Python, language_python, 'Β΅'); +// https://github.com/tree-sitter/tree-sitter-ruby/blob/f257f3f57833d584050336921773738a3fd8ca22/grammar.js#L30C26-L30C78 +impl_lang_expando!(Ruby, language_ruby, 'Β΅'); +// we can use any char in unicode range [:XID_Start:] +// https://doc.rust-lang.org/reference/identifiers.html +impl_lang_expando!(Rust, language_rust, 'Β΅'); +//https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Identifiers +impl_lang_expando!(Swift, language_swift, 'Β΅'); + +// Stub Language without preprocessing +// Language Name, tree-sitter-name, alias, extension +impl_lang!(Bash, language_bash); +impl_lang!(Java, language_java); +impl_lang!(JavaScript, language_javascript); +impl_lang!(Json, language_json); +impl_lang!(Lua, language_lua); +impl_lang!(Scala, language_scala); +impl_lang!(Tsx, language_tsx); +impl_lang!(TypeScript, language_typescript); +impl_lang!(Yaml, language_yaml); +// See ripgrep for extensions +// https://github.com/BurntSushi/ripgrep/blob/master/crates/ignore/src/default_types.rs + +/// Represents all built-in languages. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Hash)] +pub enum SupportLang { + Bash, + C, + Cpp, + CSharp, + Css, + Go, + Elixir, + Haskell, + Html, + Java, + JavaScript, + Json, + Kotlin, + Lua, + Php, + Python, + Ruby, + Rust, + Scala, + Swift, + Tsx, + TypeScript, + Yaml, +} + +impl SupportLang { + pub const fn all_langs() -> &'static [SupportLang] { + use SupportLang::*; + &[ + Bash, C, Cpp, CSharp, Css, Elixir, Go, Haskell, Html, Java, JavaScript, Json, Kotlin, + Lua, Php, Python, Ruby, Rust, Scala, Swift, Tsx, TypeScript, Yaml, + ] + } + + pub fn file_types(&self) -> Types { + file_types(*self) + } +} + +impl fmt::Display for SupportLang { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{self:?}") + } +} + +#[derive(Debug)] +pub enum SupportLangErr { + LanguageNotSupported(String), +} + +impl Display for SupportLangErr { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + use SupportLangErr::*; + match self { + LanguageNotSupported(lang) => write!(f, "{lang} is not supported!"), + } + } +} + +impl std::error::Error for SupportLangErr {} + +impl<'de> Deserialize<'de> for SupportLang { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(SupportLangVisitor) + } +} + +struct SupportLangVisitor; + +impl Visitor<'_> for SupportLangVisitor { + type Value = SupportLang; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("SupportLang") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + v.parse().map_err(de::Error::custom) + } +} +struct AliasVisitor { + aliases: &'static [&'static str], +} + +impl Visitor<'_> for AliasVisitor { + type Value = &'static str; + + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "one of {:?}", self.aliases) + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + self.aliases + .iter() + .copied() + .find(|&a| v.eq_ignore_ascii_case(a)) + .ok_or_else(|| de::Error::invalid_value(de::Unexpected::Str(v), &self)) + } +} + +impl_aliases! { + Bash => &["bash"], + C => &["c"], + Cpp => &["cc", "c++", "cpp", "cxx"], + CSharp => &["cs", "csharp"], + Css => &["css"], + Elixir => &["ex", "elixir"], + Go => &["go", "golang"], + Haskell => &["hs", "haskell"], + Html => &["html"], + Java => &["java"], + JavaScript => &["javascript", "js", "jsx"], + Json => &["json"], + Kotlin => &["kotlin", "kt"], + Lua => &["lua"], + Php => &["php"], + Python => &["py", "python"], + Ruby => &["rb", "ruby"], + Rust => &["rs", "rust"], + Scala => &["scala"], + Swift => &["swift"], + TypeScript => &["ts", "typescript"], + Tsx => &["tsx"], + Yaml => &["yaml", "yml"], +} + +/// Implements the language names and aliases. +impl FromStr for SupportLang { + type Err = SupportLangErr; + fn from_str(s: &str) -> Result { + // Fast path: try exact matches first (most common case) + match s { + "bash" => return Ok(SupportLang::Bash), + "c" => return Ok(SupportLang::C), + "cpp" | "c++" => return Ok(SupportLang::Cpp), + "cs" | "csharp" => return Ok(SupportLang::CSharp), + "css" => return Ok(SupportLang::Css), + "elixir" | "ex" => return Ok(SupportLang::Elixir), + "go" | "golang" => return Ok(SupportLang::Go), + "haskell" | "hs" => return Ok(SupportLang::Haskell), + "html" => return Ok(SupportLang::Html), + "java" => return Ok(SupportLang::Java), + "javascript" | "js" => return Ok(SupportLang::JavaScript), + "json" => return Ok(SupportLang::Json), + "kotlin" | "kt" => return Ok(SupportLang::Kotlin), + "lua" => return Ok(SupportLang::Lua), + "php" => return Ok(SupportLang::Php), + "python" | "py" => return Ok(SupportLang::Python), + "ruby" | "rb" => return Ok(SupportLang::Ruby), + "rust" | "rs" => return Ok(SupportLang::Rust), + "scala" => return Ok(SupportLang::Scala), + "swift" => return Ok(SupportLang::Swift), + "typescript" | "ts" => return Ok(SupportLang::TypeScript), + "tsx" => return Ok(SupportLang::Tsx), + "yaml" | "yml" => return Ok(SupportLang::Yaml), + _ => {} // Fall through to case-insensitive search + } + + // Slow path: case-insensitive search for less common aliases + for &lang in Self::all_langs() { + for moniker in alias(lang) { + if s.eq_ignore_ascii_case(moniker) { + return Ok(lang); + } + } + } + Err(SupportLangErr::LanguageNotSupported(s.to_string())) + } +} + +macro_rules! execute_lang_method { + ($me: path, $method: ident, $($pname:tt),*) => { + use SupportLang as S; + match $me { + S::Bash => Bash.$method($($pname,)*), + S::C => C.$method($($pname,)*), + S::Cpp => Cpp.$method($($pname,)*), + S::CSharp => CSharp.$method($($pname,)*), + S::Css => Css.$method($($pname,)*), + S::Elixir => Elixir.$method($($pname,)*), + S::Go => Go.$method($($pname,)*), + S::Haskell => Haskell.$method($($pname,)*), + S::Html => Html.$method($($pname,)*), + S::Java => Java.$method($($pname,)*), + S::JavaScript => JavaScript.$method($($pname,)*), + S::Json => Json.$method($($pname,)*), + S::Kotlin => Kotlin.$method($($pname,)*), + S::Lua => Lua.$method($($pname,)*), + S::Php => Php.$method($($pname,)*), + S::Python => Python.$method($($pname,)*), + S::Ruby => Ruby.$method($($pname,)*), + S::Rust => Rust.$method($($pname,)*), + S::Scala => Scala.$method($($pname,)*), + S::Swift => Swift.$method($($pname,)*), + S::Tsx => Tsx.$method($($pname,)*), + S::TypeScript => TypeScript.$method($($pname,)*), + S::Yaml => Yaml.$method($($pname,)*), + } + } +} + +macro_rules! impl_lang_method { + ($method: ident, ($($pname:tt: $ptype:ty),*) => $return_type: ty) => { + #[inline] + fn $method(&self, $($pname: $ptype),*) -> $return_type { + execute_lang_method!{ self, $method, $($pname),* } + } + }; +} +impl Language for SupportLang { + impl_lang_method!(kind_to_id, (kind: &str) => u16); + impl_lang_method!(field_to_id, (field: &str) => Option); + impl_lang_method!(meta_var_char, () => char); + impl_lang_method!(expando_char, () => char); + impl_lang_method!(extract_meta_var, (source: &str) => Option); + impl_lang_method!(build_pattern, (builder: &PatternBuilder) => Result); + fn pre_process_pattern<'q>(&self, query: &'q str) -> Cow<'q, str> { + execute_lang_method! { self, pre_process_pattern, query } + } + fn from_path>(path: P) -> Option { + from_extension(path.as_ref()) + } +} + +impl LanguageExt for SupportLang { + impl_lang_method!(get_ts_language, () => TSLanguage); + impl_lang_method!(injectable_languages, () => Option<&'static [&'static str]>); + fn extract_injections( + &self, + root: Node>, + ) -> RapidMap> { + match self { + SupportLang::Html => Html.extract_injections(root), + _ => RapidMap::default(), + } + } +} + +const fn extensions(lang: SupportLang) -> &'static [&'static str] { + use SupportLang::*; + match lang { + Bash => &[ + "bash", "bats", "cgi", "command", "env", "fcgi", "ksh", "sh", "tmux", "tool", "zsh", + ], + C => &["c", "h"], + Cpp => &["cc", "hpp", "cpp", "c++", "hh", "cxx", "cu", "ino"], + CSharp => &["cs"], + Css => &["css", "scss"], + Elixir => &["ex", "exs"], + Go => &["go"], + Haskell => &["hs"], + Html => &["html", "htm", "xhtml"], + Java => &["java"], + JavaScript => &["cjs", "js", "mjs", "jsx"], + Json => &["json"], + Kotlin => &["kt", "ktm", "kts"], + Lua => &["lua"], + Php => &["php"], + Python => &["py", "py3", "pyi", "bzl"], + Ruby => &["rb", "rbw", "gemspec"], + Rust => &["rs"], + Scala => &["scala", "sc", "sbt"], + Swift => &["swift"], + TypeScript => &["ts", "cts", "mts"], + Tsx => &["tsx"], + Yaml => &["yaml", "yml"], + } +} + +/// Guess which programming language a file is written in +/// Adapt from `` +/// N.B do not confuse it with `FromStr` trait. This function is to guess language from file extension. +fn from_extension(path: &Path) -> Option { + let ext = path.extension()?.to_str()?; + + // Fast path: try most common extensions first + match ext { + "rs" => return Some(SupportLang::Rust), + "js" | "mjs" | "cjs" => return Some(SupportLang::JavaScript), + "ts" | "cts" | "mts" => return Some(SupportLang::TypeScript), + "tsx" => return Some(SupportLang::Tsx), + "py" | "py3" | "pyi" => return Some(SupportLang::Python), + "java" => return Some(SupportLang::Java), + "cpp" | "cc" | "cxx" => return Some(SupportLang::Cpp), + "c" => return Some(SupportLang::C), + "go" => return Some(SupportLang::Go), + "html" | "htm" => return Some(SupportLang::Html), + "css" => return Some(SupportLang::Css), + "json" => return Some(SupportLang::Json), + "yaml" | "yml" => return Some(SupportLang::Yaml), + _ => {} + } + + // Fallback: comprehensive search for less common extensions + SupportLang::all_langs() + .iter() + .copied() + .find(|&l| extensions(l).contains(&ext)) +} + +fn add_custom_file_type<'b>( + builder: &'b mut TypesBuilder, + file_type: &str, + suffix_list: &[&str], +) -> &'b mut TypesBuilder { + for suffix in suffix_list { + let glob = format!("*.{suffix}"); + builder + .add(file_type, &glob) + .expect("file pattern must compile"); + } + builder.select(file_type) +} + +fn file_types(lang: SupportLang) -> Types { + let mut builder = TypesBuilder::new(); + let exts = extensions(lang); + let lang_name = lang.to_string(); + add_custom_file_type(&mut builder, &lang_name, exts); + builder.build().expect("file type must be valid") +} + +pub fn config_file_type() -> Types { + let mut builder = TypesBuilder::new(); + let builder = add_custom_file_type(&mut builder, "yml", &["yml", "yaml"]); + builder.build().expect("yaml type must be valid") +} + +#[cfg(test)] +mod test { + use super::*; + use thread_ast_engine::{Pattern, matcher::MatcherExt}; + + pub fn test_match_lang(query: &str, source: &str, lang: impl LanguageExt) { + let cand = lang.ast_grep(source); + let pattern = Pattern::new(query, lang); + assert!( + pattern.find_node(cand.root()).is_some(), + "goal: {pattern:?}, candidate: {}", + cand.root().get_inner_node().to_sexp(), + ); + } + + pub fn test_non_match_lang(query: &str, source: &str, lang: impl LanguageExt) { + let cand = lang.ast_grep(source); + let pattern = Pattern::new(query, lang); + assert!( + pattern.find_node(cand.root()).is_none(), + "goal: {pattern:?}, candidate: {}", + cand.root().get_inner_node().to_sexp(), + ); + } + + pub fn test_replace_lang( + src: &str, + pattern: &str, + replacer: &str, + lang: impl LanguageExt, + ) -> String { + let mut source = lang.ast_grep(src); + assert!( + source + .replace(pattern, replacer) + .expect("should parse successfully") + ); + source.generate() + } + + #[test] + fn test_js_string() { + test_match_lang("'a'", "'a'", JavaScript); + test_match_lang("\"\"", "\"\"", JavaScript); + test_match_lang("''", "''", JavaScript); + } + + #[test] + fn test_guess_by_extension() { + let path = Path::new("foo.rs"); + assert_eq!(from_extension(path), Some(SupportLang::Rust)); + } + + // TODO: add test for file_types +} diff --git a/crates/language/src/lua.rs b/crates/language/src/lua.rs new file mode 100644 index 0000000..cc529ae --- /dev/null +++ b/crates/language/src/lua.rs @@ -0,0 +1,42 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Lua); +} + +#[test] +fn test_lua_pattern() { + test_match("s = $S", "s = 'string'"); + test_match("print($S)", "print('Hello World')"); + test_match("a = io.$METHOD($S)", "a = io.read('*number')"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Lua) +} + +#[test] +fn test_lua_replace() { + let ret = test_replace( + r#"function fact (n) + if n == 0 then + return 1 + else + return n * fact(n-1) + end + end"#, + "function $FUNC($ARG) $$$ end", + "$FUNC = function ($ARG) return 1 end", + ); + assert_eq!(ret, "fact = function (n) return 1 end"); +} diff --git a/crates/language/src/parsers.rs b/crates/language/src/parsers.rs new file mode 100644 index 0000000..0e4dd25 --- /dev/null +++ b/crates/language/src/parsers.rs @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! This mod maintains a list of tree-sitter parsers crate. +//! When feature flag `builtin-parser` is on, this mod will import all dependent crates. +//! However, tree-sitter bs cannot be compiled by wasm-pack. +//! In this case, we can use a blank implementation by turning feature flag off. +//! And use other implementation. + +#[cfg(feature = "builtin-parser")] +macro_rules! into_lang { + ($lang: ident, $field: ident) => { + $lang::$field.into() + }; + ($lang: ident) => { + into_lang!($lang, LANGUAGE) + }; +} + +#[cfg(not(feature = "builtin-parser"))] +macro_rules! into_lang { + ($lang: ident, $field: ident) => { + unimplemented!( + "tree-sitter parser is not implemented when feature flag [builtin-parser] is off." + ) + }; + ($lang: ident) => { + into_lang!($lang, LANGUAGE) + }; +} + +#[cfg(any(feature = "builtin-parser", feature = "napi-lang"))] +macro_rules! into_napi_lang { + ($lang: path) => { + $lang.into() + }; +} +#[cfg(not(any(feature = "builtin-parser", feature = "napi-lang")))] +macro_rules! into_napi_lang { + ($lang: path) => { + unimplemented!( + "tree-sitter parser is not implemented when feature flag [builtin-parser] is off." + ) + }; +} + +use std::sync::OnceLock; +use thread_ast_engine::tree_sitter::TSLanguage; + +// Cached language instances for zero-cost repeated access +static BASH_LANG: OnceLock = OnceLock::new(); +static C_LANG: OnceLock = OnceLock::new(); +static CPP_LANG: OnceLock = OnceLock::new(); +static CSHARP_LANG: OnceLock = OnceLock::new(); +static CSS_LANG: OnceLock = OnceLock::new(); +static ELIXIR_LANG: OnceLock = OnceLock::new(); +static GO_LANG: OnceLock = OnceLock::new(); +static HASKELL_LANG: OnceLock = OnceLock::new(); +static HTML_LANG: OnceLock = OnceLock::new(); +static JAVA_LANG: OnceLock = OnceLock::new(); +static JAVASCRIPT_LANG: OnceLock = OnceLock::new(); +static JSON_LANG: OnceLock = OnceLock::new(); +static KOTLIN_LANG: OnceLock = OnceLock::new(); +static LUA_LANG: OnceLock = OnceLock::new(); +static PHP_LANG: OnceLock = OnceLock::new(); +static PYTHON_LANG: OnceLock = OnceLock::new(); +static RUBY_LANG: OnceLock = OnceLock::new(); +static RUST_LANG: OnceLock = OnceLock::new(); +static SCALA_LANG: OnceLock = OnceLock::new(); +static SWIFT_LANG: OnceLock = OnceLock::new(); +static TSX_LANG: OnceLock = OnceLock::new(); +static TYPESCRIPT_LANG: OnceLock = OnceLock::new(); +static YAML_LANG: OnceLock = OnceLock::new(); + +pub fn language_bash() -> TSLanguage { + BASH_LANG + .get_or_init(|| into_lang!(tree_sitter_bash)) + .clone() +} + +pub fn language_c() -> TSLanguage { + C_LANG.get_or_init(|| into_lang!(tree_sitter_c)).clone() +} + +pub fn language_cpp() -> TSLanguage { + CPP_LANG.get_or_init(|| into_lang!(tree_sitter_cpp)).clone() +} + +pub fn language_c_sharp() -> TSLanguage { + CSHARP_LANG + .get_or_init(|| into_lang!(tree_sitter_c_sharp)) + .clone() +} + +pub fn language_css() -> TSLanguage { + CSS_LANG + .get_or_init(|| into_napi_lang!(tree_sitter_css::LANGUAGE)) + .clone() +} + +pub fn language_elixir() -> TSLanguage { + ELIXIR_LANG + .get_or_init(|| into_lang!(tree_sitter_elixir)) + .clone() +} + +pub fn language_go() -> TSLanguage { + GO_LANG.get_or_init(|| into_lang!(tree_sitter_go)).clone() +} + +pub fn language_haskell() -> TSLanguage { + HASKELL_LANG + .get_or_init(|| into_lang!(tree_sitter_haskell)) + .clone() +} + +pub fn language_html() -> TSLanguage { + HTML_LANG + .get_or_init(|| into_napi_lang!(tree_sitter_html::LANGUAGE)) + .clone() +} + +pub fn language_java() -> TSLanguage { + JAVA_LANG + .get_or_init(|| into_lang!(tree_sitter_java)) + .clone() +} + +pub fn language_javascript() -> TSLanguage { + JAVASCRIPT_LANG + .get_or_init(|| into_napi_lang!(tree_sitter_javascript::LANGUAGE)) + .clone() +} + +pub fn language_json() -> TSLanguage { + JSON_LANG + .get_or_init(|| into_lang!(tree_sitter_json)) + .clone() +} + +pub fn language_kotlin() -> TSLanguage { + KOTLIN_LANG + .get_or_init(|| into_lang!(tree_sitter_kotlin)) + .clone() +} + +pub fn language_lua() -> TSLanguage { + LUA_LANG.get_or_init(|| into_lang!(tree_sitter_lua)).clone() +} + +pub fn language_php() -> TSLanguage { + PHP_LANG + .get_or_init(|| into_lang!(tree_sitter_php, LANGUAGE_PHP_ONLY)) + .clone() +} + +pub fn language_python() -> TSLanguage { + PYTHON_LANG + .get_or_init(|| into_lang!(tree_sitter_python)) + .clone() +} + +pub fn language_ruby() -> TSLanguage { + RUBY_LANG + .get_or_init(|| into_lang!(tree_sitter_ruby)) + .clone() +} + +pub fn language_rust() -> TSLanguage { + RUST_LANG + .get_or_init(|| into_lang!(tree_sitter_rust)) + .clone() +} + +pub fn language_scala() -> TSLanguage { + SCALA_LANG + .get_or_init(|| into_lang!(tree_sitter_scala)) + .clone() +} + +pub fn language_swift() -> TSLanguage { + SWIFT_LANG + .get_or_init(|| into_lang!(tree_sitter_swift)) + .clone() +} + +pub fn language_tsx() -> TSLanguage { + TSX_LANG + .get_or_init(|| into_napi_lang!(tree_sitter_typescript::LANGUAGE_TSX)) + .clone() +} + +pub fn language_typescript() -> TSLanguage { + TYPESCRIPT_LANG + .get_or_init(|| into_napi_lang!(tree_sitter_typescript::LANGUAGE_TYPESCRIPT)) + .clone() +} + +pub fn language_yaml() -> TSLanguage { + YAML_LANG + .get_or_init(|| into_lang!(tree_sitter_yaml)) + .clone() +} diff --git a/crates/language/src/php.rs b/crates/language/src/php.rs new file mode 100644 index 0000000..8e0982f --- /dev/null +++ b/crates/language/src/php.rs @@ -0,0 +1,23 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Php); +} + +#[test] +fn test_php_pattern() { + // dummy example, php pattern actually does not work + test_match("123", "123"); +} + +// https://github.com/ast-grep/ast-grep/issues/639#issuecomment-1876622828 +// TODO: better php support diff --git a/crates/language/src/profiling.rs b/crates/language/src/profiling.rs new file mode 100644 index 0000000..97ea62e --- /dev/null +++ b/crates/language/src/profiling.rs @@ -0,0 +1,118 @@ +// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! Memory profiling utilities for performance analysis + +use std::alloc::{GlobalAlloc, Layout, System}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +/// A simple memory profiler that tracks allocations +pub struct MemoryProfiler; + +static ALLOCATED: AtomicUsize = AtomicUsize::new(0); +static DEALLOCATED: AtomicUsize = AtomicUsize::new(0); +static PEAK_USAGE: AtomicUsize = AtomicUsize::new(0); + +unsafe impl GlobalAlloc for MemoryProfiler { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + let ptr = System.alloc(layout); + if !ptr.is_null() { + let size = layout.size(); + let old_allocated = ALLOCATED.fetch_add(size, Ordering::Relaxed); + let current_usage = old_allocated + size - DEALLOCATED.load(Ordering::Relaxed); + + // Update peak usage + let mut peak = PEAK_USAGE.load(Ordering::Relaxed); + while current_usage > peak { + match PEAK_USAGE.compare_exchange_weak( + peak, + current_usage, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => break, + Err(x) => peak = x, + } + } + } + ptr + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + System.dealloc(ptr, layout); + DEALLOCATED.fetch_add(layout.size(), Ordering::Relaxed); + } +} + +/// Memory usage statistics +#[derive(Debug, Clone, Copy)] +pub struct MemoryStats { + pub allocated: usize, + pub deallocated: usize, + pub current_usage: usize, + pub peak_usage: usize, +} + +impl MemoryStats { + /// Get current memory statistics + pub fn current() -> Self { + let allocated = ALLOCATED.load(Ordering::Relaxed); + let deallocated = DEALLOCATED.load(Ordering::Relaxed); + let peak_usage = PEAK_USAGE.load(Ordering::Relaxed); + + Self { + allocated, + deallocated, + current_usage: allocated - deallocated, + peak_usage, + } + } + + /// Reset all counters + pub fn reset() { + ALLOCATED.store(0, Ordering::Relaxed); + DEALLOCATED.store(0, Ordering::Relaxed); + PEAK_USAGE.store(0, Ordering::Relaxed); + } +} + +/// Macro to profile memory usage of a code block +#[macro_export] +macro_rules! profile_memory { + ($name:expr, $code:block) => {{ + let start_stats = $crate::profiling::MemoryStats::current(); + let result = $code; + let end_stats = $crate::profiling::MemoryStats::current(); + + println!("{} - Memory Usage:", $name); + println!( + " Allocated: {} bytes", + end_stats.allocated - start_stats.allocated + ); + println!(" Peak Usage: {} bytes", end_stats.peak_usage); + println!(" Current Usage: {} bytes", end_stats.current_usage); + + result + }}; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memory_stats() { + MemoryStats::reset(); + let initial = MemoryStats::current(); + + // Allocate some memory + let _vec: Vec = vec![0; 1024]; + let after_alloc = MemoryStats::current(); + + assert!(after_alloc.allocated > initial.allocated); + assert!(after_alloc.current_usage > initial.current_usage); + } +} diff --git a/crates/language/src/python.rs b/crates/language/src/python.rs new file mode 100644 index 0000000..b8ea420 --- /dev/null +++ b/crates/language/src/python.rs @@ -0,0 +1,130 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Python); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Python); +} + +#[test] +fn test_python_str() { + test_match("print($A)", "print(123)"); + test_match("print('123')", "print('123')"); + test_non_match("print('123')", "print('456')"); + test_non_match("'123'", "'456'"); + // https://github.com/ast-grep/ast-grep/issues/276 + // python has fixed the wrong parsing issue + test_non_match( + "getattr($O, \"__spec__\", None)", + "getattr(response, \"render\", None)", + ); + test_match( + "getattr($O, \"render\", None)", + "getattr(response, \"render\", None)", + ); +} + +// https://github.com/ast-grep/ast-grep/issues/883 +#[test] +fn test_issue_883() { + test_match("r'^[A-Za-z0-9_-]+\\$'", "r'^[A-Za-z0-9_-]+\\$'"); +} + +#[test] +fn test_python_pattern() { + test_match("$A = 0", "a = 0"); + // A test case from https://peps.python.org/pep-0636/#appendix-a-quick-intro + test_match( + r#" +match $A: + case $B: + $C + case [$D(0, 0)]: + $E + case [$D($F, $G)]: + $H + case [$D(0, $I), $D(0, $J)]: + $K + case _: + $L +"#, + r#" +match points: + case []: + print("No points") + case [Point(0, 0)]: + print("The origin") + case [Point(x, y)]: + print(f"Single point {x}, {y}") + case [Point(0, y1), Point(0, y2)]: + print(f"Two on the Y axis at {y1}, {y2}") + case _: + print("Something else") +"#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Python) +} + +#[test] +fn test_python_replace() { + let ret = test_replace( + r#" +if flag: + a = value_pos +else: + a = value_neg"#, + r#" +if $FLAG: + $VAR = $POS +else: + $VAR = $NEG +"#, + "$VAR = $POS if $FLAG else $NEG", + ); + assert_eq!(ret, "\na = value_pos if flag else value_neg"); + + let ret = test_replace( + r#" +try: + f = open(file_path, "r") + file_content = f.read() +except: + pass +finally: + f.close()"#, + r#" +try: + $A = open($B, $C) + $D = $A.read() +except: + pass +finally: + $A.close()"#, + r#" +with open($B, $C) as $A: + $D = $A.open()"#, + ); + assert_eq!( + ret, + r#" + +with open(file_path, "r") as f: + file_content = f.open()"# + ); +} diff --git a/crates/language/src/ruby.rs b/crates/language/src/ruby.rs new file mode 100644 index 0000000..5b444c3 --- /dev/null +++ b/crates/language/src/ruby.rs @@ -0,0 +1,38 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; +use thread_ast_engine::Pattern; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Ruby); +} + +#[test] +fn test_ruby_pattern() { + test_match("Foo::bar", "Foo::bar"); +} + +// https://github.com/ast-grep/ast-grep/issues/713 +#[test] +fn test_ruby_tree_sitter_panic() { + let pattern = Pattern::new("Foo::barbaz", Ruby); + assert_eq!(pattern.fixed_string(), "barbaz"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Ruby) +} + +#[test] +fn test_ruby_replace() { + let ret = test_replace("Foo::bar()", "Foo::$METHOD()", "$METHOD()"); + assert_eq!(ret, "bar()"); +} diff --git a/crates/language/src/rust.rs b/crates/language/src/rust.rs new file mode 100644 index 0000000..42c0f56 --- /dev/null +++ b/crates/language/src/rust.rs @@ -0,0 +1,97 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; +use crate::test::{test_match_lang, test_replace_lang}; + +fn test_match(s1: &str, s2: &str) { + test_match_lang(s1, s2, Rust) +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Rust); +} + +#[test] +fn test_rust_pattern() { + // fix #6 + test_match("Some($A)", "fn test() { Some(123) }"); + test_match( + " +match $A { + Some($B) => $B, + None => $C, +}", + r#"fn test() { +patterns = match config.include.clone() { + Some(patterns) => patterns, + None => Vec::from([cwd + .join("**/*.toml") + .normalize() + .to_string_lossy() + .into_owned()]), +}; +}"#, + ); +} + +// it is fixed in https://github.com/tree-sitter/tree-sitter-rust/issues/218 +// but not released yet +#[test] +fn test_issue_1057() { + // fix #1057 + test_match("foo(\"meaning\");", "fn t() { foo(\"meaning\");}"); + test_non_match("foo(\"meaning\");", "fn t() { foo(\"service\");}"); +} + +#[test] +fn test_rust_wildcard_pattern() { + // fix #412 + test_match("|$A, $B|", "let w = v.into_iter().reduce(|x, y| x + y);"); + test_match("|$$A, $$B|", "let w = v.into_iter().reduce(|x, _| x + x);"); + test_match("let ($$X, $$Y) = $$$T;", "let (_, y) = (1, 2);"); +} + +#[test] +fn test_rust_spread_syntax() { + test_match( + "let ($X, $Y) = $$$T;", + "let (.., y) = (1,2,3,4,5,6,7,8,9,10);", + ); + test_match( + "$C { $$$A, ..$B};", + r#"User { + username: String::from(name), + ..DEFAULT_USER + };"#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + test_replace_lang(src, pattern, replacer, Rust) +} + +#[test] +fn test_rust_replace() { + let ret = test_replace("fn test() { Some(123) }", "Some($A)", "Ok($A)"); + assert_eq!(ret, "fn test() { Ok(123) }"); + let ret = test_replace( + r#" +patterns = match config.include.clone() { + Some(patterns) => patterns, + None => 123, +}"#, + "match $A { + Some($B) => $B, + None => $C, +}", + "$A.unwrap_or($C)", + ); + assert_eq!(ret, "\npatterns = config.include.clone().unwrap_or(123)"); +} diff --git a/crates/language/src/scala.rs b/crates/language/src/scala.rs new file mode 100644 index 0000000..6cc596c --- /dev/null +++ b/crates/language/src/scala.rs @@ -0,0 +1,58 @@ +#![cfg(test)] +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! Standalone Scala file to test syntax. +//! Scala does not need special processing and can be a stub lang. +//! But this file is created for testing Scala2 and Scala3. + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Scala); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Scala); +} + +#[test] +fn test_scala_str() { + test_match("println($A)", "println(123)"); + test_match("println(\"123\")", "println(\"123\")"); + test_non_match("println(\"123\")", "println(\"456\")"); + test_non_match("\"123\"", "\"456\""); +} + +#[test] +fn test_scala_pattern() { + test_match("val $A = 0", "val a = 0"); + test_match("foo($VAR)", "foo(bar)"); + test_match("type $A = String", "type Foo = String"); + test_match("$A.filter(_ == $B)", "foo.filter(_ == bar)"); + test_match("if ($A) $B else $C", "if (foo) bar else baz"); + // Scala 3 syntax + test_match("if $A then $B else $C", "if foo then bar else baz"); + test_non_match("if ($A) $B else $C", "if foo then bar else baz"); + test_non_match("type $A = Int", "type Foo = String"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Scala) +} + +#[test] +fn test_scala_replace() { + let ret = test_replace( + "foo.filter(_ == bar)", + "$A.filter(_ == $B)", + "$A.filter(_ == baz)", + ); + assert_eq!(ret, "foo.filter(_ == baz)"); +} diff --git a/crates/language/src/swift.rs b/crates/language/src/swift.rs new file mode 100644 index 0000000..9a2a2f0 --- /dev/null +++ b/crates/language/src/swift.rs @@ -0,0 +1,66 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Swift); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Swift); +} + +#[test] +fn test_swift_str() { + test_match("println(\"123\")", "println(\"123\")"); + test_non_match("println(\"123\")", "println(\"456\")"); + test_non_match("\"123\"", "\"456\""); +} + +#[test] +fn test_swift_pattern() { + test_match("fun($A)", "fun(123)"); + test_match("foo($$$)", "foo(1, 2, 3)"); + test_match( + "foo() { $E in $F }", + "foo() { s in + s.a = 123 + }", + ); + test_non_match("foo($$$) { $E in $F }", "foo(1, 2, 3)"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Swift) +} + +const SOURCE: &str = r#" +foo(a: A, b: B, c: C) { s in + s.a = a + s.b = b +}"#; +const EXPECTED: &str = r#" +foo(b: B, a: A, c: C) { s in + s.a = a + s.b = b +}"#; + +#[test] +fn test_swift_replace() { + let ret = test_replace( + SOURCE, + "foo(a: $A, b: $B, c: $C) { $E in $$$F }", + "foo(b: $B, a: $A, c: $C) { $E in + $$$F}", + ); + assert_eq!(ret, EXPECTED); +} diff --git a/crates/language/src/yaml.rs b/crates/language/src/yaml.rs new file mode 100644 index 0000000..1c02b73 --- /dev/null +++ b/crates/language/src/yaml.rs @@ -0,0 +1,64 @@ +#![cfg(test)] + +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Yaml); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Yaml); +} + +#[test] +fn test_yaml_str() { + test_match("123", "123"); + test_non_match("123", "'123'"); + // the pattern below should not match but match now + // test_non_match("\"123\"", "\"456\""); +} + +#[test] +fn test_yaml_pattern() { + test_match("foo: $BAR", "foo: 123"); + test_match("foo: $$$", "foo: [1, 2, 3]"); + test_match( + "foo: $BAR", + "foo: + - a + ", + ); + test_non_match("foo: $BAR", "bar: bar"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Yaml) +} + +const SOURCE: &str = r#" +key: value +list: + - item1 + - item2 +"#; +const EXPECTED: &str = r#" +value: key +list: + - item1 + - item2 +"#; + +#[test] +fn test_yaml_replace() { + let ret = test_replace(SOURCE, "$KEY: value", "value: $KEY"); + assert_eq!(ret, EXPECTED); +} diff --git a/crates/rule-engine/Cargo.toml b/crates/rule-engine/Cargo.toml new file mode 100644 index 0000000..0520b11 --- /dev/null +++ b/crates/rule-engine/Cargo.toml @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 + +[package] +name = "thread-rule-engine" +description = "Rule-based scanning and transformation engine for Thread" +keywords = ["ast", "pattern", "codemod", "search", "rewrite", "rules"] +categories = ["command-line-utilities", "development-tools", "parsing"] +version = "0.1.0" +license = "AGPL-3.0-or-later AND MIT" +readme = "README.md" +authors = [ + "Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>", + "Knitli Inc ", + "Adam Poulemanos for Knitli ", +] +edition.workspace = true +repository.workspace = true +rust-version.workspace = true +include.workspace = true + +[features] +default = ["scanning", "fixing"] +# The 'scanning' feature enables finding code via rules +scanning = ["thread-ast-engine/matching"] +# The 'fixing' feature enables transforming code via rules +fixing = ["thread-ast-engine/matching"] + +[dependencies] +thread-ast-engine = { workspace = true, default-features = false } +thread-utils = { workspace = true, default-features = false, features = [ + "hashers", + "simd", +] } +bit-set.workspace = true +globset = "0.4.16" +regex.workspace = true +serde.workspace = true +serde_yaml = { workspace = true } +serde_json = { workspace = true } +thiserror.workspace = true +schemars.workspace = true + +[dev-dependencies] +ast-grep-core = { version = "0.38.7", features = ["tree-sitter"] } +ast-grep-config = { version = "0.38.7" } +ast-grep-language = { version = "0.38.7", features = ["builtin-parser"] } +criterion = { version = "0.6", features = ["html_reports"] } +thread-ast-engine = { workspace = true, features = ["parsing", "matching"] } +thread-language = { workspace = true, features = ["builtin-parser"] } +tree-sitter-javascript = "0.23.1" +tree-sitter-python = "0.23.6" +tree-sitter-rust = "0.24.0" +tree-sitter-typescript = "0.23.2" +tree-sitter.workspace = true + +[build-dependencies] +cc = "1.2.30" + +[[bench]] +name = "simple_benchmarks" +harness = false + +[[bench]] +name = "ast_grep_comparison" +harness = false + +[[bench]] +name = "rule_engine_benchmarks" +harness = false + +[[bench]] +name = "comparison_benchmarks" +harness = false diff --git a/crates/rule-engine/LICENSE-AGPL-3.0-or-later b/crates/rule-engine/LICENSE-AGPL-3.0-or-later new file mode 100644 index 0000000..1b62c0f --- /dev/null +++ b/crates/rule-engine/LICENSE-AGPL-3.0-or-later @@ -0,0 +1,662 @@ +# GNU AFFERO GENERAL PUBLIC LICENSE + + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/crates/rule-engine/LICENSE-MIT b/crates/rule-engine/LICENSE-MIT new file mode 100644 index 0000000..e3a8a65 --- /dev/null +++ b/crates/rule-engine/LICENSE-MIT @@ -0,0 +1,30 @@ + + +# MIT License + +Copyright (c) 2022 Herrington Darkholme + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +## This crate was created from forked code + +The above license and copyright applies to any code before the fork. Any changes since Ast-Grep v.0.38.7 are separately licensed. + +- See [LICENSE-AGPL-3.0-or-later](LICENSE-AGPL-3.0-or-later) +- For a description of the fork and what it includes, visit the [Thread repo](https://github.com/knitli/thread/tree/main/VENDORED.md) diff --git a/crates/rule-engine/VENDORED.md b/crates/rule-engine/VENDORED.md new file mode 100644 index 0000000..c9c1882 --- /dev/null +++ b/crates/rule-engine/VENDORED.md @@ -0,0 +1,69 @@ + +# Our Fork of Ast-Grep + +We forked most of the excellent [Ast-Grep][AG] codebase to create Thread. We originally tried using Ast-Grep as a library, but ran into limitations. The `core` module is intended to work as a library, but our plans for Thread required finer control over features at build-time. + +While Thread includes a CLI (and that’s likely your first encounter with it), our CLI is just the tip of the iceberg. The real focus is on service-oriented architecture for cloud and automation use. + +**We forked at Ast-Grep v0.38.7**. See [the original repo at that version](https://github.com/ast-grep/ast-grep/tree/0.38.7) for reference. + +--- + +## Why We Forked + +We tried multiple approaches to integrating Ast-Grep, from working with it as a library with a complex feature-gating scheme, to vendoring and dividing four crates into granular components (14 crates!). That latter one was overkill, and was probably us jumping the shark early :shark:⛷️. + +We settled on a middle ground. We forked `core`, `config`, and `language`, and will continue to use `dynamic` and others as dependencies as needed. We also did our best to make as few changes as possible -- mostly focusing on separating features with gating, and abstracting some core elements to better fit our service oriented approach. + +Our changes are mostly structuralβ€”we needed finer-grained control over organization, minimal cold start times, and clean separation between services. + +### Where the Fork Lives + +* [`thread-ast-engine`](https://github.com/knitli/thread/tree/main/crates/ast-engine): Fork of `ast-grep-core`. We separated its features into `parsing`, and `matching` features so that we could better control their usage in our services. +* [`thread-rule-engine`](https://github.com/knitli/thread/tree/main/crates/rule-engine): Fork of `ast-grep-config`. We isolated rule management, parsing, and validation functionality, and made changes to separate the logic from the assumption of a config file, allowing us more flexibility to implement rule-based operations in different environments. +* [`thread-language`](https://github.com/knitli/thread/tree/main/crates/language): We changed very little here, we needed the languages publicly exposed to feature gate each one separately. We also plan to add different languages more suitable for our needs. + +We admittedly didn't have this conversation with the Ast-Grep contributors, which we will once the dust settles a bit and we can divert attention from delivering an MVP. Our changes are intentionally reversible, and we'd like to find a way to return to using the core crates and contributing there (but that may not be realistic with different goals between the projects). + +### Licensing + +**Original Ast-Grep code** is MIT-licensed (see the `LICENSE-MIT` file in each crate). +**Our changes and anything Thread-specific** are licensed under the [AGPL v3.0](https://github.com/knitli/thread/blob/main/LICENSE.md). + +* If you want pure MIT, use Ast-Grep directly, or cherry-pick the original code. The relationships are: + + * `thread-ast-engine` β†’ `ast-grep-core` + * `thread-rule-engine` β†’ `ast-grep-config` + * `thread-language` β†’ `ast-grep-language` + +* Using our fork means AGPL; sharing required. If you want to treat your code based on Thread like :ring: Gollum :ring:, [contact us for a commercial license](mailto:licensing@knit.li), and you can keep your *precious*. +* Our project meets the [Reuse Specification](https://reuse.software/). Every file in the project is marked in its header with license information, or with an accompanying `.license` file. Code from `Ast-Grep` will be marked `AGPL-3.0-or-later AND MIT` (this isn't an `or` where you can choose between them). + +> Technically, you *can* only use the unchanged Ast-Grep bits under MITβ€”but you’d need to do the diffing yourself, and you’ll miss out on Thread-specific improvements (not sure why you would do that instead of just forking Ast-Grep...). AGPL means our changes (and anyone else’s) will always be open source. + +--- + +## We're Going to Contribute to Ast-Grep, too + +Most of Thread's Ast-Grep codebase is unchanged for now, and where we identify bugs or areas for improvement, we'll submit them upstream under Ast-Grep's MIT license. Similarly, we'll monitor changes to Ast-Grep and incorporate fixes and improvements into Thread. + +## So Are You Going to Try to Keep the Changes Minimal Forever? + +Probably not. Our first commitment is making Thread as great as we can, even if we diverge from Ast-Grep. We'd love to see the projects grow together, but they may not always align perfectly. Ast-Grep has its own roadmap and priorities, and we have ours. Thread is not Ast-Grep; it is just built on top of it. + +## Why Ast-Grep? + +Ast-Grep makes [Tree-sitter][ts] actually usable for code search/replace. We built on it because it solved the hard partsβ€”especially CST-wranglingβ€”so we could focus on new stuff, not rebuilding the same wheel.[^1] + +> For reasons lost to time, everyone in this ecosystem calls their [CSTs][csts] β€œASTs.” Maybe it’s like the first rule of Tree-sitter Club: we all pretend they’re ASTs :fist:. + +[^1]: If our initial attempts at integrating Ast-Grep represent how we would reinvent the wheel, we probably would have made our version square and in 15 parts, assembly required. + +[AG]: https://github.com/ast-grep/ast-grep +[ts]: https://github.com/tree-sitter/tree-sitter +[csts]: https://en.wikipedia.org/wiki/Concrete_syntax_tree diff --git a/crates/rule-engine/benches/README.md b/crates/rule-engine/benches/README.md new file mode 100644 index 0000000..501d5df --- /dev/null +++ b/crates/rule-engine/benches/README.md @@ -0,0 +1,86 @@ + + +# Rule Engine Benchmarks + +This directory contains performance benchmarks for the `thread-rule-engine` crate. + +## Working Benchmarks + +### 1. `simple_benchmarks.rs` + +- **Status**: βœ… Working +- **Purpose**: Core thread-rule-engine functionality benchmarks +- **Benchmarks**: + - `bench_rule_parsing`: Simple and complex rule parsing from YAML + - `bench_rule_compilation`: Multiple rule compilation performance + - `bench_rule_transformation`: Rule transformation parsing + - `bench_yaml_deserialization`: Large YAML rule deserialization + +### 2. `ast_grep_comparison.rs` + +- **Status**: βœ… Working +- **Purpose**: Direct performance comparison between thread-rule-engine and ast-grep-config +- **Benchmarks**: + - `bench_rule_parsing_comparison`: Side-by-side parsing performance + - `bench_rule_matching_comparison`: Pattern matching speed comparison + - `bench_combined_scan_comparison`: Multi-rule scanning performance + - `bench_memory_usage_comparison`: Memory allocation patterns + +## Test Data + +The benchmarks use realistic code samples: + +- `test_data/sample_typescript.ts` - TypeScript code with classes, functions, async/await +- `test_data/sample_javascript.js` - JavaScript with ES6+ features +- `test_data/sample_python.py` - Python with async functions and decorators +- `test_data/sample_rust.rs` - Rust with structs, traits, and macros + +## Running Benchmarks + +```bash +# Run all benchmarks +cargo bench + +# Run specific benchmark +cargo bench --bench simple_benchmarks +cargo bench --bench ast_grep_comparison + +# Generate HTML reports +cargo bench +open target/criterion/report/index.html +``` + +## Key Features Benchmarked + +1. **Rule Parsing**: YAML to internal rule representation +2. **Pattern Compilation**: Converting patterns to matchers +3. **Memory Usage**: Allocation patterns during rule creation +4. **Transformation Processing**: Rule transformation parsing +5. **Deserialization**: YAML parsing performance + +## Performance Expectations + +- **Simple rules**: Should parse in < 100ΞΌs +- **Complex rules**: Should parse in < 1ms +- **Memory overhead**: Should be comparable to ast-grep-config +- **Compilation**: Should scale linearly with rule complexity + +## Architecture + +The benchmarks are designed to: + +- Measure only the thread-rule-engine operations (not AST matching) +- Use realistic rule patterns from actual use cases +- Focus on serialization/deserialization performance +- Compare directly with ast-grep-config where possible + +## Limitations + +- Some benchmarks focus only on parsing/compilation, not pattern matching +- AST matching benchmarks require complex setup and may not be representative +- Memory benchmarks measure allocation patterns, not peak usage diff --git a/crates/rule-engine/benches/ast_grep_comparison.rs b/crates/rule-engine/benches/ast_grep_comparison.rs new file mode 100644 index 0000000..e5014e9 --- /dev/null +++ b/crates/rule-engine/benches/ast_grep_comparison.rs @@ -0,0 +1,280 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: MIT OR Apache-2.0 + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; + +// Thread imports +use thread_language::{LanguageExt as ThreadLanguageExt, SupportLang as ThreadSupportLang}; +use thread_rule_engine::{ + CombinedScan as ThreadCombinedScan, GlobalRules as ThreadGlobalRules, + from_yaml_string as thread_from_yaml_string, +}; + +// AstGrep imports +use ast_grep_config::{ + CombinedScan as AstGrepCombinedScan, GlobalRules as AstGrepGlobalRules, + from_yaml_string as ast_grep_from_yaml_string, +}; +use ast_grep_language::{LanguageExt as AstGrepLanguageExt, SupportLang as AstGrepSupportLang}; + +struct ComparisonData { + rules: Vec<&'static str>, + test_code: &'static str, +} + +impl ComparisonData { + fn new() -> Self { + Self { + rules: vec![ + r#" +id: simple-console-log +message: found console.log +severity: info +language: TypeScript +rule: + pattern: console.log($A) +"#, + r#" +id: function-declaration +message: found function declaration +severity: info +language: TypeScript +rule: + pattern: function $F($$$) { $$$ } +"#, /* + r#" +id: class-with-constructor +message: found class with constructor +severity: info +language: TypeScript +rule: + all: + - pattern: class $C { $$$ } + - has: + pattern: constructor_type($$$) { $$$ } + stopBy: end +"#,*/ + r#" +id: import-statement +message: found import statement +severity: info +language: TypeScript +rule: + any: + - pattern: import $M from '$P' + - pattern: import { $$$ } from '$P' + - pattern: import * as $M from '$P' +"#, + r#" +id: async-function-with-await +message: found async function with await +severity: info +language: TypeScript +rule: + all: + - pattern: async function $F($$$) { $$$ } + - has: + pattern: await $E + stopBy: end +"#, + ], + test_code: include_str!("../test_data/sample_typescript.ts"), + } + } +} + +fn bench_rule_parsing_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("rule_parsing_comparison"); + + for (rule_idx, rule_yaml) in data.rules.iter().enumerate() { + // Benchmark thread-rule-engine + group.bench_with_input( + BenchmarkId::new("thread_rule_engine", rule_idx), + rule_yaml, + |b, yaml| { + let globals = ThreadGlobalRules::default(); + b.iter(|| { + let _rules = + thread_from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + }); + }, + ); + + // Benchmark ast-grep-config + group.bench_with_input( + BenchmarkId::new("ast_grep_config", rule_idx), + rule_yaml, + |b, yaml| { + let globals = AstGrepGlobalRules::default(); + b.iter(|| { + let _rules = + ast_grep_from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + }); + }, + ); + } + + group.finish(); +} + +fn bench_rule_matching_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("rule_matching_comparison"); + + let test_rule = r#" +id: test-console-log +message: found console.log +severity: info +language: TypeScript +rule: + pattern: console.log($A) +"#; + + // Prepare rules for both libraries + let thread_globals = ThreadGlobalRules::default(); + let ast_grep_globals = AstGrepGlobalRules::default(); + + let thread_rules = thread_from_yaml_string::(test_rule, &thread_globals) + .expect("should parse"); + let ast_grep_rules = + ast_grep_from_yaml_string::(test_rule, &ast_grep_globals) + .expect("should parse"); + + let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); + let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); + + // Benchmark thread-rule-engine + group.bench_function("thread_rule_engine", |b| { + b.iter(|| { + let matches: Vec<_> = thread_grep + .root() + .find_all(&thread_rules[0].matcher) + .collect(); + black_box(matches); + }); + }); + + // Benchmark ast-grep-config + group.bench_function("ast_grep_config", |b| { + b.iter(|| { + let matches: Vec<_> = ast_grep_grep + .root() + .find_all(&ast_grep_rules[0].matcher) + .collect(); + black_box(matches); + }); + }); + + group.finish(); +} + +fn bench_combined_scan_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("combined_scan_comparison"); + + // Prepare rules for both libraries + let thread_globals = ThreadGlobalRules::default(); + let ast_grep_globals = AstGrepGlobalRules::default(); + + let mut thread_rules = Vec::new(); + let mut ast_grep_rules = Vec::new(); + + for rule_yaml in &data.rules { + let thread_rule = thread_from_yaml_string::(rule_yaml, &thread_globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + let ast_grep_rule = + ast_grep_from_yaml_string::(rule_yaml, &ast_grep_globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + + thread_rules.push(thread_rule); + ast_grep_rules.push(ast_grep_rule); + } + + // Create combined scanners + let thread_rule_refs: Vec<_> = thread_rules.iter().collect(); + let ast_grep_rule_refs: Vec<_> = ast_grep_rules.iter().collect(); + + let thread_combined_scan = ThreadCombinedScan::new(thread_rule_refs); + let ast_grep_combined_scan = AstGrepCombinedScan::new(ast_grep_rule_refs); + + let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); + let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); + + // Benchmark thread-rule-engine + group.bench_function("thread_rule_engine", |b| { + b.iter(|| { + let result = thread_combined_scan.scan(black_box(&thread_grep), false); + black_box(result); + }); + }); + + // Benchmark ast-grep-config + group.bench_function("ast_grep_config", |b| { + b.iter(|| { + let result = ast_grep_combined_scan.scan(black_box(&ast_grep_grep), false); + black_box(result); + }); + }); + + group.finish(); +} + +fn bench_memory_usage_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("memory_usage_comparison"); + + // Benchmark memory allocation during rule creation + group.bench_function("thread_rule_engine_memory", |b| { + let globals = ThreadGlobalRules::default(); + b.iter(|| { + let mut rules = Vec::new(); + for rule_yaml in &data.rules { + let rule = thread_from_yaml_string::(rule_yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + rules.push(rule); + } + black_box(rules); + }); + }); + + group.bench_function("ast_grep_config_memory", |b| { + let globals = AstGrepGlobalRules::default(); + b.iter(|| { + let mut rules = Vec::new(); + for rule_yaml in &data.rules { + let rule = ast_grep_from_yaml_string::(rule_yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + rules.push(rule); + } + black_box(rules); + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_rule_parsing_comparison, + bench_rule_matching_comparison, + bench_combined_scan_comparison, + bench_memory_usage_comparison +); +criterion_main!(benches); diff --git a/crates/rule-engine/benches/comparison_benchmarks.rs b/crates/rule-engine/benches/comparison_benchmarks.rs new file mode 100644 index 0000000..91ecb07 --- /dev/null +++ b/crates/rule-engine/benches/comparison_benchmarks.rs @@ -0,0 +1,266 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: MIT OR Apache-2.0 + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; + +use thread_language::{LanguageExt as ThreadLanguageExt, SupportLang as ThreadSupportLang}; + +use ast_grep_language::{LanguageExt as AstGrepLanguageExt, SupportLang as AstGrepSupportLang}; + +use thread_rule_engine::{ + CombinedScan as ThreadCombinedScan, GlobalRules as ThreadGlobalRules, + from_yaml_string as thread_from_yaml_string, +}; + +use ast_grep_config::{ + CombinedScan as AstGrepCombinedScan, GlobalRules as AstGrepGlobalRules, + from_yaml_string as ast_grep_from_yaml_string, +}; + +struct ComparisonData { + rules: Vec<&'static str>, + test_code: &'static str, +} + +impl ComparisonData { + fn new() -> Self { + Self { + rules: vec![ + r#" +id: simple-console-log +message: found console.log +severity: info +language: Tsx +rule: + pattern: console.log($A) +"#, + r#" +id: function-declaration +message: found function declaration +severity: info +language: Tsx +rule: + pattern: function $F($$$) { $$$ } +"#, + r#" +id: class-with-constructor +message: found class with constructor +severity: info +language: Tsx +rule: + all: + - pattern: class $C { $$$ } + - has: + pattern: constructor($$$) { $$$ } + stopBy: end +"#, + ], + test_code: include_str!("../test_data/sample_typescript.ts"), + } + } +} + +fn bench_rule_parsing_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("rule_parsing_comparison"); + + for (rule_idx, rule_yaml) in data.rules.iter().enumerate() { + // Benchmark thread-rule-engine + group.bench_with_input( + BenchmarkId::new("thread_rule_engine", rule_idx), + rule_yaml, + |b, yaml| { + let globals = ThreadGlobalRules::default(); + b.iter(|| { + let _rules = + thread_from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + }); + }, + ); + + // Benchmark ast-grep-config + group.bench_with_input( + BenchmarkId::new("ast_grep_config", rule_idx), + rule_yaml, + |b, yaml| { + let globals = AstGrepGlobalRules::default(); + b.iter(|| { + let _rules = ast_grep_from_yaml_string::( + black_box(yaml), + &globals, + ) + .expect("should parse"); + }); + }, + ); + } + + group.finish(); +} + +fn bench_rule_matching_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("rule_matching_comparison"); + + let test_rule = r#" +id: test-console-log +message: found console.log +severity: info +language: Tsx +rule: + pattern: console.log($A) +"#; + + // Prepare rules for both libraries + let thread_globals = ThreadGlobalRules::default(); + let ast_grep_globals = AstGrepGlobalRules::default(); + + let thread_rule = thread_from_yaml_string::(test_rule, &thread_globals) + .expect("should parse")[0] + .clone(); + let ast_grep_rule_config = + ast_grep_from_yaml_string::(test_rule, &ast_grep_globals) + .expect("should parse")[0] + .clone(); + + // Convert the config to a RuleCore to get the matcher + let ast_grep_rule = + ast_grep_config::RuleConfig::try_from(ast_grep_rule_config, &ast_grep_globals) + .expect("should convert to RuleCore"); + + let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); + let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); + + // Benchmark thread-rule-engine + group.bench_function("thread_rule_engine", |b| { + b.iter(|| { + let matches: Vec<_> = Vec::from_iter(thread_grep.root().find_all(&thread_rule.matcher)); + black_box(matches); + }); + }); + + // Benchmark ast-grep-config + group.bench_function("ast_grep_config", |b| { + b.iter(|| { + // Use the same matcher as in thread_rule_engine + let matches: Vec<_> = + Vec::from_iter(ast_grep_grep.root().find_all(&ast_grep_rule.matcher)); + black_box(matches); + }); + }); + + group.finish(); +} + +fn bench_combined_scan_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("combined_scan_comparison"); + + // Prepare rules for both libraries + let thread_globals = ThreadGlobalRules::default(); + let ast_grep_globals = AstGrepGlobalRules::default(); + + let mut thread_rules = Vec::new(); + let mut ast_grep_rules = Vec::new(); + + for rule_yaml in &data.rules { + let thread_rule = thread_from_yaml_string::(rule_yaml, &thread_globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + let ast_grep_rule = ast_grep_from_yaml_string::( + rule_yaml, + &ast_grep_globals, + ) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + + thread_rules.push(thread_rule); + ast_grep_rules.push(ast_grep_rule); + } + + // Create combined scanners + let thread_rule_refs: Vec<_> = thread_rules.iter().collect(); + let ast_grep_rule_refs: Vec<_> = ast_grep_rules.iter().collect(); + + let thread_combined_scan = ThreadCombinedScan::new(thread_rule_refs); + let ast_grep_combined_scan = AstGrepCombinedScan::new(ast_grep_rule_refs); + + let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); + let ast_grep_grep = ast_grep_language::SupportLang::TypeScript.ast_grep(data.test_code); + + // Benchmark thread-rule-engine + group.bench_function("thread_rule_engine", |b| { + b.iter(|| { + let result = thread_combined_scan.scan(black_box(&thread_grep), false); + black_box(result); + }); + }); + + // Benchmark ast-grep-config + group.bench_function("ast_grep_config", |b| { + b.iter(|| { + let result = ast_grep_combined_scan.scan(black_box(&ast_grep_grep), false); + black_box(result); + }); + }); + + group.finish(); +} + +fn bench_memory_usage_comparison(c: &mut Criterion) { + let data = ComparisonData::new(); + let mut group = c.benchmark_group("memory_usage_comparison"); + + // Benchmark memory allocation during rule creation + group.bench_function("thread_rule_engine_memory", |b| { + let globals = ThreadGlobalRules::default(); + b.iter(|| { + let mut rules = Vec::new(); + for rule_yaml in &data.rules { + let rule = + thread_from_yaml_string::(rule_yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + rules.push(rule); + } + black_box(rules); + }); + }); + + group.bench_function("ast_grep_config_memory", |b| { + let globals = AstGrepGlobalRules::default(); + b.iter(|| { + let mut rules = Vec::new(); + for rule_yaml in &data.rules { + let rule = + ast_grep_from_yaml_string::(rule_yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + rules.push(rule); + } + black_box(rules); + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_rule_parsing_comparison, + bench_rule_matching_comparison, + bench_combined_scan_comparison, + bench_memory_usage_comparison +); +criterion_main!(benches); diff --git a/crates/rule-engine/benches/rule.yml b/crates/rule-engine/benches/rule.yml new file mode 100644 index 0000000..1e4602c --- /dev/null +++ b/crates/rule-engine/benches/rule.yml @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + +id: class-with-constructor +message: found class with constructor +severity: info +language: TypeScript +rule: + all: + - pattern: class $C { $$$ } + - has: + pattern: constructor_type($$$) { $$$ } + stopBy: end diff --git a/crates/rule-engine/benches/rule_engine_benchmarks.rs b/crates/rule-engine/benches/rule_engine_benchmarks.rs new file mode 100644 index 0000000..7bed1ba --- /dev/null +++ b/crates/rule-engine/benches/rule_engine_benchmarks.rs @@ -0,0 +1,327 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: MIT OR Apache-2.0 + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; + +use thread_language::{LanguageExt, SupportLang}; + +use thread_rule_engine::{CombinedScan, GlobalRules, RuleCollection, from_yaml_string}; + +pub type BenchLanguage = SupportLang; + +// Benchmark data structures +struct BenchmarkData { + simple_patterns: Vec<&'static str>, + complex_patterns: Vec<&'static str>, + code_samples: Vec<(&'static str, &'static str)>, // (language, code) +} + +impl BenchmarkData { + fn new() -> Self { + Self { + simple_patterns: vec![ + "console.log($A)", + "function $F() { $$$ }", + "let $VAR = $VALUE", + "import $MODULE from '$PATH'", + "class $CLASS { $$$ }", + ], + complex_patterns: vec![ + r#" +id: complex-pattern-1 +language: TypeScript +rule: + all: + - pattern: console.log($A) + - inside: + pattern: function $F() { $$$ } + stopBy: end +"#, + r#" +id: complex-pattern-2 +language: TypeScript +rule: + any: + - pattern: let $VAR = $VALUE + - pattern: const $VAR = $VALUE + - pattern: var $VAR = $VALUE + constraints: + VAR: + regex: ^[a-z]+$ +"#, + r#" +id: complex-pattern-3 +language: TypeScript +rule: + all: + - pattern: class $CLASS { $$$ } + - has: + pattern: constructor($$$) { $$$ } + stopBy: end + - has: + pattern: $METHOD($$$) { $$$ } + stopBy: end + constraints: + CLASS: + regex: ^[A-Z][a-zA-Z0-9]*$ +"#, + ], + code_samples: vec![ + ( + "typescript", + include_str!("../test_data/sample_typescript.ts"), + ), + ( + "javascript", + include_str!("../test_data/sample_javascript.js"), + ), + ("python", include_str!("../test_data/sample_python.py")), + ("rust", include_str!("../test_data/sample_rust.rs")), + ], + } + } +} + +fn bench_rule_parsing(c: &mut Criterion) { + let data = BenchmarkData::new(); + let globals = GlobalRules::default(); + + let mut group = c.benchmark_group("rule_parsing"); + + // Benchmark simple rule parsing + for (i, pattern) in data.simple_patterns.iter().enumerate() { + let yaml = format!( + r#" +id: test-rule-{} +message: test rule +severity: info +language: TypeScript +rule: + pattern: {} +"#, + i, pattern + ); + + group.bench_with_input(BenchmarkId::new("simple_rule", i), &yaml, |b, yaml| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + }); + }); + } + + // Benchmark complex rule parsing + for (i, pattern) in data.complex_patterns.iter().enumerate() { + group.bench_with_input( + BenchmarkId::new("complex_rule", i), + pattern, + |b, pattern| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(pattern), &globals) + .expect("should parse"); + }); + }, + ); + } + + group.finish(); +} + +fn bench_rule_matching(c: &mut Criterion) { + let data = BenchmarkData::new(); + let globals = GlobalRules::default(); + + let mut group = c.benchmark_group("rule_matching"); + + // Create test rules + let simple_rule_yaml = r#" +id: test-console-log +message: found console.log +severity: info +language: TypeScript +rule: + pattern: console.log($A) +"#; + + let complex_rule_yaml = r#" +id: test-function-with-console +message: found function with console.log +severity: info +language: TypeScript +rule: + all: + - pattern: console.log($A) + - inside: + pattern: function $F() { $$$ } + stopBy: end +"#; + + let simple_rules = + from_yaml_string::(simple_rule_yaml, &globals).expect("should parse"); + let complex_rules = + from_yaml_string::(complex_rule_yaml, &globals).expect("should parse"); + + // Test against sample code + for (lang_name, code) in &data.code_samples { + if *lang_name == "typescript" { + let grep = BenchLanguage::TypeScript.ast_grep(code); + + group.bench_with_input( + BenchmarkId::new("simple_match", lang_name), + &(grep.clone(), &simple_rules), + |b, (grep, rules)| { + b.iter(|| { + let root = grep.root(); + let matches: Vec<_> = rules + .iter() + .flat_map(|rule| root.find_all(&rule.matcher)) + .collect(); + black_box(matches); + }); + }, + ); + group.bench_with_input( + BenchmarkId::new("complex_match", lang_name), + &(grep.clone(), &complex_rules), + |b, (grep, rules)| { + b.iter(|| { + let root = grep.root(); + let matches: Vec<_> = Vec::from_iter( + rules.iter().flat_map(|rule| root.find_all(&rule.matcher)), + ); + black_box(matches); + }); + }, + ); + } + } + + group.finish(); +} + +fn bench_rule_collection(c: &mut Criterion) { + let data = BenchmarkData::new(); + let globals = GlobalRules::default(); + + let mut group = c.benchmark_group("rule_collection"); + + // Create multiple rules + let mut rules = Vec::new(); + for (i, pattern) in data.simple_patterns.iter().enumerate() { + let yaml = format!( + r#" +id: test-rule-{} +message: test rule {} +severity: info +language: TypeScript +rule: + pattern: {} +"#, + i, i, pattern + ); + + let rule = from_yaml_string::(&yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); + rules.push(rule); + } + // Benchmark collection creation + group.bench_function("collection_creation", |b| { + b.iter(|| { + let _collection = RuleCollection::try_new(black_box(rules.clone())) + .expect("should create collection"); + }); + }); + + // Benchmark combined scan + + let rule_refs: Vec<_> = rules.iter().collect(); + let combined_scan = CombinedScan::new(rule_refs); + + for (lang_name, code) in &data.code_samples { + if *lang_name == "typescript" { + let grep = BenchLanguage::TypeScript.ast_grep(code); + + group.bench_with_input( + BenchmarkId::new("combined_scan", lang_name), + &(grep.clone(), &combined_scan), + |b, (grep, scan)| { + b.iter(|| { + let result = scan.scan(black_box(grep), false); + black_box(result); + }); + }, + ); + } + } + + group.finish(); +} + +fn bench_rule_transformation(c: &mut Criterion) { + let globals = GlobalRules::default(); + let mut group = c.benchmark_group("rule_transformation"); + + let transform_rule_yaml = r#" +id: test-transform +message: test transformation +severity: info +language: TypeScript +rule: + pattern: console.log($A) +transform: + UPPER: + uppercase: + source: $A + LOWER: + lowercase: + source: $A + SUBSTRING: + substring: + source: $A + startChar: 1 + endChar: -1 +"#; + + let rule = from_yaml_string::(transform_rule_yaml, &globals) + .expect("should parse")[0] + .clone(); + + let test_code = r#" +function test() { + console.log("Hello World"); + console.log('test string'); + console.log(`template ${variable}`); +} +"#; + + let grep = BenchLanguage::TypeScript.ast_grep(test_code); + + group.bench_function("transformation", |b| { + b.iter(|| { + let matches: Vec<_> = grep.root().find_all(&rule.matcher).collect(); + for node_match in matches { + let env = node_match.get_env(); + // Access transformed variables + let _ = env.get_transformed("UPPER"); + let _ = env.get_transformed("LOWER"); + let _ = env.get_transformed("SUBSTRING"); + } + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_rule_parsing, + bench_rule_matching, + bench_rule_collection, + bench_rule_transformation +); +criterion_main!(benches); diff --git a/crates/rule-engine/benches/simple_benchmarks.rs b/crates/rule-engine/benches/simple_benchmarks.rs new file mode 100644 index 0000000..f163ed1 --- /dev/null +++ b/crates/rule-engine/benches/simple_benchmarks.rs @@ -0,0 +1,251 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: MIT OR Apache-2.0 + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; +use thread_language::SupportLang; + +use thread_rule_engine::{GlobalRules, from_yaml_string}; + +// Benchmark data +struct BenchmarkData { + simple_patterns: Vec<&'static str>, + complex_rules: Vec<&'static str>, + test_code: &'static str, +} + +impl BenchmarkData { + fn new() -> Self { + Self { + simple_patterns: vec![ + "console.log($A)", + "function $F() { $$$ }", + "let $VAR = $VALUE", + "import $MODULE from '$PATH'", + "class $CLASS { $$$ }", + ], + complex_rules: vec![ + r#" +id: complex-pattern-1 +language: TypeScript +rule: + all: + - pattern: console.log($A) + - inside: + pattern: function $F() { $$$ } + stopBy: end +"#, + r#" +id: complex-pattern-2 +language: TypeScript +rule: + any: + - pattern: let $VAR = $VALUE + - pattern: const $VAR = $VALUE + - pattern: var $VAR = $VALUE +"#, + ], + test_code: include_str!("../test_data/sample_typescript.ts"), + } + } +} + +fn bench_rule_parsing(c: &mut Criterion) { + let data = BenchmarkData::new(); + let globals = GlobalRules::default(); + let mut group = c.benchmark_group("rule_parsing"); + + // Benchmark simple rule parsing + for (i, pattern) in data.simple_patterns.iter().enumerate() { + let yaml = format!( + r#" +id: test-rule-{} +message: test rule +severity: info +language: TypeScript +rule: + pattern: {} +"#, + i, pattern + ); + + group.bench_with_input(BenchmarkId::new("simple_rule", i), &yaml, |b, yaml| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + }); + }); + } + + // Benchmark complex rule parsing + for (i, pattern) in data.complex_rules.iter().enumerate() { + group.bench_with_input( + BenchmarkId::new("complex_rule", i), + pattern, + |b, pattern| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(pattern), &globals) + .expect("should parse"); + }); + }, + ); + } + + group.finish(); +} + +fn bench_rule_compilation(c: &mut Criterion) { + let data = BenchmarkData::new(); + let globals = GlobalRules::default(); + let mut group = c.benchmark_group("rule_compilation"); + + // Create multiple rules + let mut rule_yamls = Vec::new(); + for (i, pattern) in data.simple_patterns.iter().enumerate() { + let yaml = format!( + r#" +id: test-rule-{} +message: test rule {} +severity: info +language: TypeScript +rule: + pattern: {} +"#, + i, i, pattern + ); + rule_yamls.push(yaml); + } + + // Benchmark rule compilation + group.bench_function("multiple_rules", |b| { + b.iter(|| { + let mut all_rules = Vec::new(); + for yaml in &rule_yamls { + let rules = from_yaml_string::(black_box(yaml), &globals) + .expect("should parse"); + all_rules.extend(rules); + } + black_box(all_rules); + }); + }); + + group.finish(); +} + +fn bench_rule_transformation(c: &mut Criterion) { + let globals = GlobalRules::default(); + let mut group = c.benchmark_group("rule_transformation"); + + let transform_rule_yaml = r#" +id: test-transform +message: test transformation +severity: info +language: TypeScript +rule: + pattern: console.log($A) +transform: + transformed: + substring: + source: $A + startChar: 1 + endChar: -1 +"#; + + group.bench_function("transformation_parsing", |b| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(transform_rule_yaml), &globals) + .expect("should parse"); + }); + }); + + group.finish(); +} + +fn bench_yaml_deserialization(c: &mut Criterion) { + let globals = GlobalRules::default(); + let mut group = c.benchmark_group("yaml_deserialization"); + + let large_rule_yaml = r#" +id: large-rule +message: large rule with many patterns +severity: info +language: TypeScript +rule: + any: + - pattern: console.log($A) + - pattern: console.warn($A) + - pattern: console.error($A) + - pattern: console.debug($A) + - pattern: console.info($A) + - pattern: console.trace($A) + - pattern: console.table($A) + - pattern: console.group($A) + - pattern: console.groupEnd($A) + - pattern: console.time($A) + - pattern: console.timeEnd($A) + - pattern: console.count($A) + - pattern: console.countReset($A) + - pattern: console.clear($A) + - pattern: console.assert($A, $B) + - pattern: console.dir($A) + - pattern: console.dirxml($A) + all: + - pattern: console.log($A) + - inside: + pattern: function $B() {$$$} + stopBy: end +constraints: + B: + regex: test + A: + regex: ^[a-zA-Z_][a-zA-Z0-9_]*$ + +transform: + substring: + source: $A + startChar: 1 + endChar: -1 + convert: + toCase: lowerCase + source: $A + substring: + source: $A + startChar: 1 + endChar: -1 + convert: + toCase: upperCase + source: $A + substring: + source: $A + startChar: 1 + endChar: -1 + convert: + toCase: camelCase + source: $A + convert: + toCase: camelCase + source: $A + convert: + toCase: snakeCase + source: $A +"#; + + group.bench_function("large_rule_parsing", |b| { + b.iter(|| { + let _rule = from_yaml_string::(black_box(large_rule_yaml), &globals) + .expect("should parse"); + }); + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_rule_parsing, + bench_rule_compilation, + bench_rule_transformation, + bench_yaml_deserialization +); +criterion_main!(benches); diff --git a/crates/rule-engine/serialization_analysis/README_SERIALIZATION_ANALYSIS.md b/crates/rule-engine/serialization_analysis/README_SERIALIZATION_ANALYSIS.md new file mode 100644 index 0000000..81a7168 --- /dev/null +++ b/crates/rule-engine/serialization_analysis/README_SERIALIZATION_ANALYSIS.md @@ -0,0 +1,197 @@ + + +# Serialization Dependency Analysis Tools + +This directory contains a comprehensive analysis of serialization dependencies in the `thread-rule-engine` crate and tools to help with the separation effort. + +## πŸ“ Files Created + +### πŸ” Analysis Tools + +1. **[`serialization_analysis.yml`](./serialization_analysis.yml)** + - AST-Grep rules to systematically find serialization dependencies + - 11 different rule patterns covering all aspects of serialization usage + - Can be used with the `ast-grep` CLI tool for detailed code analysis + +2. **[`analyze_serialization.rs`](./analyze_serialization.rs)** + - Rust tool for comprehensive dependency analysis + - Categorizes dependencies by type and severity + - Generates separation strategy recommendations + - Can be compiled and run for detailed reporting + +### πŸ“Š Reports + +3. **[`SERIALIZATION_ANALYSIS_REPORT.md`](./SERIALIZATION_ANALYSIS_REPORT.md)** + - **Comprehensive analysis report** with findings and recommendations + - Identifies high-impact files and separation challenges + - Provides 4-phase separation strategy with effort estimates + - **START HERE** for understanding the scope of the problem + +### πŸ› οΈ Helper Tools + +4. **[`separation_helper.sh`](./separation_helper.sh)** βœ… *executable* + - Interactive shell script with multiple analysis functions + - Validates current state and identifies feature gate candidates + - Creates patches for feature gating + - Generates separation roadmaps and checklists + +## πŸš€ Quick Start + +### 1. Read the Analysis Report + +```bash +# Start with the comprehensive analysis +cat SERIALIZATION_ANALYSIS_REPORT.md +``` + +### 2. Run the Interactive Helper + +```bash +# Run the helper script for guided analysis +./separation_helper.sh +``` + +### 3. Use AST-Grep for Detailed Analysis + +```bash +# Install ast-grep if not available +npm install -g @ast-grep/cli + +# Run specific serialization analysis +ast-grep --config serialization_analysis.yml scan ../src/ +``` + +### 4. Compile and Run the Analysis Tool + +```bash +# Compile the Rust analysis tool +rustc --edition 2021 analyze_serialization.rs -o analyze_serialization + +# Run the analysis +./analyze_serialization +``` + +## πŸ” Key Findings Summary + +Based on the comprehensive analysis: + +### πŸ“ˆ Impact Assessment + +- **Serialization density**: 70-80% of the codebase +- **High-impact files**: 8+ files with deep integration +- **Separation difficulty**: **HIGH** - requires architectural changes + +### 🎯 Root Cause + +The crate is fundamentally architected around **YAML/JSON configuration input**, making serialization central to its operation rather than optional. + +### πŸ“‹ Critical Files + +1. `src/lib.rs` - Public API is serialization-based +2. `src/rule_config.rs` - Entire config system assumes serialization +3. `src/rule_core.rs` - Core logic mixed with serialization +4. `src/rule/mod.rs` - Every rule type has serialization derives + +## πŸ—ΊοΈ Separation Strategy + +### Phase 1: Feature Gating (1-2 weeks) + +- βœ… **Low risk** - Feature gate files with minimal serialization +- Target files: `combined.rs`, `label.rs`, `check_var.rs` + +### Phase 2: Abstraction Layer (3-4 weeks) + +- ⚠️ **Medium risk** - Create trait abstractions for core functionality +- Design `RuleMatcher` and `RuleBuilder` traits + +### Phase 3: Core Logic Extraction (6-8 weeks) + +- ❌ **High risk** - Extract matching logic from serialization concerns +- Create non-serializable rule representations + +### Phase 4: Alternative APIs (4-6 weeks) + +- ⚠️ **Medium risk** - Provide programmatic rule construction +- Implement builder patterns for direct API usage + +## πŸ› οΈ Using the Tools + +### AST-Grep Analysis + +```bash +# Find all serde derives +ast-grep --lang rust --pattern '#[derive($$$)]' ../src/ | grep -E 'Serialize|Deserialize' + +# Find serialization function calls +ast-grep --lang rust --pattern 'deserialize($$$)' ../src/ +ast-grep --lang rust --pattern 'serialize($$$)' ../src/ + +# Find DeserializeEnv usage +ast-grep --lang rust --pattern 'DeserializeEnv' ../src/ +``` + +### Helper Script Functions + +The interactive helper provides: + +- Current state validation +- Serialization usage analysis +- Feature gate candidate identification +- Abstraction point suggestions +- Patch file generation +- Separation roadmap creation + +### Analysis Tool Features + +The Rust analysis tool categorizes dependencies by: + +- **Dependency type** (SerdeDerive, DeserializationCall, etc.) +- **Category** (Core Serialization, Schema Generation, etc.) +- **Severity** (High, Medium, Low impact) +- **Separation difficulty** assessment + +## ⚠️ Important Considerations + +### Backward Compatibility + +Any separation effort must maintain the existing YAML/JSON-based public API for backward compatibility. + +### Performance Impact + +Abstraction layers may introduce performance overhead that should be benchmarked. + +### Test Coverage + +The test suite heavily relies on YAML-based rule construction and will need parallel test infrastructure. + +### External Dependencies + +Tools and documentation generation depend on JsonSchema derives, which complicates separation. + +## πŸ“ Next Steps + +1. **Review the analysis report** to understand the full scope +2. **Run the helper script** to explore current state +3. **Start with Phase 1** feature gating for quick wins +4. **Design abstraction layer** before attempting major refactoring +5. **Create migration plan** that maintains backward compatibility + +## 🀝 Contributing + +When working on separation: + +- Use the analysis tools to validate changes +- Update the tools if new serialization patterns are introduced +- Maintain the separation roadmap as work progresses +- Test both serialized and non-serialized code paths + +--- + +**Created by**: Serialization Analysis Task +**Date**: January 2025 +**Purpose**: Support separation of serialization logic from core rule engine functionality for WASM deployment diff --git a/crates/rule-engine/serialization_analysis/SERIALIZATION_ANALYSIS_REPORT.md b/crates/rule-engine/serialization_analysis/SERIALIZATION_ANALYSIS_REPORT.md new file mode 100644 index 0000000..caf56fe --- /dev/null +++ b/crates/rule-engine/serialization_analysis/SERIALIZATION_ANALYSIS_REPORT.md @@ -0,0 +1,279 @@ + + +# SERIALIZATION DEPENDENCY ANALYSIS REPORT + +## Executive Summary + +The `thread-rule-engine` crate has **extensive and deeply integrated serialization dependencies** that touch nearly every aspect of the codebase. Based on my analysis of the source code, here are the key findings: + +- **Total files analyzed**: 15+ core files +- **High-impact files**: 8+ files with deep serialization integration +- **Serialization dependency density**: ~70-80% of the codebase +- **Separation difficulty**: **HIGH** - requires significant architectural changes + +## Core Problem Analysis + +### The Fundamental Issue + +The crate is architected around the assumption that **rules come from YAML/JSON configuration files** and must be serialized/deserialized. This creates a tight coupling between: + +1. **Rule definition structures** (all have `Serialize`/`Deserialize` derives) +2. **Core matching logic** (operates on deserialized rules) +3. **Configuration parsing** (entire pipeline assumes serialized input) +4. **Schema generation** (for external tools and validation) + +### Dependency Categories + +#### 1. Core Serialization (HIGH IMPACT) + +- **Serde derives**: Present on virtually every public struct/enum +- **Serializable types**: `SerializableRule`, `SerializableRuleConfig`, `SerializableRuleCore` +- **Pattern matching**: `PatternStyle`, `Strictness` enums with serialization +- **Field attributes**: Extensive use of `#[serde(default)]`, `#[serde(flatten)]`, etc. + +**Files affected**: `rule/mod.rs`, `rule_config.rs`, `rule_core.rs`, `fixer.rs`, `transform/mod.rs` + +#### 2. Serialization Operations (MEDIUM IMPACT) + +- **Deserialization functions**: `from_str`, `from_yaml_string`, `deserialize_rule` +- **Environment handling**: `DeserializeEnv` struct and methods +- **Rule parsing**: Conversion from serialized to runtime representations + +**Files affected**: `lib.rs`, `rule/deserialize_env.rs`, `rule/mod.rs` + +#### 3. Schema Generation (LOW-MEDIUM IMPACT) + +- **JsonSchema derives**: On all public types for external tooling +- **Schema metadata**: Type annotations and documentation + +**Files affected**: All struct/enum definitions + +#### 4. Crate-Specific Serialization (HIGH IMPACT) + +- **Maybe wrapper**: Optional field serialization helper +- **Transform system**: Meta-variable transformations with serialization +- **Relation handling**: Complex nested rule serialization + +**Files affected**: `maybe.rs`, `transform/`, `rule/relational_rule.rs` + +## Detailed File Analysis + +### High-Impact Files (Difficult to Separate) + +#### 1. `src/lib.rs` + +- **Serialization density**: ~60% +- **Dependencies**: Serde imports, YAML parsing, public API with serialization +- **Core functions**: `from_str`, `from_yaml_string` - fundamental to crate operation +- **Separation difficulty**: **VERY HIGH** - public API is serialization-based + +#### 2. `src/rule_config.rs` + +- **Serialization density**: ~80% +- **Dependencies**: Massive serialization integration +- **Core functions**: Rule validation, message generation, fixer creation +- **Separation difficulty**: **VERY HIGH** - entire config system assumes serialization + +#### 3. `src/rule_core.rs` + +- **Serialization density**: ~70% +- **Dependencies**: Rule deserialization, environment handling +- **Core functions**: Rule matching, meta-variable handling +- **Separation difficulty**: **HIGH** - core logic mixed with serialization + +#### 4. `src/rule/mod.rs` + +- **Serialization density**: ~85% +- **Dependencies**: Every rule type has serialization +- **Core functions**: Pattern matching, rule composition +- **Separation difficulty**: **VERY HIGH** - fundamental architecture + +#### 5. `src/fixer.rs` + +- **Serialization density**: ~50% +- **Dependencies**: Serializable fixer configs, template parsing +- **Core functions**: Code replacement generation +- **Separation difficulty**: **MEDIUM** - some separation possible + +### Medium-Impact Files + +#### 6. `src/transform/mod.rs` + +- **Serialization density**: ~40% +- **Dependencies**: Transform serialization, meta-variable handling +- **Core functions**: Variable transformation logic +- **Separation difficulty**: **MEDIUM** - logic could be abstracted + +#### 7. `src/rule_collection.rs` + +- **Serialization density**: ~30% +- **Dependencies**: Glob pattern serialization, rule aggregation +- **Core functions**: Rule organization and filtering +- **Separation difficulty**: **LOW-MEDIUM** - mostly organizational + +## Key Integration Points + +### 1. The DeserializeEnv Pattern + +```rust +pub struct DeserializeEnv { + pub lang: L, + pub registration: RuleRegistration, +} +``` + +This is the **central hub** for all deserialization operations. Every rule, pattern, and transform goes through this environment. + +### 2. Serializable Wrapper Types + +```rust +pub struct SerializableRule { /* all fields with serde annotations */ } +pub enum Rule { /* runtime representation */ } +``` + +The crate has **dual representations** - serializable versions and runtime versions, with conversion functions between them. + +### 3. The Maybe Pattern + +```rust +#[serde(default, skip_serializing_if = "Maybe::is_absent")] +pub pattern: Maybe, +``` + +Extensive use of custom `Maybe` wrapper for optional field serialization with specific semantics. + +## Separation Strategy & Recommendations + +### Phase 1: Feature Gating (Immediate - Low Risk) + +**Target**: Files with minimal serialization integration + +- `src/combined.rs` - Scanning logic (mostly core functionality) +- `src/label.rs` - Label formatting (minimal serialization) +- `src/check_var.rs` - Variable checking (pure logic) + +**Action**: Add `#[cfg(feature = "serde")]` to imports and derives + +### Phase 2: Abstraction Layer (Short-term - Medium Risk) + +**Target**: Create trait-based abstractions for core functionality + +```rust +// New abstraction layer +pub trait RuleEngine { + fn match_node(&self, node: Node) -> Option; + fn potential_kinds(&self) -> Option; +} + +// Implement for both serializable and non-serializable versions +impl RuleEngine for RuleConfig { /* ... */ } +impl RuleEngine for RuntimeRuleConfig { /* ... */ } +``` + +### Phase 3: Core Logic Extraction (Medium-term - High Risk) + +**Target**: Extract pure matching logic from serialization concerns + +**Files to refactor**: + +- Extract matching logic from `Rule` enum into separate traits +- Create non-serializable versions of core types +- Implement conversion layers + +### Phase 4: Alternative Construction API (Long-term - High Risk) + +**Target**: Provide programmatic rule construction API + +```rust +// New programmatic API (no serialization required) +pub struct RuleBuilder { + lang: L, +} + +impl RuleBuilder { + pub fn pattern(pattern: &str) -> PatternRule { /* ... */ } + pub fn kind(kind: &str) -> KindRule { /* ... */ } + pub fn inside(rule: impl Rule) -> InsideRule { /* ... */ } +} +``` + +## Critical Challenges + +### 1. **Public API Dependency** + +The crate's **entire public API** assumes YAML/JSON input. Changing this breaks backward compatibility. + +**Mitigation**: Version the API, provide both serialized and programmatic interfaces. + +### 2. **Nested Serialization Complexity** + +Rules have deeply nested serializable structures with custom serde logic. + +**Mitigation**: Create builder patterns and conversion traits rather than trying to feature-gate existing types. + +### 3. **Test Suite Dependencies** + +Most tests create rules via YAML strings, making testing of non-serialized versions difficult. + +**Mitigation**: Create parallel test infrastructure with programmatic rule construction. + +### 4. **Schema Generation Requirements** + +External tools depend on JsonSchema generation for rule validation. + +**Mitigation**: Keep serializable types for external tooling, create internal non-serializable versions. + +## Recommended Architecture + +### Current Architecture + +``` +YAML/JSON β†’ SerializableRule β†’ Rule β†’ Matcher β†’ Results + ↑ ↑ ↑ ↑ + (serde) (conversion) (matching) (core) +``` + +### Proposed Architecture + +``` +Option A: YAML/JSON β†’ SerializableRule β†’ Rule β†’ Matcher β†’ Results +Option B: RuleBuilder β†’ Rule β†’ Matcher β†’ Results + ↑ ↑ + (unified) (core) +``` + +### Implementation Strategy + +1. **Keep existing API** for backward compatibility +2. **Add feature flag** `serde` (default enabled) +3. **Create trait abstractions** for core functionality +4. **Implement programmatic API** alongside serialization API +5. **Gradually migrate internals** to use abstractions + +## Effort Estimation + +- **Phase 1 (Feature gating)**: 1-2 weeks +- **Phase 2 (Abstraction layer)**: 3-4 weeks +- **Phase 3 (Core extraction)**: 6-8 weeks +- **Phase 4 (Alternative API)**: 4-6 weeks + +**Total effort**: 3-5 months for complete separation + +## Risk Assessment + +- **High risk**: Breaking changes to public API +- **Medium risk**: Performance impact from abstraction layers +- **Low risk**: Feature gating of optional components + +## Conclusion + +The serialization integration in `thread-rule-engine` is **extensive and architectural**. Simple feature gating won't solve the problem - it requires **fundamental architectural changes** with trait abstractions and dual APIs. + +**Recommendation**: Start with Phase 1 feature gating for easy wins, then invest in the longer-term architectural changes if WASM deployment without serialization is a hard requirement. + +The good news: The **core matching logic** is sound and can be extracted. The challenge is **unwinding 70%+ of the codebase** that assumes serialized input. diff --git a/crates/rule-engine/serialization_analysis/analyze_serialization b/crates/rule-engine/serialization_analysis/analyze_serialization new file mode 100755 index 0000000..3ac5c40 Binary files /dev/null and b/crates/rule-engine/serialization_analysis/analyze_serialization differ diff --git a/crates/rule-engine/serialization_analysis/analyze_serialization.rs b/crates/rule-engine/serialization_analysis/analyze_serialization.rs new file mode 100644 index 0000000..0d49bcb --- /dev/null +++ b/crates/rule-engine/serialization_analysis/analyze_serialization.rs @@ -0,0 +1,466 @@ +// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +//! Serialization dependency analysis tool for the rule-engine crate. +//! +//! This tool helps identify and categorize all serialization-related code +//! to support the separation of serialization logic from core functionality. + +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +#[derive(Debug, Clone)] +pub struct SerializationDependency { + pub file_path: String, + pub line_number: usize, + pub dependency_type: DependencyType, + pub code_snippet: String, + pub context: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum DependencyType { + // Direct serde usage + SerdeDerive, + SerdeImport, + SerializableType, + + // Serialization functions + SerializationCall, + DeserializationCall, + YamlCall, + + + // Schema generation + JsonSchemaUsage, + SchemaGeneration, + + // Serde field attributes + SerdeAttribute, + + // Crate-specific patterns + DeserializeEnvUsage, + MaybeWrapper, + TransformFunction, + ConfigCreation, + + // Error handling + SerializationError, +} + +impl DependencyType { + pub fn category(&self) -> &'static str { + match self { + DependencyType::SerdeDerive | + DependencyType::SerdeImport | + DependencyType::SerializableType | + DependencyType::SerdeAttribute => "Core Serialization", + + DependencyType::SerializationCall | + DependencyType::DeserializationCall | + DependencyType::YamlCall => "Serialization Operations", + + DependencyType::JsonSchemaUsage | + DependencyType::SchemaGeneration => "Schema Generation", + + DependencyType::DeserializeEnvUsage | + DependencyType::MaybeWrapper | + DependencyType::TransformFunction | + DependencyType::ConfigCreation => "Crate-Specific Serialization", + + DependencyType::SerializationError => "Error Handling", + } + } + + pub fn severity(&self) -> SerializationSeverity { + match self { + // High impact - these are fundamental to serialization + DependencyType::SerdeDerive | + DependencyType::SerializableType | + DependencyType::DeserializeEnvUsage => SerializationSeverity::High, + + // Medium impact - important but could potentially be abstracted + DependencyType::SerializationCall | + DependencyType::DeserializationCall | + DependencyType::YamlCall | + DependencyType::JsonSchemaUsage | + DependencyType::TransformFunction | + DependencyType::ConfigCreation => SerializationSeverity::Medium, + + // Low impact - imports and attributes that could be feature-gated + DependencyType::SerdeImport | + DependencyType::SerdeAttribute | + DependencyType::SchemaGeneration | + DependencyType::MaybeWrapper | + DependencyType::SerializationError => SerializationSeverity::Low, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SerializationSeverity { + High, // Core to serialization, hard to separate + Medium, // Important but could be abstracted + Low, // Can be feature-gated or easily separated +} + +#[derive(Debug)] +pub struct FileAnalysis { + pub file_path: String, + pub dependencies: Vec, + pub serialization_density: f64, // Percentage of lines with serialization code + pub core_functionality: Vec, // Non-serialization functions/types + pub separation_difficulty: SerializationSeverity, +} + +#[derive(Debug)] +pub struct SerializationAnalysisReport { + pub files: Vec, + pub dependency_summary: HashMap, + pub category_summary: HashMap, + pub high_impact_files: Vec, + pub separation_strategy: SeparationStrategy, +} + +#[derive(Debug)] +pub struct SeparationStrategy { + pub feature_gate_candidates: Vec, + pub abstraction_layer_needed: Vec, + pub core_logic_files: Vec, + pub serialization_only_files: Vec, + pub mixed_responsibility_files: Vec, +} + +impl SerializationAnalysisReport { + /// Generate a comprehensive analysis report + pub fn generate_report() -> Self { + let mut report = Self { + files: Vec::new(), + dependency_summary: HashMap::new(), + category_summary: HashMap::new(), + high_impact_files: Vec::new(), + separation_strategy: SeparationStrategy { + feature_gate_candidates: Vec::new(), + abstraction_layer_needed: Vec::new(), + core_logic_files: Vec::new(), + serialization_only_files: Vec::new(), + mixed_responsibility_files: Vec::new(), + }, + }; + + // Analyze each Rust file in the src directory + if let Ok(entries) = fs::read_dir("../src") { + for entry in entries.flatten() { + if let Some(extension) = entry.path().extension() { + if extension == "rs" { + let file_analysis = Self::analyze_file(&entry.path()); + report.process_file_analysis(file_analysis); + } + } + } + } + + report.generate_strategy(); + report + } + + fn analyze_file(file_path: &Path) -> FileAnalysis { + let file_content = fs::read_to_string(file_path).unwrap_or_default(); + let lines: Vec<&str> = file_content.lines().collect(); + let mut dependencies = Vec::new(); + let mut core_functionality = Vec::new(); + + // Simulate AST-Grep analysis (in real implementation, this would call ast-grep) + for (line_num, line) in lines.iter().enumerate() { + // Check for serialization patterns + if let Some(dep) = Self::detect_serialization_dependency(line, line_num + 1) { + dependencies.push(dep); + } + + // Detect core functionality (functions, structs, impls that don't seem serialization-related) + if Self::is_core_functionality(line) { + core_functionality.push(line.trim().to_string()); + } + } + + let total_lines = lines.len(); + let serialization_lines = dependencies.len(); + let serialization_density = if total_lines > 0 { + (serialization_lines as f64 / total_lines as f64) * 100.0 + } else { + 0.0 + }; + + let separation_difficulty = Self::assess_separation_difficulty(&dependencies); + + FileAnalysis { + file_path: file_path.to_string_lossy().to_string(), + dependencies, + serialization_density, + core_functionality, + separation_difficulty, + } + } + + fn detect_serialization_dependency(line: &str, line_number: usize) -> Option { + let line = line.trim(); + + // Check for various serialization patterns + if line.contains("#[derive(") && (line.contains("Serialize") || line.contains("Deserialize")) { + return Some(SerializationDependency { + file_path: String::new(), // Will be set by caller + line_number, + dependency_type: DependencyType::SerdeDerive, + code_snippet: line.to_string(), + context: "Serde derive macro".to_string(), + }); + } + + if line.starts_with("use serde") || line.contains("use serde_yaml") || line.contains("use serde_json") || line.contains("use schemars") { + return Some(SerializationDependency { + file_path: String::new(), + line_number, + dependency_type: DependencyType::SerdeImport, + code_snippet: line.to_string(), + context: "Serialization import".to_string(), + }); + } + + if line.contains("deserialize(") || line.contains("serialize(") || line.contains("yaml::") || line.contains("serde_yaml::") || line.contains("from_yaml_string") { + let dep_type = if line.contains("deserialize(") { + DependencyType::DeserializationCall + } else if line.contains("serialize(") { + DependencyType::SerializationCall + } else if line.contains("yaml::") || line.contains("serde_yaml::") || line.contains("from_yaml_string") { + DependencyType::YamlCall + }; + + return Some(SerializationDependency { + file_path: String::new(), + line_number, + dependency_type: dep_type, + code_snippet: line.to_string(), + context: "Serialization function call".to_string(), + }); + } + + if line.contains("JsonSchema") { + return Some(SerializationDependency { + file_path: String::new(), + line_number, + dependency_type: DependencyType::JsonSchemaUsage, + code_snippet: line.to_string(), + context: "JSON schema usage".to_string(), + }); + } + + if line.contains("DeserializeEnv") { + return Some(SerializationDependency { + file_path: String::new(), + line_number, + dependency_type: DependencyType::DeserializeEnvUsage, + code_snippet: line.to_string(), + context: "Deserialization environment".to_string(), + }); + } + + if line.contains("Maybe::") || line.contains(": Maybe<") { + return Some(SerializationDependency { + file_path: String::new(), + line_number, + dependency_type: DependencyType::MaybeWrapper, + code_snippet: line.to_string(), + context: "Maybe wrapper for optional serialization".to_string(), + }); + } + + None + } + + fn is_core_functionality(line: &str) -> bool { + let line = line.trim(); + + // Look for core functionality patterns + if line.starts_with("impl Matcher") || + line.starts_with("impl Pattern") || + line.starts_with("impl Rule") || + line.starts_with("impl<") && line.contains("RuleMatcher") || + line.starts_with("impl<") && line.contains("Matcher") || + line.starts_with("fn match_node") || + line.starts_with("fn potential_kinds") || + line.contains("find(") || + line.contains("ast_grep(") { + return true; + } + + false + } + + fn assess_separation_difficulty(dependencies: &[SerializationDependency]) -> SerializationSeverity { + let high_count = dependencies.iter().filter(|d| d.dependency_type.severity() == SerializationSeverity::High).count(); + let medium_count = dependencies.iter().filter(|d| d.dependency_type.severity() == SerializationSeverity::Medium).count(); + + if high_count > 5 { + SerializationSeverity::High + } else if high_count > 0 || medium_count > 10 { + SerializationSeverity::Medium + } else { + SerializationSeverity::Low + } + } + + fn process_file_analysis(&mut self, mut file_analysis: FileAnalysis) { + // Update file paths in dependencies + for dep in &mut file_analysis.dependencies { + dep.file_path = file_analysis.file_path.clone(); + } + + // Update summaries + for dep in &file_analysis.dependencies { + *self.dependency_summary.entry(dep.dependency_type.clone()).or_insert(0) += 1; + *self.category_summary.entry(dep.dependency_type.category().to_string()).or_insert(0) += 1; + } + + // Track high-impact files + if file_analysis.separation_difficulty == SerializationSeverity::High { + self.high_impact_files.push(file_analysis.file_path.clone()); + } + + self.files.push(file_analysis); + } + + fn generate_strategy(&mut self) { + for file in &self.files { + match file.separation_difficulty { + SerializationSeverity::Low => { + if file.serialization_density > 50.0 { + self.separation_strategy.serialization_only_files.push(file.file_path.clone()); + } else { + self.separation_strategy.feature_gate_candidates.push(file.file_path.clone()); + } + } + SerializationSeverity::Medium => { + if file.core_functionality.len() > file.dependencies.len() { + self.separation_strategy.abstraction_layer_needed.push(file.file_path.clone()); + } else { + self.separation_strategy.mixed_responsibility_files.push(file.file_path.clone()); + } + } + SerializationSeverity::High => { + if !file.core_functionality.is_empty() { + self.separation_strategy.mixed_responsibility_files.push(file.file_path.clone()); + } + } + } + + // Identify files with primarily core logic + if file.serialization_density < 25.0 && !file.core_functionality.is_empty() { + self.separation_strategy.core_logic_files.push(file.file_path.clone()); + } + } + } + + /// Generate a detailed report as a string + pub fn format_report(&self) -> String { + let mut report = String::new(); + + report.push_str("# SERIALIZATION DEPENDENCY ANALYSIS REPORT\n\n"); + report.push_str("## Executive Summary\n\n"); + report.push_str(&format!("- **Total files analyzed**: {}\n", self.files.len())); + report.push_str(&format!("- **High-impact files**: {}\n", self.high_impact_files.len())); + report.push_str(&format!("- **Total serialization dependencies**: {}\n", + self.dependency_summary.values().sum::())); + + report.push_str("\n## Dependency Categories\n\n"); + for (category, count) in &self.category_summary { + report.push_str(&format!("- **{}**: {} occurrences\n", category, count)); + } + + report.push_str("\n## Detailed Dependency Breakdown\n\n"); + for (dep_type, count) in &self.dependency_summary { + report.push_str(&format!("- **{:?}**: {} ({})\n", + dep_type, count, dep_type.category())); + } + + report.push_str("\n## High-Impact Files (Difficult to Separate)\n\n"); + for file in &self.high_impact_files { + if let Some(analysis) = self.files.iter().find(|f| f.file_path == *file) { + report.push_str(&format!("### {}\n", file)); + report.push_str(&format!("- Serialization density: {:.1}%\n", analysis.serialization_density)); + report.push_str(&format!("- Dependencies: {}\n", analysis.dependencies.len())); + report.push_str(&format!("- Core functions: {}\n\n", analysis.core_functionality.len())); + } + } + + report.push_str("\n## SEPARATION STRATEGY\n\n"); + + report.push_str("### 1. Feature Gate Candidates (Easy wins)\n"); + for file in &self.separation_strategy.feature_gate_candidates { + report.push_str(&format!("- `{}`\n", file)); + } + + report.push_str("\n### 2. Core Logic Files (Keep in core)\n"); + for file in &self.separation_strategy.core_logic_files { + report.push_str(&format!("- `{}`\n", file)); + } + + report.push_str("\n### 3. Serialization-Only Files (Move to separate module)\n"); + for file in &self.separation_strategy.serialization_only_files { + report.push_str(&format!("- `{}`\n", file)); + } + + report.push_str("\n### 4. Need Abstraction Layer\n"); + for file in &self.separation_strategy.abstraction_layer_needed { + report.push_str(&format!("- `{}`\n", file)); + } + + report.push_str("\n### 5. Mixed Responsibility (Requires Refactoring)\n"); + for file in &self.separation_strategy.mixed_responsibility_files { + report.push_str(&format!("- `{}`\n", file)); + } + + report.push_str("\n## RECOMMENDATIONS\n\n"); + report.push_str("1. **Immediate actions**: Feature-gate files with low serialization impact\n"); + report.push_str("2. **Short-term**: Create abstraction layer for files needing it\n"); + report.push_str("3. **Medium-term**: Refactor mixed responsibility files\n"); + report.push_str("4. **Long-term**: Consider trait-based abstraction for core serialization needs\n"); + + report + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dependency_categorization() { + assert_eq!(DependencyType::SerdeDerive.category(), "Core Serialization"); + assert_eq!(DependencyType::SerializationCall.category(), "Serialization Operations"); + assert_eq!(DependencyType::JsonSchemaUsage.category(), "Schema Generation"); + } + + #[test] + fn test_severity_assessment() { + assert_eq!(DependencyType::SerdeDerive.severity(), SerializationSeverity::High); + assert_eq!(DependencyType::SerdeImport.severity(), SerializationSeverity::Low); + assert_eq!(DependencyType::SerializationCall.severity(), SerializationSeverity::Medium); + } +} + +// Example usage in main function or CLI +fn main() { + let report = SerializationAnalysisReport::generate_report(); + println!("{}", report.format_report()); + + // Optionally save to file + if let Err(e) = fs::write("serialization_analysis_report.md", report.format_report()) { + eprintln!("Failed to write report: {}", e); + } else { + println!("\nReport saved to serialization_analysis_report.md"); + } +} diff --git a/crates/rule-engine/serialization_analysis/separation_helper.sh b/crates/rule-engine/serialization_analysis/separation_helper.sh new file mode 100755 index 0000000..4320856 --- /dev/null +++ b/crates/rule-engine/serialization_analysis/separation_helper.sh @@ -0,0 +1,404 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Serialization Separation Helper Script +# This script provides practical tools for separating serialization logic +# from core functionality in the thread-rule-engine crate + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" +CRATE_DIR="$SCRIPT_DIR" +SRC_DIR="$CRATE_DIR/src" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}=== Thread Rule Engine Serialization Separation Helper ===${NC}" +echo "" + +# Function to print section headers +print_section() { + echo -e "${GREEN}=== $1 ===${NC}" +} + +# Function to print warnings +print_warning() { + echo -e "${YELLOW}⚠️ $1${NC}" +} + +# Function to print errors +print_error() { + echo -e "${RED}❌ $1${NC}" +} + +# Function to print success +print_success() { + echo -e "${GREEN}βœ… $1${NC}" +} + +# Check if ast-grep is available +check_ast_grep() { + if command -v ast-grep &> /dev/null; then + print_success "ast-grep is available" + return 0 + else + print_error "ast-grep is not installed. Please install it first:" + echo " npm install -g @ast-grep/cli" + echo " or visit: https://ast-grep.github.io/guide/quick-start.html" + return 1 + fi +} + +# Function to analyze current serialization usage +analyze_current_usage() { + print_section "Current Serialization Usage Analysis" + + echo "Analyzing Serde derive usage..." + if command -v ast-grep &> /dev/null; then + cd "$SRC_DIR" + echo "" + echo "Files with Serde derives:" + ast-grep --lang rust -p '#[derive($$$)]' --json | jq -r '.[] | select(.text | test("Serialize|Deserialize")) | .file' | sort | uniq + + echo "" + echo "Serde import statements:" + ast-grep --lang rust -p 'use serde' --json | jq -r '.[] | "\(.file):\(.range.start.line): \(.text)"' + + echo "" + echo "DeserializeEnv usage:" + ast-grep --lang rust -p 'DeserializeEnv' --json | jq -r '.[] | "\(.file):\(.range.start.line): \(.text)"' + else + print_warning "ast-grep not available, performing basic grep analysis..." + echo "" + echo "Files with Serde derives:" + grep -r "derive.*Serialize\|derive.*Deserialize" . --include="*.rs" | cut -d: -f1 | sort | uniq + + echo "" + echo "Serde imports:" + grep -r "use serde" . --include="*.rs" + fi +} + +# Function to identify feature gate candidates +identify_feature_gate_candidates() { + print_section "Feature Gate Candidates (Phase 1)" + + echo "These files have minimal serialization and can be feature-gated easily:" + echo "" + + # Files with low serialization density + local candidates=( + "src/combined.rs" + "src/label.rs" + "src/check_var.rs" + "src/maybe.rs" + ) + + for file in "${candidates[@]}"; do + if [[ -f "$CRATE_DIR/$file" ]]; then + local serde_count total_lines density + serde_count="$(grep -c "serde\|Serialize\|Deserialize" "$CRATE_DIR/$file" 2>/dev/null)" + echo "$serde_count" + total_lines="$(wc -l < "$CRATE_DIR/$file" 2>/dev/null)" + density="$((serde_count * 100 / total_lines))" + + if [[ $density -lt 30 ]]; then + echo -e " βœ… ${GREEN}$file${NC} - Serialization density: ${density}%" + else + echo -e " ⚠️ ${YELLOW}$file${NC} - Serialization density: ${density}% (review needed)" + fi + else + echo -e " ❓ $file - File not found" + fi + done +} + +# Function to suggest abstraction points +suggest_abstractions() { + print_section "Abstraction Layer Suggestions (Phase 2)" + + cat << 'EOF' +Consider creating these trait abstractions: + +1. Core Rule Matching: + ```rust + pub trait RuleMatcher { + fn match_node(&self, node: Node) -> Option; + fn potential_kinds(&self) -> Option; + } + ``` + +2. Rule Construction: + ```rust + pub trait RuleBuilder { + type Rule: RuleMatcher; + fn pattern(pattern: &str) -> Result; + fn kind(kind: &str) -> Result; + fn compose(rules: Vec) -> Self::Rule; + } + ``` + +3. Configuration Management: + ```rust + pub trait ConfigManager { + type Config; + fn from_rules(rules: Vec) -> Self::Config; + fn scan(&self, source: &str) -> ScanResult; + } + ``` + +Files that would benefit from abstraction: +EOF + + local abstraction_candidates=( + "src/rule_core.rs - Extract matching logic from serialization" + "src/fixer.rs - Separate fix logic from config parsing" + "src/transform/mod.rs - Abstract transformation logic" + ) + + for candidate in "${abstraction_candidates[@]}"; do + echo " β€’ $candidate" + done +} + +# Function to create feature gate patches +create_feature_gate_patches() { + print_section "Creating Feature Gate Patches" + + local patch_dir="$CRATE_DIR/separation_patches" + mkdir -p "$patch_dir" + + # Create Cargo.toml patch + cat > "$patch_dir/Cargo.toml.patch" << 'EOF' +# Add to [features] section +[features] +default = ["serde", "schema"] +serde = ["dep:serde", "dep:serde_yaml", "dep:serde_json"] +schema = ["dep:schemars", "serde"] + +# Make serde dependencies optional +[dependencies] +serde = { workspace = true, optional = true } +serde_yaml = { workspace = true, optional = true } +serde_json = { workspace = true, optional = true } +schemars = { workspace = true, optional = true } +EOF + + # Create lib.rs patch + cat > "$patch_dir/lib.rs.patch" << 'EOF' +// Add feature gates to imports +#[cfg(feature = "serde")] +use serde::Deserialize; +#[cfg(feature = "serde")] +use serde_yaml::{with::singleton_map_recursive::deserialize, Deserializer, Error as YamlError}; + +// Feature gate serialization functions +#[cfg(feature = "serde")] +pub fn from_str<'de, T: Deserialize<'de>>(s: &'de str) -> Result { + let deserializer = Deserializer::from_str(s); + deserialize(deserializer) +} + +#[cfg(feature = "serde")] +pub fn from_yaml_string<'a, L: Language + Deserialize<'a>>( + yamls: &'a str, + registration: &GlobalRules, +) -> Result>, RuleConfigError> { + // ... existing implementation +} +EOF + + print_success "Feature gate patches created in: $patch_dir/" + echo "Review and apply these patches as appropriate." +} + +# Function to run dependency analysis +run_dependency_analysis() { + print_section "Detailed Dependency Analysis" + + if [[ -f "$CRATE_DIR/analyze_serialization.rs" ]]; then + echo "Running custom dependency analysis..." + cd "$CRATE_DIR" + # Note: This would need the analysis tool to be compiled and executable + echo "To run the analysis tool:" + echo " cd crates/rule-engine" + echo " rustc --edition 2021 analyze_serialization.rs -o analyze_serialization" + echo " ./analyze_serialization" + else + print_warning "Custom analysis tool not found. Using basic analysis..." + fi + + echo "" + echo "Manual checks to perform:" + echo "1. Count serde derives: grep -r 'derive.*Serialize' src/ | wc -l" + echo "2. Find serialization calls: grep -r 'deserialize\|serialize' src/ | wc -l" + echo "3. Check schema usage: grep -r 'JsonSchema' src/ | wc -l" + echo "4. Identify core logic: grep -r 'impl.*Matcher' src/" +} + +# Function to generate separation roadmap +generate_roadmap() { + print_section "Separation Roadmap" + + cat << 'EOF' +## Phase 1: Feature Gating (1-2 weeks) +- [ ] Add optional serde dependencies to Cargo.toml +- [ ] Feature gate imports in lib.rs +- [ ] Feature gate simple files (combined.rs, label.rs, etc.) +- [ ] Update tests to handle feature flags +- [ ] Verify compilation with/without serde feature + +## Phase 2: Abstraction Layer (3-4 weeks) +- [ ] Design core traits (RuleMatcher, RuleBuilder) +- [ ] Implement traits for existing types +- [ ] Create non-serializable rule representations +- [ ] Add conversion between serializable/non-serializable +- [ ] Update internal APIs to use traits + +## Phase 3: Core Logic Extraction (6-8 weeks) +- [ ] Extract matching logic from Rule enum +- [ ] Create separate runtime rule types +- [ ] Implement programmatic rule construction API +- [ ] Refactor RuleCore to use abstractions +- [ ] Update transform system + +## Phase 4: Alternative APIs (4-6 weeks) +- [ ] Design builder pattern API +- [ ] Implement programmatic configuration +- [ ] Add direct rule construction methods +- [ ] Create migration guide +- [ ] Performance optimization + +## Testing Strategy +- [ ] Create feature flag test matrix +- [ ] Add programmatic API tests +- [ ] Performance benchmarks +- [ ] Migration validation tests + +## Documentation +- [ ] Update README with feature flags +- [ ] Document separation architecture +- [ ] Create migration guide +- [ ] Update examples +EOF +} + +# Function to validate current state +validate_current_state() { + print_section "Current State Validation" + + echo "Checking crate structure..." + + if [[ ! -f "$CRATE_DIR/Cargo.toml" ]]; then + print_error "Cargo.toml not found in $CRATE_DIR" + return 1 + fi + + if [[ ! -d "$SRC_DIR" ]]; then + print_error "src/ directory not found" + return 1 + fi + + # Check for key files + local key_files=( + "src/lib.rs" + "src/rule_config.rs" + "src/rule_core.rs" + "src/rule/mod.rs" + ) + + for file in "${key_files[@]}"; do + if [[ -f "$CRATE_DIR/$file" ]]; then + print_success "$file exists" + else + print_error "$file not found" + fi + done + + echo "" + echo "Checking dependencies..." + if grep -q "serde.*=" "$CRATE_DIR/Cargo.toml"; then + print_warning "Serde dependencies found (expected)" + fi + + if grep -q "schemars.*=" "$CRATE_DIR/Cargo.toml"; then + print_warning "Schemars dependency found (expected)" + fi +} + +# Main menu +show_menu() { + echo "" + echo "Available actions:" + echo "1. Validate current state" + echo "2. Analyze current serialization usage" + echo "3. Identify feature gate candidates" + echo "4. Suggest abstraction points" + echo "5. Create feature gate patches" + echo "6. Run dependency analysis" + echo "7. Generate separation roadmap" + echo "8. Run all analyses" + echo "0. Exit" + echo "" +} + +# Main execution +main() { + cd "$CRATE_DIR" + + while true; do + show_menu + read -p "Select an action (0-8): " choice + echo "" + + case $choice in + 1) validate_current_state ;; + 2) analyze_current_usage ;; + 3) identify_feature_gate_candidates ;; + 4) suggest_abstractions ;; + 5) create_feature_gate_patches ;; + 6) run_dependency_analysis ;; + 7) generate_roadmap ;; + 8) + validate_current_state + echo "" + analyze_current_usage + echo "" + identify_feature_gate_candidates + echo "" + suggest_abstractions + echo "" + generate_roadmap + ;; + 0) + echo "Goodbye!" + exit 0 + ;; + *) + print_error "Invalid choice. Please select 0-8." + ;; + esac + + echo "" + read -p "Press Enter to continue..." + done +} + +# Check prerequisites +if ! check_ast_grep; then + echo "" + print_warning "Some features will be limited without ast-grep" + echo "" +fi + +# Run main menu +main diff --git a/crates/rule-engine/serialization_analysis/serialization_analysis.yml b/crates/rule-engine/serialization_analysis/serialization_analysis.yml new file mode 100644 index 0000000..7229a43 --- /dev/null +++ b/crates/rule-engine/serialization_analysis/serialization_analysis.yml @@ -0,0 +1,187 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + +# AST-Grep rules to analyze serialization dependencies in rule-engine crate +# This file contains rules to find and categorize all serialization-related code + +rules: + # Rule 1: Find all serde derive macros + - id: serde-derive-usage + message: "Serde derive macro found" + language: rust + rule: + kind: attribute + has: + kind: meta_item + has: + any: + - pattern: derive($$$DERIVES$$$) + - pattern: serde($$$ARGS$$$) + transform: + DERIVES_TEXT: + source: $DERIVES + ARGS_TEXT: + source: $ARGS + + # Rule 2: Find all struct/enum definitions with Serialize/Deserialize + - id: serializable-types + message: "Type with Serialize/Deserialize traits" + language: rust + rule: + any: + - kind: struct_item + - kind: enum_item + has: + kind: attribute + has: + kind: meta_item + has: + pattern: derive($$$DERIVES$$$) + regex: "(Serialize|Deserialize)" + transform: + TYPE_NAME: + source: $TYPE_NAME + DERIVES_LIST: + source: $DERIVES + + # Rule 3: Find serde imports + - id: serde-imports + message: "Serde import statement" + language: rust + rule: + kind: use_declaration + has: + any: + - pattern: use serde::$$$ + - pattern: use serde_yaml::$$$ + - pattern: use serde_json::$$$ + - pattern: use schemars::$$$ + + # Rule 4: Find serialization/deserialization function calls + - id: serde-function-calls + message: "Serialization/Deserialization function call" + language: rust + rule: + kind: call_expression + has: + field: function + any: + - pattern: deserialize + - pattern: serialize + - pattern: from_str + - pattern: to_string + - pattern: serde_yaml::from_str + - pattern: serde_yaml::to_string + - pattern: from_yaml_string + - regex: "(deserialize|serialize)" + + # Rule 5: Find JsonSchema trait usage + - id: json-schema-usage + message: "JsonSchema trait usage" + language: rust + rule: + any: + - kind: attribute + has: + pattern: JsonSchema + - kind: impl_item + has: + pattern: JsonSchema + - kind: type_identifier + pattern: JsonSchema + + # Rule 6: Find serialization-specific fields and attributes + - id: serde-field-attributes + message: "Serde field attribute" + language: rust + rule: + kind: attribute + has: + kind: meta_item + any: + - pattern: serde($$$ARGS$$$) + - pattern: skip_serializing_if + - pattern: rename_all + - pattern: flatten + - pattern: default + + # Rule 7: Find DeserializeEnv usage (specific to this crate) + - id: deserialize-env-usage + message: "DeserializeEnv usage" + language: rust + rule: + any: + - kind: type_identifier + pattern: DeserializeEnv + - kind: call_expression + has: + field: function + pattern: deserialize_rule + + # Rule 8: Find serialization error handling + - id: serialization-errors + message: "Serialization error types" + language: rust + rule: + any: + - kind: type_identifier + regex: "(YamlError|SerializeError|DeserializeError)" + - kind: enum_variant + regex: "(Yaml|Serialize|Deserialize)" + + # Rule 9: Find Maybe wrapper usage (serialization helper) + - id: maybe-wrapper-usage + message: "Maybe wrapper for optional serialization" + language: rust + rule: + any: + - kind: type_identifier + pattern: Maybe + - kind: call_expression + has: + field: function + any: + - pattern: Maybe::Present + - pattern: Maybe::Absent + + # Rule 10: Find transform and conversion functions + - id: transform-functions + message: "Transformation functions for serialization" + language: rust + rule: + kind: function_item + any: + - has: + field: name + regex: "(transform|convert|deserialize|serialize)" + - has: + field: body + has: + any: + - pattern: deserialize + - pattern: serialize + + # Rule 11: Find config and rule creation patterns + - id: config-creation-patterns + message: "Configuration creation with serialization" + language: rust + rule: + any: + - kind: struct_expression + has: + field: name + regex: "(SerializableRule|SerializableRuleConfig|SerializableRuleCore)" + - kind: call_expression + has: + field: function + regex: "(from_str|from_yaml_string|try_from)" + +utils: + # Utility rule to find serialization-heavy files + serialization-heavy-file: + any: + - matches: serde-derive-usage + - matches: serde-imports + - matches: serializable-types diff --git a/crates/rule-engine/serialization_analysis/serialization_analysis_report.md b/crates/rule-engine/serialization_analysis/serialization_analysis_report.md new file mode 100644 index 0000000..2e384b2 --- /dev/null +++ b/crates/rule-engine/serialization_analysis/serialization_analysis_report.md @@ -0,0 +1,96 @@ + + +# SERIALIZATION DEPENDENCY ANALYSIS REPORT + +## Executive Summary + +- **Total files analyzed**: 9 +- **High-impact files**: 4 +- **Total serialization dependencies**: 110 + +## Dependency Categories + +- **Core Serialization**: 27 occurrences +- **Crate-Specific Serialization**: 74 occurrences +- **Serialization Operations**: 6 occurrences +- **Schema Generation**: 3 occurrences + +## Detailed Dependency Breakdown + +- **DeserializeEnvUsage**: 38 (Crate-Specific Serialization) +- **JsonSchemaUsage**: 3 (Schema Generation) +- **DeserializationCall**: 5 (Serialization Operations) +- **SerdeImport**: 15 (Core Serialization) +- **SerdeDerive**: 12 (Core Serialization) +- **SerializationCall**: 1 (Serialization Operations) +- **MaybeWrapper**: 36 (Crate-Specific Serialization) + +## High-Impact Files (Difficult to Separate) + +### src/check_var.rs + +- Serialization density: 2.1% +- Dependencies: 7 +- Core functions: 1 + +### src/fixer.rs + +- Serialization density: 7.9% +- Dependencies: 28 +- Core functions: 6 + +### src/rule_core.rs + +- Serialization density: 4.8% +- Dependencies: 22 +- Core functions: 27 + +### src/rule_config.rs + +- Serialization density: 1.8% +- Dependencies: 14 +- Core functions: 34 + +## SEPARATION STRATEGY + +### 1. Feature Gate Candidates (Easy wins) + +- `src/combined.rs` +- `src/rule_collection.rs` + +### 2. Core Logic Files (Keep in core) + +- `src/combined.rs` +- `src/lib.rs` +- `src/label.rs` +- `src/check_var.rs` +- `src/fixer.rs` +- `src/rule_core.rs` +- `src/rule_config.rs` + +### 3. Serialization-Only Files (Move to separate module) + +### 4. Need Abstraction Layer + +- `src/label.rs` + +### 5. Mixed Responsibility (Requires Refactoring) + +- `src/lib.rs` +- `src/maybe.rs` +- `src/check_var.rs` +- `src/fixer.rs` +- `src/rule_core.rs` +- `src/rule_config.rs` + +## RECOMMENDATIONS + +1. **Immediate actions**: Feature-gate files with low serialization impact +2. **Short-term**: Create abstraction layer for files needing it +3. **Medium-term**: Refactor mixed responsibility files +4. **Long-term**: Consider trait-based abstraction for core serialization needs diff --git a/crates/rule-engine/src/check_var.rs b/crates/rule-engine/src/check_var.rs new file mode 100644 index 0000000..3b34746 --- /dev/null +++ b/crates/rule-engine/src/check_var.rs @@ -0,0 +1,335 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::RuleCore; +use crate::fixer::{Fixer, FixerError}; +use crate::rule::Rule; +use crate::rule::referent_rule::RuleRegistration; +use crate::rule_config::RuleConfigError; +use crate::rule_core::RuleCoreError; +use crate::transform::{Transform, TransformError}; + +use thread_utils::{RapidMap, RapidSet}; + +type RResult = std::result::Result; + +pub enum CheckHint<'r> { + Global, + Normal, + Rewriter(&'r RapidSet<&'r str>), +} + +/// Different rule sections have different variable scopes/check procedure. +/// so we need to check rules with different hints. +pub fn check_rule_with_hint<'r>( + rule: &'r Rule, + utils: &'r RuleRegistration, + constraints: &'r RapidMap, + transform: &'r Option, + fixer: &Vec, + hint: CheckHint<'r>, +) -> RResult<()> { + match hint { + CheckHint::Global => { + // do not check utils defined here because global rules are not yet ready + check_vars(rule, utils, constraints, transform, fixer)?; + } + CheckHint::Normal => { + check_utils_defined(rule, constraints)?; + check_vars(rule, utils, constraints, transform, fixer)?; + } + // upper_vars is needed to check metavar defined in containing vars + CheckHint::Rewriter(upper_vars) => { + if fixer.is_empty() { + return Err(RuleCoreError::Fixer(FixerError::InvalidRewriter)); + } + check_utils_defined(rule, constraints)?; + check_vars_in_rewriter(rule, utils, constraints, transform, fixer, upper_vars)?; + } + } + Ok(()) +} + +fn check_vars_in_rewriter<'r>( + rule: &'r Rule, + utils: &'r RuleRegistration, + constraints: &'r RapidMap, + transform: &'r Option, + fixer: &Vec, + upper_var: &RapidSet<&str>, +) -> RResult<()> { + let vars = get_vars_from_rules(rule, utils); + let vars = check_var_in_constraints(vars, constraints)?; + let mut vars = check_var_in_transform(vars, transform)?; + for v in upper_var { + vars.insert(v); + } + check_var_in_fix(vars, fixer)?; + Ok(()) +} + +fn check_utils_defined(rule: &Rule, constraints: &RapidMap) -> RResult<()> { + rule.verify_util()?; + for constraint in constraints.values() { + constraint.verify_util()?; + } + Ok(()) +} + +fn check_vars<'r>( + rule: &'r Rule, + utils: &'r RuleRegistration, + constraints: &'r RapidMap, + transform: &'r Option, + fixer: &Vec, +) -> RResult<()> { + let vars = get_vars_from_rules(rule, utils); + let vars = check_var_in_constraints(vars, constraints)?; + let vars = check_var_in_transform(vars, transform)?; + check_var_in_fix(vars, fixer)?; + Ok(()) +} + +fn get_vars_from_rules<'r>(rule: &'r Rule, utils: &'r RuleRegistration) -> RapidSet<&'r str> { + let mut vars = rule.defined_vars(); + for var in utils.get_local_util_vars() { + vars.insert(var); + } + vars +} + +fn check_var_in_constraints<'r>( + mut vars: RapidSet<&'r str>, + constraints: &'r RapidMap, +) -> RResult> { + for rule in constraints.values() { + for var in rule.defined_vars() { + vars.insert(var); + } + } + for var in constraints.keys() { + let var: &str = var; + if !vars.contains(var) { + return Err(RuleCoreError::UndefinedMetaVar( + var.to_owned(), + "constraints", + )); + } + } + Ok(vars) +} + +fn check_var_in_transform<'r>( + mut vars: RapidSet<&'r str>, + transform: &'r Option, +) -> RResult> { + let Some(transform) = transform else { + return Ok(vars); + }; + for var in transform.keys() { + // vars already has the transform value. Report error! + if !vars.insert(var) { + return Err(RuleCoreError::Transform(TransformError::AlreadyDefined( + var.to_string(), + ))); + } + } + for trans in transform.values() { + let needed = trans.used_vars(); + if !vars.contains(needed) { + return Err(RuleCoreError::UndefinedMetaVar( + needed.to_string(), + "transform", + )); + } + } + Ok(vars) +} + +fn check_var_in_fix(vars: RapidSet<&str>, fixers: &Vec) -> RResult<()> { + for fixer in fixers { + for var in fixer.used_vars() { + if !vars.contains(&var) { + return Err(RuleCoreError::UndefinedMetaVar(var.to_string(), "fix")); + } + } + } + Ok(()) +} + +pub fn check_rewriters_in_transform( + rule: &RuleCore, + rewriters: &RapidMap, +) -> Result<(), RuleConfigError> { + if let Some(err) = check_one_rewriter_in_rule(rule, rewriters) { + return Err(err); + } + let error = rewriters + .values() + .find_map(|rewriter| check_one_rewriter_in_rule(rewriter, rewriters)); + if let Some(err) = error { + return Err(err); + } + Ok(()) +} + +fn check_one_rewriter_in_rule( + rule: &RuleCore, + rewriters: &RapidMap, +) -> Option { + let transform = rule.transform.as_ref()?; + let mut used_rewriters = transform + .values() + .flat_map(|trans| trans.used_rewriters().iter()); + let undefined_writers = used_rewriters.find(|r| !rewriters.contains_key(*r))?; + Some(RuleConfigError::UndefinedRewriter( + undefined_writers.to_string(), + )) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::TypeScript; + use crate::{DeserializeEnv, SerializableRuleCore, from_str}; + + #[test] + fn test_defined_vars() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str( + r" +rule: {pattern: $A = $B} +constraints: + A: { pattern: $C = $D } +transform: + E: + substring: + source: $B + startCar: 1", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + assert_eq!( + matcher.defined_vars(), + ["A", "B", "C", "D", "E"].into_iter().collect() + ); + } + + fn get_undefined(src: &str) -> (String, &str) { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str(src).expect("should deser"); + match ser_rule.get_matcher(env) { + Err(RuleCoreError::UndefinedMetaVar(name, section)) => (name, section), + _ => panic!("unexpected error"), + } + } + + #[test] + fn test_undefined_vars_in_constraints() { + let (name, section) = get_undefined( + r" +rule: {pattern: $A} +constraints: {B: {pattern: bbb}} +", + ); + assert_eq!(name, "B"); + assert_eq!(section, "constraints"); + } + #[test] + fn test_undefined_vars_in_transform() { + let (name, section) = get_undefined( + r" +rule: {pattern: $A} +constraints: {A: {pattern: $C}} +transform: + B: + replace: {source: $C, replace: a, by: b } + D: + replace: {source: $E, replace: a, by: b } +", + ); + assert_eq!(name, "E"); + assert_eq!(section, "transform"); + } + #[test] + fn test_undefined_vars_in_fix() { + let (name, section) = get_undefined( + r" +rule: {pattern: $A} +constraints: {A: {pattern: $C}} +transform: + B: + replace: {source: $C, replace: a, by: b } +fix: $D +", + ); + assert_eq!(name, "D"); + assert_eq!(section, "fix"); + } + + #[test] + fn test_defined_vars_in_utils() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str( + r" +rule: {matches: test} +utils: + test: { pattern: $B}", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + assert_eq!(matcher.defined_vars(), ["B"].into_iter().collect()); + } + + #[test] + fn test_use_vars_in_utils() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str( + r" +utils: + test: { pattern: $B } +rule: { matches: test } +fix: $B = 123", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + assert_eq!(matcher.defined_vars(), ["B"].into_iter().collect()); + } + + #[test] + fn test_defined_vars_cyclic() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str( + r" +rule: { matches: test1 } +utils: + test1: { pattern: $B, inside: {matches: test2} } + test2: { pattern: $A, has: {matches: test1} }", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + assert_eq!(matcher.defined_vars(), ["A", "B"].into_iter().collect()); + } + + #[test] + fn test_transform_already_defined() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = from_str( + r" +rule: { pattern: $A = $B } +transform: + B: { substring: { source: $A } }", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env); + match matcher { + Err(RuleCoreError::Transform(TransformError::AlreadyDefined(b))) => { + assert_eq!(b, "B"); + } + _ => panic!("unexpected error"), + } + } +} diff --git a/crates/rule-engine/src/combined.rs b/crates/rule-engine/src/combined.rs new file mode 100644 index 0000000..099350c --- /dev/null +++ b/crates/rule-engine/src/combined.rs @@ -0,0 +1,503 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::{RuleConfig, SerializableRule, SerializableRuleConfig, SerializableRuleCore, Severity}; + +use thread_ast_engine::language::Language; +use thread_ast_engine::matcher::{Matcher, MatcherExt}; +use thread_ast_engine::{AstGrep, Doc, Node, NodeMatch}; + +use thread_utils::{RapidMap, RapidSet, map_with_capacity}; + +pub struct ScanResult<'t, 'r, D: Doc, L: Language> { + pub diffs: Vec<(&'r RuleConfig, NodeMatch<'t, D>)>, + pub matches: Vec<(&'r RuleConfig, Vec>)>, +} + +/// store the index to the rule and the matched node +/// it will be converted to ScanResult by resolving the rule +struct ScanResultInner<'t, D: Doc> { + diffs: Vec<(usize, NodeMatch<'t, D>)>, + matches: RapidMap>>, + unused_suppressions: Vec>, +} + +impl<'t, D: Doc> ScanResultInner<'t, D> { + pub fn into_result<'r, L: Language>( + self, + combined: &CombinedScan<'r, L>, + separate_fix: bool, + ) -> ScanResult<'t, 'r, D, L> { + let mut diffs: Vec<_> = self + .diffs + .into_iter() + .map(|(idx, nm)| (combined.get_rule(idx), nm)) + .collect(); + let mut matches: Vec<_> = self + .matches + .into_iter() + .map(|(idx, nms)| (combined.get_rule(idx), nms)) + .collect(); + if let Some(rule) = combined.unused_suppression_rule { + if separate_fix { + diffs.extend(self.unused_suppressions.into_iter().map(|nm| (rule, nm))); + diffs.sort_unstable_by_key(|(_, nm)| nm.range().start); + } else if !self.unused_suppressions.is_empty() { + // do not push empty suppression to matches + let mut supprs = self.unused_suppressions; + supprs.sort_unstable_by_key(|nm| nm.range().start); + matches.push((rule, supprs)); + } + } + ScanResult { diffs, matches } + } +} + +enum SuppressKind { + /// suppress the whole file + File, + /// suppress specific line + Line(usize), +} + +fn get_suppression_kind(node: &Node<'_, impl Doc>) -> Option { + if !node.kind().contains("comment") || !node.text().contains(IGNORE_TEXT) { + return None; + } + let line = node.start_pos().line(); + let suppress_next_line = if let Some(prev) = node.prev() { + prev.start_pos().line() != line + } else { + true + }; + // if the first line is suppressed and the next line is empty, + // we suppress the whole file see gh #1541 + if line == 0 + && suppress_next_line + && node + .next() + .map(|next| next.start_pos().line() >= 2) + .unwrap_or(true) + { + return Some(SuppressKind::File); + } + let key = if suppress_next_line { line + 1 } else { line }; + Some(SuppressKind::Line(key)) +} + +struct Suppressions { + file: Option, + /// line number which may be suppressed + lines: RapidMap, +} + +impl Suppressions { + fn collect_all(root: &AstGrep) -> (Self, RapidMap>) { + let mut suppressions = Self { + file: None, + lines: RapidMap::default(), + }; + let mut suppression_nodes = RapidMap::default(); + for node in root.root().dfs() { + let is_all_suppressed = suppressions.collect(&node, &mut suppression_nodes); + if is_all_suppressed { + break; + } + } + (suppressions, suppression_nodes) + } + /// collect all suppression nodes from the root node + /// returns if the whole file need to be suppressed, including unused sup + /// see #1541 + fn collect<'r, D: Doc>( + &mut self, + node: &Node<'r, D>, + suppression_nodes: &mut RapidMap>, + ) -> bool { + let Some(sup) = get_suppression_kind(node) else { + return false; + }; + let suppressed = Suppression { + suppressed: parse_suppression_set(&node.text()), + node_id: node.node_id(), + }; + suppression_nodes.insert(node.node_id(), node.clone()); + match sup { + SuppressKind::File => { + let is_all_suppressed = suppressed.suppressed.is_none(); + self.file = Some(suppressed); + is_all_suppressed + } + SuppressKind::Line(key) => { + self.lines.insert( + key, + Suppression { + suppressed: parse_suppression_set(&node.text()), + node_id: node.node_id(), + }, + ); + false + } + } + } + + fn file_suppression(&self) -> MaySuppressed<'_> { + if let Some(sup) = &self.file { + MaySuppressed::Yes(sup) + } else { + MaySuppressed::No + } + } + + fn line_suppression(&self, node: &Node<'_, D>) -> MaySuppressed<'_> { + let line = node.start_pos().line(); + if let Some(sup) = self.lines.get(&line) { + MaySuppressed::Yes(sup) + } else { + MaySuppressed::No + } + } +} + +struct Suppression { + /// None = suppress all + suppressed: Option>, + node_id: usize, +} + +enum MaySuppressed<'a> { + Yes(&'a Suppression), + No, +} + +impl MaySuppressed<'_> { + fn suppressed_id(&self, rule_id: &str) -> Option { + let suppression = match self { + MaySuppressed::No => return None, + MaySuppressed::Yes(s) => s, + }; + if let Some(set) = &suppression.suppressed { + if set.contains(rule_id) { + Some(suppression.node_id) + } else { + None + } + } else { + Some(suppression.node_id) + } + } +} + +const IGNORE_TEXT: &str = "ast-grep-ignore"; + +/// A struct to group all rules according to their potential kinds. +/// This can greatly reduce traversal times and skip unmatchable rules. +/// Rules are referenced by their index in the rules vector. +pub struct CombinedScan<'r, L: Language> { + rules: Vec<&'r RuleConfig>, + /// a vec of vec, mapping from kind to a list of rule index + kind_rule_mapping: Vec>, + /// a rule for unused_suppressions + unused_suppression_rule: Option<&'r RuleConfig>, +} + +impl<'r, L: Language> CombinedScan<'r, L> { + pub fn new(mut rules: Vec<&'r RuleConfig>) -> Self { + // process fixable rule first, the order by id + // note, mapping.push will invert order so we sort fixable order in reverse + rules.sort_unstable_by_key(|r| (r.fix.is_some(), &r.id)); + let mut mapping = Vec::new(); + + // Pre-calculate the maximum kind to avoid repeated resizing + let max_kind = rules + .iter() + .filter_map(|rule| rule.matcher.potential_kinds()) + .map(|bitset| bitset.iter().max().unwrap_or(0)) + .max() + .unwrap_or(0); + + // Pre-allocate with known capacity to avoid allocations during insertion + mapping.resize(max_kind + 1, Vec::new()); + + for (idx, rule) in rules.iter().enumerate() { + let Some(kinds) = rule.matcher.potential_kinds() else { + eprintln!("rule `{}` must have kind", &rule.id); + continue; + }; + for kind in &kinds { + // Now we can safely index without bounds checking + mapping[kind].push(idx); + } + } + + // Shrink the mapping to remove empty vectors at the end + while let Some(last) = mapping.last() { + if last.is_empty() { + mapping.pop(); + } else { + break; + } + } + + Self { + rules, + kind_rule_mapping: mapping, + unused_suppression_rule: None, + } + } + + pub fn set_unused_suppression_rule(&mut self, rule: &'r RuleConfig) { + if matches!(rule.severity, Severity::Off) { + return; + } + self.unused_suppression_rule = Some(rule); + } + + pub fn scan<'a, D>(&self, root: &'a AstGrep, separate_fix: bool) -> ScanResult<'a, '_, D, L> + where + D: Doc, + { + let mut result = ScanResultInner { + diffs: Vec::with_capacity(32), // Pre-allocate for common case + matches: map_with_capacity(self.rules.len()), + unused_suppressions: Vec::with_capacity(8), + }; + let (suppressions, mut suppression_nodes) = Suppressions::collect_all(root); + let file_sup = suppressions.file_suppression(); + if let MaySuppressed::Yes(s) = file_sup { + if s.suppressed.is_none() { + return result.into_result(self, separate_fix); + } + } + for node in root.root().dfs() { + let kind = node.kind_id() as usize; + let Some(rule_idx) = self.kind_rule_mapping.get(kind) else { + continue; + }; + let line_sup = suppressions.line_suppression(&node); + for &idx in rule_idx { + let rule = &self.rules[idx]; + let Some(ret) = rule.matcher.match_node(node.clone()) else { + continue; + }; + if let Some(id) = file_sup.suppressed_id(&rule.id) { + suppression_nodes.remove(&id); + continue; + } + if let Some(id) = line_sup.suppressed_id(&rule.id) { + suppression_nodes.remove(&id); + continue; + } + if rule.fix.is_none() || !separate_fix { + let matches = result.matches.entry(idx).or_default(); + matches.push(ret); + } else { + result.diffs.push((idx, ret)); + } + } + } + result.unused_suppressions = suppression_nodes + .into_values() + .map(NodeMatch::from) + .collect(); + result.into_result(self, separate_fix) + } + + pub fn get_rule(&self, idx: usize) -> &'r RuleConfig { + self.rules[idx] + } + + pub fn unused_config(severity: Severity, lang: L) -> RuleConfig { + let rule: SerializableRule = crate::from_str(r#"{"any": []}"#).unwrap(); + let core = SerializableRuleCore { + rule, + constraints: None, + fix: crate::from_str(r#"''"#).unwrap(), + transform: None, + utils: None, + }; + let config = SerializableRuleConfig { + core, + id: "unused-suppression".to_string(), + severity, + files: None, + ignores: None, + language: lang, + message: "Unused 'ast-grep-ignore' directive.".into(), + metadata: None, + note: None, + rewriters: None, + url: None, + labels: None, + }; + RuleConfig::try_from(config, &Default::default()).unwrap() + } +} + +fn parse_suppression_set(text: &str) -> Option> { + let (_, after) = text.trim().split_once(IGNORE_TEXT)?; + let after = after.trim(); + if after.is_empty() { + return None; + } + let (_, rules) = after.split_once(':')?; + let set = rules.split(',').map(|r| r.trim().to_string()).collect(); + Some(set) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::SerializableRuleConfig; + use crate::from_str; + use crate::test::TypeScript; + use thread_ast_engine::tree_sitter::{LanguageExt, StrDoc}; + + fn create_rule() -> RuleConfig { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'console.log($A)'} +language: Tsx", + ) + .expect("parse"); + RuleConfig::try_from(rule, &Default::default()).expect("work") + } + + fn test_scan(source: &str, test_fn: F) + where + F: Fn( + Vec<( + &'_ RuleConfig, + Vec>>, + )>, + ), + { + let root = TypeScript::Tsx.ast_grep(source); + let rule = create_rule(); + let rules = vec![&rule]; + let scan = CombinedScan::new(rules); + let scanned = scan.scan(&root, false); + test_fn(scanned.matches); + } + + #[test] + fn test_ignore_node() { + let source = r#" + // ast-grep-ignore + console.log('ignored all') + console.log('no ignore') + // ast-grep-ignore: test + console.log('ignore one') + // ast-grep-ignore: not-test + console.log('ignore another') + // ast-grep-ignore: not-test, test + console.log('multiple ignore') + "#; + test_scan(source, |scanned| { + let matches = &scanned[0]; + assert_eq!(matches.1.len(), 2); + assert_eq!(matches.1[0].text(), "console.log('no ignore')"); + assert_eq!(matches.1[1].text(), "console.log('ignore another')"); + }); + } + + #[test] + fn test_ignore_node_same_line() { + let source = r#" + console.log('ignored all') // ast-grep-ignore + console.log('no ignore') + console.log('ignore one') // ast-grep-ignore: test + console.log('ignore another') // ast-grep-ignore: not-test + console.log('multiple ignore') // ast-grep-ignore: not-test, test + "#; + test_scan(source, |scanned| { + let matches = &scanned[0]; + assert_eq!(matches.1.len(), 2); + assert_eq!(matches.1[0].text(), "console.log('no ignore')"); + assert_eq!(matches.1[1].text(), "console.log('ignore another')"); + }); + } + + fn test_scan_unused(source: &str, test_fn: F) + where + F: Fn( + Vec<( + &'_ RuleConfig, + Vec>>, + )>, + ), + { + let root = TypeScript::Tsx.ast_grep(source); + let rule = create_rule(); + let rules = vec![&rule]; + let mut scan = CombinedScan::new(rules); + let mut unused = create_rule(); + unused.id = "unused-suppression".to_string(); + scan.set_unused_suppression_rule(&unused); + let scanned = scan.scan(&root, false); + test_fn(scanned.matches); + } + + #[test] + fn test_non_used_suppression() { + let source = r#" + console.log('no ignore') + console.debug('not used') // ast-grep-ignore: test + console.log('multiple ignore') // ast-grep-ignore: test + "#; + test_scan_unused(source, |scanned| { + assert_eq!(scanned.len(), 2); + let unused = &scanned[1]; + assert_eq!(unused.1.len(), 1); + assert_eq!(unused.1[0].text(), "// ast-grep-ignore: test"); + }); + } + + #[test] + fn test_file_suppression() { + let source = r#"// ast-grep-ignore: test + + console.log('ignored') + console.debug('report') // ast-grep-ignore: test + console.log('report') // ast-grep-ignore: test + "#; + test_scan_unused(source, |scanned| { + assert_eq!(scanned.len(), 1); + let unused = &scanned[0]; + assert_eq!(unused.1.len(), 2); + }); + let source = r#"// ast-grep-ignore: test + console.debug('above is not file sup') + console.log('not ignored') + "#; + test_scan_unused(source, |scanned| { + assert_eq!(scanned.len(), 2); + assert_eq!(scanned[0].0.id, "test"); + assert_eq!(scanned[1].0.id, "unused-suppression"); + }); + } + + #[test] + fn test_file_suppression_all() { + let source = r#"// ast-grep-ignore + + console.log('ignored') + console.debug('report') // ast-grep-ignore: test + console.log('report') // ast-grep-ignore + "#; + test_scan_unused(source, |scanned| { + assert_eq!(scanned.len(), 0); + }); + let source = r#"// ast-grep-ignore + + console.debug('no hit') + "#; + test_scan_unused(source, |scanned| { + assert_eq!(scanned.len(), 0); + }); + } +} diff --git a/crates/rule-engine/src/fixer.rs b/crates/rule-engine/src/fixer.rs new file mode 100644 index 0000000..4ca7b9e --- /dev/null +++ b/crates/rule-engine/src/fixer.rs @@ -0,0 +1,358 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::DeserializeEnv; +use crate::maybe::Maybe; +use crate::rule::{Relation, Rule, RuleSerializeError, StopBy}; +use crate::transform::Transformation; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use thread_ast_engine::replacer::{Content, Replacer, TemplateFix, TemplateFixError}; +use thread_ast_engine::{Doc, Language, Matcher, NodeMatch}; + +use std::ops::Range; +use thread_utils::{RapidMap, RapidSet}; + +/// A pattern string or fix object to auto fix the issue. +/// It can reference metavariables appeared in rule. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(untagged)] +pub enum SerializableFixer { + Str(String), + Config(Box), + List(Vec), +} + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct SerializableFixConfig { + template: String, + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + expand_end: Maybe, + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + expand_start: Maybe, + #[serde(skip_serializing_if = "Option::is_none")] + title: Option, +} + +#[derive(Error, Debug)] +pub enum FixerError { + #[error("Fixer template is invalid.")] + InvalidTemplate(#[from] TemplateFixError), + #[error("Fixer expansion contains invalid rule.")] + WrongExpansion(#[from] RuleSerializeError), + #[error("Rewriter must have exactly one fixer.")] + InvalidRewriter, + #[error("Fixer in list must have title.")] + MissingTitle, +} + +#[derive(Clone, Debug)] +struct Expansion { + matches: Rule, + stop_by: StopBy, +} + +impl Expansion { + fn parse( + relation: &Maybe, + env: &DeserializeEnv, + ) -> Result, FixerError> { + let inner = match relation { + Maybe::Absent => return Ok(None), + Maybe::Present(r) => r.clone(), + }; + let stop_by = StopBy::try_from(inner.stop_by, env)?; + let matches = env.deserialize_rule(inner.rule)?; + Ok(Some(Self { matches, stop_by })) + } +} + +#[derive(Clone, Debug)] +pub struct Fixer { + template: TemplateFix, + expand_start: Option, + expand_end: Option, + title: Option, +} + +impl Fixer { + fn do_parse( + serialized: &SerializableFixConfig, + env: &DeserializeEnv, + transform: &Option>, + ) -> Result { + let SerializableFixConfig { + template: fix, + expand_end, + expand_start, + title, + } = serialized; + let expand_start = Expansion::parse(expand_start, env)?; + let expand_end = Expansion::parse(expand_end, env)?; + let template = if let Some(trans) = transform { + let keys: Vec<_> = trans.keys().cloned().collect(); + TemplateFix::with_transform(fix, &env.lang, &keys) + } else { + TemplateFix::try_new(fix, &env.lang)? + }; + Ok(Self { + template, + expand_start, + expand_end, + title: title.clone(), + }) + } + + pub fn parse( + fixer: &SerializableFixer, + env: &DeserializeEnv, + transform: &Option>, + ) -> Result, FixerError> { + let ret = match fixer { + SerializableFixer::Str(fix) => Self::with_transform(fix, env, transform), + SerializableFixer::Config(cfg) => Self::do_parse(cfg, env, transform), + SerializableFixer::List(list) => { + return Self::parse_list(list, env, transform); + } + }; + Ok(vec![ret?]) + } + + fn parse_list( + list: &[SerializableFixConfig], + env: &DeserializeEnv, + transform: &Option>, + ) -> Result, FixerError> { + list.iter() + .map(|cfg| { + if cfg.title.is_none() { + return Err(FixerError::MissingTitle); + } + Self::do_parse(cfg, env, transform) + }) + .collect() + } + + pub(crate) fn with_transform( + fix: &str, + env: &DeserializeEnv, + transform: &Option>, + ) -> Result { + let template = if let Some(trans) = transform { + let keys: Vec<_> = trans.keys().cloned().collect(); + TemplateFix::with_transform(fix, &env.lang, &keys) + } else { + TemplateFix::try_new(fix, &env.lang)? + }; + Ok(Self { + template, + expand_end: None, + expand_start: None, + title: None, + }) + } + + pub fn from_str(src: &str, lang: &L) -> Result { + let template = TemplateFix::try_new(src, lang)?; + Ok(Self { + template, + expand_start: None, + expand_end: None, + title: None, + }) + } + + pub fn title(&self) -> Option<&str> { + self.title.as_deref() + } + + pub(crate) fn used_vars(&self) -> RapidSet<&str> { + self.template.used_vars() + } +} + +impl Replacer for Fixer +where + D: Doc, + C: Content, +{ + fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Vec { + // simple forwarding to template + self.template.generate_replacement(nm) + } + fn get_replaced_range(&self, nm: &NodeMatch<'_, D>, matcher: impl Matcher) -> Range { + let range = nm.range(); + if self.expand_start.is_none() && self.expand_end.is_none() { + return if let Some(len) = matcher.get_match_len(nm.get_node().clone()) { + range.start..range.start + len + } else { + range + }; + } + let start = expand_start(self.expand_start.as_ref(), nm); + let end = expand_end(self.expand_end.as_ref(), nm); + start..end + } +} + +fn expand_start(expansion: Option<&Expansion>, nm: &NodeMatch<'_, D>) -> usize { + let node = nm.get_node(); + let mut env = std::borrow::Cow::Borrowed(nm.get_env()); + let Some(start) = expansion else { + return node.range().start; + }; + let node = start.stop_by.find( + || node.prev(), + || node.prev_all(), + |n| start.matches.match_node_with_env(n, &mut env), + ); + node.map(|n| n.range().start) + .unwrap_or_else(|| nm.range().start) +} + +fn expand_end(expansion: Option<&Expansion>, nm: &NodeMatch<'_, D>) -> usize { + let node = nm.get_node(); + let mut env = std::borrow::Cow::Borrowed(nm.get_env()); + let Some(end) = expansion else { + return node.range().end; + }; + let node = end.stop_by.find( + || node.next(), + || node.next_all(), + |n| end.matches.match_node_with_env(n, &mut env), + ); + node.map(|n| n.range().end) + .unwrap_or_else(|| nm.range().end) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::maybe::Maybe; + use crate::test::TypeScript; + use thread_ast_engine::tree_sitter::LanguageExt; + + #[test] + fn test_parse() { + let fixer: SerializableFixer = from_str("test").expect("should parse"); + assert!(matches!(fixer, SerializableFixer::Str(_))); + } + + fn parse(config: SerializableFixConfig) -> Result { + let config = SerializableFixer::Config(Box::new(config)); + let env = DeserializeEnv::new(TypeScript::Tsx); + let fixer = Fixer::parse(&config, &env, &Some(Default::default()))?.remove(0); + Ok(fixer) + } + + #[test] + fn test_deserialize_object() -> Result<(), serde_yaml::Error> { + let src = "{template: 'abc', expandEnd: {regex: ',', stopBy: neighbor}}"; + let SerializableFixer::Config(cfg) = from_str(src)? else { + panic!("wrong parsing") + }; + assert_eq!(cfg.template, "abc"); + let Maybe::Present(relation) = cfg.expand_end else { + panic!("wrong parsing") + }; + let rule = relation.rule; + assert_eq!(rule.regex, Maybe::Present(",".to_string())); + assert!(rule.pattern.is_absent()); + Ok(()) + } + + #[test] + fn test_parse_config() -> Result<(), FixerError> { + let relation = from_str("{regex: ',', stopBy: neighbor}").expect("should deser"); + let config = SerializableFixConfig { + expand_end: Maybe::Present(relation), + expand_start: Maybe::Absent, + template: "abcd".to_string(), + title: None, + }; + let ret = parse(config)?; + assert!(ret.expand_start.is_none()); + assert!(ret.expand_end.is_some()); + assert!(matches!(ret.template, TemplateFix::Textual(_))); + Ok(()) + } + + #[test] + fn test_parse_str() -> Result<(), FixerError> { + let config = SerializableFixer::Str("abcd".to_string()); + let env = DeserializeEnv::new(TypeScript::Tsx); + let ret = Fixer::parse(&config, &env, &None)?.remove(0); + assert!(ret.expand_end.is_none()); + assert!(ret.expand_start.is_none()); + assert!(matches!(ret.template, TemplateFix::Textual(_))); + Ok(()) + } + + #[test] + fn test_replace_fixer() -> Result<(), FixerError> { + let expand_end = from_str("{regex: ',', stopBy: neighbor}").expect("should word"); + let config = SerializableFixConfig { + expand_end: Maybe::Present(expand_end), + expand_start: Maybe::Absent, + template: "var $A = 456".to_string(), + title: None, + }; + let fixer = parse(config)?; + let grep = TypeScript::Tsx.ast_grep("let a = 123"); + let node = grep.root().find("let $A = 123").expect("should found"); + let edit = fixer.generate_replacement(&node); + assert_eq!(String::from_utf8_lossy(&edit), "var a = 456"); + Ok(()) + } + + #[test] + fn test_replace_range() -> Result<(), FixerError> { + use thread_ast_engine::matcher::KindMatcher; + let expand_end = from_str("{regex: ',', stopBy: neighbor}").expect("should word"); + let config = SerializableFixConfig { + expand_end: Maybe::Present(expand_end), + expand_start: Maybe::Absent, + template: "c: 456".to_string(), + title: None, + }; + let fixer = parse(config)?; + let grep = TypeScript::Tsx.ast_grep("var a = { b: 123, }"); + let matcher = KindMatcher::new("pair", TypeScript::Tsx); + let node = grep.root().find(&matcher).expect("should found"); + let edit = node.make_edit(&matcher, &fixer); + let text = String::from_utf8_lossy(&edit.inserted_text); + assert_eq!(text, "c: 456"); + assert_eq!(edit.position, 10); + assert_eq!(edit.deleted_length, 7); + Ok(()) + } + + #[test] + fn test_fixer_list() -> Result<(), FixerError> { + let config: SerializableFixer = from_str( + r" +- { template: 'abc', title: 'fixer 1'} +- { template: 'def', title: 'fixer 2'}", + ) + .expect("should parse"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let fixers = Fixer::parse(&config, &env, &Some(Default::default()))?; + assert_eq!(fixers.len(), 2); + let config: SerializableFixer = from_str( + r" +- { template: 'abc', title: 'fixer 1'} +- { template: 'def'}", + ) + .expect("should parse"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let ret = Fixer::parse(&config, &env, &Some(Default::default())); + assert!(ret.is_err()); + Ok(()) + } +} diff --git a/crates/rule-engine/src/label.rs b/crates/rule-engine/src/label.rs new file mode 100644 index 0000000..157884d --- /dev/null +++ b/crates/rule-engine/src/label.rs @@ -0,0 +1,164 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::ops::Range; +use thread_ast_engine::{Doc, Node, NodeMatch}; +use thread_utils::RapidMap; + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub enum LabelStyle { + /// Labels that describe the primary cause of a diagnostic. + Primary, + /// Labels that provide additional context for a diagnostic. + Secondary, +} + +#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +pub struct LabelConfig { + pub style: LabelStyle, + pub message: Option, +} + +/// A label is a way to mark a specific part of the code with a styled message. +/// It is used to provide diagnostic information in LSP or CLI. +/// 'r represents a lifetime for the message string from `rule`. +/// 't represents a lifetime for the node from a ast `tree`. +#[derive(Clone)] +pub struct Label<'r, 't, D: Doc> { + pub style: LabelStyle, + pub message: Option<&'r str>, + pub start_node: Node<'t, D>, + pub end_node: Node<'t, D>, +} + +impl<'t, D: Doc> Label<'_, 't, D> { + fn primary(n: &Node<'t, D>) -> Self { + Self { + style: LabelStyle::Primary, + start_node: n.clone(), + end_node: n.clone(), + message: None, + } + } + fn secondary(n: &Node<'t, D>) -> Self { + Self { + style: LabelStyle::Secondary, + start_node: n.clone(), + end_node: n.clone(), + message: None, + } + } + + pub fn range(&self) -> Range { + let start = self.start_node.range().start; + let end = self.end_node.range().end; + start..end + } +} + +pub fn get_labels_from_config<'r, 't, D: Doc>( + config: &'r RapidMap, + node_match: &NodeMatch<'t, D>, +) -> Vec> { + let env = node_match.get_env(); + config + .iter() + .filter_map(|(var, conf)| { + let (start, end) = if let Some(n) = env.get_match(var) { + (n.clone(), n.clone()) + } else { + let ns = env.get_multiple_matches(var); + let start = ns.first()?.clone(); + let end = ns.last()?.clone(); + (start, end) + }; + Some(Label { + style: conf.style.clone(), + message: conf.message.as_deref(), + start_node: start, + end_node: end, + }) + }) + .collect() +} + +pub fn get_default_labels<'t, D: Doc>(n: &NodeMatch<'t, D>) -> Vec> { + let mut ret = vec![Label::primary(n)]; + if let Some(secondary) = n.get_env().get_labels("secondary") { + ret.extend(secondary.iter().map(Label::secondary)); + } + ret +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TypeScript; + use thread_ast_engine::matcher::Pattern; + use thread_ast_engine::tree_sitter::LanguageExt; + use thread_ast_engine::tree_sitter::StrDoc; + + #[test] + fn test_label_primary_secondary() { + let doc = TypeScript::Tsx.ast_grep("let a = 1;"); + let root = doc.root(); + let label = Label::primary(&root); + assert_eq!(label.style, LabelStyle::Primary); + assert_eq!(label.range(), root.range()); + let label2 = Label::<'_, '_, StrDoc>::secondary(&root); + assert_eq!(label2.style, LabelStyle::Secondary); + } + + #[test] + fn test_get_labels_from_config_single() { + let doc = TypeScript::Tsx.ast_grep("let foo = 42;"); + let pattern = Pattern::try_new("let $A = $B;", TypeScript::Tsx).unwrap(); + let m = doc.root().find(pattern).unwrap(); + let mut config = thread_utils::RapidMap::default(); + config.insert( + "A".to_string(), + LabelConfig { + style: LabelStyle::Primary, + message: Some("var label".to_string()), + }, + ); + let labels = get_labels_from_config(&config, &m); + assert_eq!(labels.len(), 1); + assert_eq!(labels[0].style, LabelStyle::Primary); + assert_eq!(labels[0].message, Some("var label")); + } + + #[test] + fn test_get_labels_from_config_multiple() { + let doc = TypeScript::Tsx.ast_grep("let foo = 42, bar = 99;"); + let pattern = Pattern::try_new("let $A = $B, $C = $D;", TypeScript::Tsx).unwrap(); + let m = doc.root().find(pattern).unwrap(); + let mut config = thread_utils::RapidMap::default(); + config.insert( + "A".to_string(), + LabelConfig { + style: LabelStyle::Secondary, + message: None, + }, + ); + let labels = get_labels_from_config(&config, &m); + assert_eq!(labels.len(), 1); + assert_eq!(labels[0].style, LabelStyle::Secondary); + } + + #[test] + fn test_get_default_labels() { + let doc = TypeScript::Tsx.ast_grep("let foo = 42;"); + let pattern = Pattern::try_new("let $A = $B;", TypeScript::Tsx).unwrap(); + let m = doc.root().find(pattern).unwrap(); + let labels = get_default_labels(&m); + assert!(!labels.is_empty()); + assert_eq!(labels[0].style, LabelStyle::Primary); + } +} diff --git a/crates/rule-engine/src/lib.rs b/crates/rule-engine/src/lib.rs new file mode 100644 index 0000000..1bcf92b --- /dev/null +++ b/crates/rule-engine/src/lib.rs @@ -0,0 +1,241 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +#![feature(portable_simd)] + +mod check_var; +mod combined; +mod fixer; +mod label; +mod maybe; +mod rule; +mod rule_collection; +mod rule_config; +mod rule_core; +mod transform; + +use serde::Deserialize; +use serde_yaml::{Deserializer, Error as YamlError, with::singleton_map_recursive::deserialize}; + +use thread_ast_engine::language::Language; + +pub use combined::CombinedScan; +pub use fixer::Fixer; +pub use label::{Label, LabelStyle}; +pub use rule::DeserializeEnv; +pub use rule::referent_rule::GlobalRules; +pub use rule::{Rule, RuleSerializeError, SerializableRule}; +pub use rule_collection::RuleCollection; +pub use rule_config::{Metadata, RuleConfig, RuleConfigError, SerializableRuleConfig, Severity}; +pub use rule_core::{RuleCore, RuleCoreError, SerializableRuleCore}; +pub use transform::Transformation; + +pub fn from_str<'de, T: Deserialize<'de>>(s: &'de str) -> Result { + let deserializer = Deserializer::from_str(s); + deserialize(deserializer) +} + +pub fn from_yaml_string<'a, L: Language + Deserialize<'a>>( + yamls: &'a str, + registration: &GlobalRules, +) -> Result>, RuleConfigError> { + let mut ret = vec![]; + for yaml in Deserializer::from_str(yamls) { + let config = RuleConfig::deserialize(yaml, registration)?; + ret.push(config); + } + Ok(ret) +} +#[cfg(test)] +mod test { + use super::*; + use std::path::Path; + use thread_ast_engine::Language; + use thread_ast_engine::matcher::{Pattern, PatternBuilder, PatternError}; + use thread_ast_engine::tree_sitter::{LanguageExt, StrDoc, TSLanguage}; + + #[derive(Clone, Debug, Deserialize, PartialEq, Eq)] + pub enum TypeScript { + Tsx, + } + impl Language for TypeScript { + fn kind_to_id(&self, kind: &str) -> u16 { + TSLanguage::from(tree_sitter_typescript::LANGUAGE_TSX).id_for_node_kind(kind, true) + } + fn field_to_id(&self, field: &str) -> Option { + TSLanguage::from(tree_sitter_typescript::LANGUAGE_TSX) + .field_id_for_name(field) + .map(|f| f.get()) + } + fn from_path>(_path: P) -> Option { + Some(TypeScript::Tsx) + } + fn build_pattern(&self, builder: &PatternBuilder) -> Result { + builder.build(|src| StrDoc::try_new(src, self.clone())) + } + } + impl LanguageExt for TypeScript { + fn get_ts_language(&self) -> TSLanguage { + tree_sitter_typescript::LANGUAGE_TSX.into() + } + } + + fn test_rule_match(yaml: &str, source: &str) { + let globals = GlobalRules::default(); + let config = &from_yaml_string::(yaml, &globals).expect("rule should parse")[0]; + let grep = config.language.ast_grep(source); + assert!(grep.root().find(&config.matcher).is_some()); + } + + fn test_rule_unmatch(yaml: &str, source: &str) { + let globals = GlobalRules::default(); + let config = &from_yaml_string::(yaml, &globals).expect("rule should parse")[0]; + let grep = config.language.ast_grep(source); + assert!(grep.root().find(&config.matcher).is_none()); + } + + fn make_yaml(rule: &str) -> String { + format!( + r" +id: test +message: test rule +severity: info +language: Tsx +rule: +{rule} +" + ) + } + + #[test] + fn test_deserialize_rule_config() { + let yaml = &make_yaml( + " + pattern: let a = 123 +", + ); + test_rule_match(yaml, "let a = 123; let b = 33;"); + test_rule_match(yaml, "class B { func() {let a = 123; }}"); + test_rule_unmatch(yaml, "const a = 33"); + } + + #[test] + fn test_deserialize_nested() { + let yaml = &make_yaml( + " + all: + - pattern: let $A = 123 + - pattern: let a = $B +", + ); + test_rule_match(yaml, "let a = 123; let b = 33;"); + test_rule_match(yaml, "class B { func() {let a = 123; }}"); + test_rule_unmatch(yaml, "const a = 33"); + test_rule_unmatch(yaml, "let a = 33"); + } + + #[test] + fn test_deserialize_kind() { + let yaml = &make_yaml( + " + kind: class_body +", + ); + test_rule_match(yaml, "class B { func() {let a = 123; }}"); + test_rule_unmatch(yaml, "const B = { func() {let a = 123; }}"); + } + + #[test] + fn test_deserialize_inside() { + let yaml = &make_yaml( + " + all: + - inside: + kind: class_body + stopBy: end + - pattern: let a = 123 +", + ); + test_rule_unmatch(yaml, "let a = 123; let b = 33;"); + test_rule_match(yaml, "class B { func() {let a = 123; }}"); + test_rule_unmatch(yaml, "let a = 123"); + } + + #[test] + fn test_deserialize_not_inside() { + let yaml = &make_yaml( + " + all: + - not: + inside: + kind: class_body + stopBy: end + - pattern: let a = 123 +", + ); + test_rule_match(yaml, "let a = 123; let b = 33;"); + test_rule_unmatch(yaml, "class B { func() {let a = 123; }}"); + test_rule_unmatch(yaml, "let a = 13"); + } + + #[test] + fn test_deserialize_meta_var() { + let yaml = &make_yaml( + " + all: + - inside: + any: + - pattern: function $A($$$) { $$$ } + - pattern: let $A = ($$$) => $$$ + stopBy: end + - pattern: $A($$$) +", + ); + test_rule_match(yaml, "function recursion() { recursion() }"); + test_rule_match(yaml, "let recursion = () => { recursion() }"); + test_rule_unmatch(yaml, "function callOther() { other() }"); + } + + #[test] + fn test_deserialize_constraints() { + let yaml = r" +id: test +message: test rule +severity: info +language: Tsx +rule: + all: + - pattern: console.log($A) + - inside: + pattern: function $B() {$$$} + stopBy: end +constraints: + B: + regex: test +"; + test_rule_match(yaml, "function test() { console.log(1) }"); + test_rule_match(yaml, "function test() { console.log(2) }"); + test_rule_unmatch(yaml, "function tt() { console.log(2) }"); + } + + // https://github.com/ast-grep/ast-grep/issues/813 + #[test] + fn test_util_rule_with_vaargs() { + let yaml = r" +id: sibling +language: Tsx +utils: + utilpat: + pattern: '$A($$$B);' +rule: + matches: utilpat + follows: + matches: utilpat + stopBy: end +"; + test_rule_match(yaml, "a();a(123);a();a(123)"); + } +} diff --git a/crates/rule-engine/src/maybe.rs b/crates/rule-engine/src/maybe.rs new file mode 100644 index 0000000..78c0765 --- /dev/null +++ b/crates/rule-engine/src/maybe.rs @@ -0,0 +1,152 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use schemars::{JsonSchema, Schema, SchemaGenerator}; +use serde::{Deserialize, Serialize, de, ser}; +use std::borrow::Cow; + +#[derive(Clone, PartialEq, Eq, Copy, Default, Debug)] +pub enum Maybe { + #[default] + Absent, + Present(T), +} + +impl Maybe { + pub fn is_present(&self) -> bool { + matches!(self, Maybe::Present(_)) + } + pub fn is_absent(&self) -> bool { + matches!(self, Maybe::Absent) + } + pub fn unwrap(self) -> T { + match self { + Maybe::Absent => panic!("called `Maybe::unwrap()` on an `Absent` value"), + Maybe::Present(t) => t, + } + } +} + +impl From> for Option { + fn from(maybe: Maybe) -> Self { + match maybe { + Maybe::Present(v) => Some(v), + Maybe::Absent => None, + } + } +} + +impl From> for Maybe { + fn from(opt: Option) -> Maybe { + match opt { + Some(v) => Maybe::Present(v), + None => Maybe::Absent, + } + } +} + +const ERROR_STR: &str = r#"Maybe fields need to be annotated with: + #[serde(default, skip_serializing_if = "Maybe::is_absent")]"#; + +impl Serialize for Maybe { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Maybe::Absent => Err(ser::Error::custom(ERROR_STR)), + Maybe::Present(t) => T::serialize(t, serializer), + } + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Maybe { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + match Option::deserialize(deserializer)? { + Some(t) => Ok(Maybe::Present(t)), + None => Err(de::Error::custom("Maybe field cannot be null.")), + } + } +} + +impl JsonSchema for Maybe { + fn schema_name() -> Cow<'static, str> { + Cow::Owned(format!("Maybe<{}>", T::schema_name())) + } + fn schema_id() -> Cow<'static, str> { + Cow::Owned(format!("Maybe<{}>", T::schema_id())) + } + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + generator.subschema_for::() + } + + fn _schemars_private_non_optional_json_schema(generator: &mut SchemaGenerator) -> Schema { + T::_schemars_private_non_optional_json_schema(generator) + } + + fn _schemars_private_is_option() -> bool { + true + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + + #[derive(Serialize, Deserialize, Debug, Clone)] + struct Correct { + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + a: Maybe, + } + #[derive(Serialize, Deserialize, Debug, Clone)] + struct Wrong { + #[serde(skip_serializing_if = "Maybe::is_absent")] + a: Maybe, + } + + #[test] + fn test_de_correct_ok() { + let correct: Correct = from_str("a: 123").expect("should ok"); + assert!(matches!(correct.a, Maybe::Present(123))); + let correct: Correct = from_str("").expect("should ok"); + assert!(matches!(correct.a, Maybe::Absent)); + } + #[test] + fn test_de_correct_err() { + let ret: Result = from_str("a:"); + assert!(ret.is_err()); + let err = ret.unwrap_err().to_string(); + assert!(err.contains("cannot be null")); + } + #[test] + fn test_de_wrong_err() { + let wrong: Wrong = from_str("a: 123").expect("should ok"); + assert!(matches!(wrong.a, Maybe::Present(123))); + let wrong: Result = from_str("a:"); + assert!(wrong.is_err()); + let wrong: Result = from_str(""); + assert!(wrong.is_err()); + } + + #[test] + #[should_panic] + fn test_unwrap_absent() { + let nothing: Maybe<()> = Maybe::Absent; + nothing.unwrap(); + } + + #[test] + fn test_from_option() { + let mut maybe = Maybe::from(None); + assert!(maybe.is_absent()); + maybe = Maybe::from(Some(123)); + assert!(maybe.is_present()); + } +} diff --git a/crates/rule-engine/src/rule/deserialize_env.rs b/crates/rule-engine/src/rule/deserialize_env.rs new file mode 100644 index 0000000..a746d58 --- /dev/null +++ b/crates/rule-engine/src/rule/deserialize_env.rs @@ -0,0 +1,337 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::referent_rule::{GlobalRules, ReferentRuleError, RuleRegistration}; +use crate::check_var::CheckHint; +use crate::maybe::Maybe; +use crate::rule::{self, Rule, RuleSerializeError, SerializableRule}; +use crate::rule_core::{RuleCoreError, SerializableRuleCore}; +use crate::transform::Trans; +use thread_ast_engine::meta_var::MetaVariable; + +use thread_ast_engine::language::Language; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use thread_utils::RapidMap; + +#[derive(Serialize, Deserialize, Clone, JsonSchema)] +pub struct SerializableGlobalRule { + #[serde(flatten)] + pub core: SerializableRuleCore, + /// Unique, descriptive identifier, e.g., no-unused-variable + pub id: String, + /// Specify the language to parse and the file extension to include in matching. + pub language: L, +} + +fn into_map( + rules: Vec>, +) -> RapidMap { + rules + .into_iter() + .map(|r| (r.id, (r.language, r.core))) + .collect() +} + +type OrderResult = Result; + +/// A struct to store information to deserialize rules. +#[derive(Clone, Debug)] +pub struct DeserializeEnv { + /// registration for global utility rules and local utility rules. + pub(crate) registration: RuleRegistration, + /// current rules' language + pub(crate) lang: L, +} + +trait DependentRule: Sized { + fn visit_dependency<'a>(&'a self, sorter: &mut TopologicalSort<'a, Self>) -> OrderResult<()>; +} + +impl DependentRule for SerializableRule { + fn visit_dependency<'a>(&'a self, sorter: &mut TopologicalSort<'a, Self>) -> OrderResult<()> { + visit_dependent_rule_ids(self, sorter) + } +} + +impl DependentRule for (L, SerializableRuleCore) { + fn visit_dependency<'a>(&'a self, sorter: &mut TopologicalSort<'a, Self>) -> OrderResult<()> { + visit_dependent_rule_ids(&self.1.rule, sorter) + } +} + +impl DependentRule for Trans { + fn visit_dependency<'a>(&'a self, sorter: &mut TopologicalSort<'a, Self>) -> OrderResult<()> { + let used_var = self.used_vars(); + sorter.visit(used_var) + } +} + +/// A struct to topological sort rules +/// it is used to report cyclic dependency errors in rules/transformation +struct TopologicalSort<'a, T: DependentRule> { + maps: &'a RapidMap, + order: Vec<&'a str>, + // bool stands for if the rule has completed visit + seen: RapidMap<&'a str, bool>, +} + +impl<'a, T: DependentRule> TopologicalSort<'a, T> { + fn get_order(maps: &RapidMap) -> OrderResult> { + let mut top_sort = TopologicalSort::new(maps); + for key in maps.keys() { + top_sort.visit(key)?; + } + Ok(top_sort.order) + } + + fn new(maps: &'a RapidMap) -> Self { + Self { + maps, + order: vec![], + seen: RapidMap::default(), + } + } + + fn visit(&mut self, key: &'a str) -> OrderResult<()> { + if let Some(&completed) = self.seen.get(key) { + // if the rule has been seen but not completed + // it means we have a cyclic dependency and report an error here + return if completed { + Ok(()) + } else { + Err(key.to_string()) + }; + } + let Some(item) = self.maps.get(key) else { + // key can be found elsewhere + // e.g. if key is rule_id + // if rule_id not found in global, it can be a local rule + // if rule_id not found in local, it can be a global rule + // TODO: add check here and return Err if rule not found + return Ok(()); + }; + // mark the id as seen but not completed + self.seen.insert(key, false); + item.visit_dependency(self)?; + // mark the id as seen and completed + self.seen.insert(key, true); + self.order.push(key); + Ok(()) + } +} + +fn visit_dependent_rule_ids<'a, T: DependentRule>( + rule: &'a SerializableRule, + sort: &mut TopologicalSort<'a, T>, +) -> OrderResult<()> { + // handle all composite rule here + if let Maybe::Present(matches) = &rule.matches { + sort.visit(matches)?; + } + if let Maybe::Present(all) = &rule.all { + for sub in all { + visit_dependent_rule_ids(sub, sort)?; + } + } + if let Maybe::Present(any) = &rule.any { + for sub in any { + visit_dependent_rule_ids(sub, sort)?; + } + } + if let Maybe::Present(not) = &rule.not { + visit_dependent_rule_ids(not, sort)?; + } + Ok(()) +} + +impl DeserializeEnv { + pub fn new(lang: L) -> Self { + Self { + registration: Default::default(), + lang, + } + } + + /// register utils rule in the DeserializeEnv for later usage. + /// N.B. This function will manage the util registration order + /// by their dependency. `potential_kinds` need ordered insertion. + pub fn with_utils( + self, + utils: &RapidMap, + ) -> Result { + let order = TopologicalSort::get_order(utils) + .map_err(ReferentRuleError::CyclicRule) + .map_err(RuleSerializeError::MatchesReference)?; + for id in order { + let rule = utils.get(id).expect("must exist"); + let rule = self.deserialize_rule(rule.clone())?; + self.registration.insert_local(id, rule)?; + } + Ok(self) + } + + /// register global utils rule discovered in the config. + pub fn parse_global_utils( + utils: Vec>, + ) -> Result { + let registration = GlobalRules::default(); + let utils = into_map(utils); + let order = TopologicalSort::get_order(&utils) + .map_err(ReferentRuleError::CyclicRule) + .map_err(RuleSerializeError::from)?; + for id in order { + let (lang, core) = utils.get(id).expect("must exist"); + let env = DeserializeEnv::new(lang.clone()).with_globals(®istration); + let matcher = core.get_matcher_with_hint(env, CheckHint::Global)?; + registration + .insert(id, matcher) + .map_err(RuleSerializeError::MatchesReference)?; + } + Ok(registration) + } + + pub fn deserialize_rule( + &self, + serialized: SerializableRule, + ) -> Result { + rule::deserialize_rule(serialized, self) + } + + pub(crate) fn get_transform_order<'a>( + &self, + trans: &'a RapidMap>, + ) -> Result, String> { + TopologicalSort::get_order(trans) + } + + pub fn with_globals(self, globals: &GlobalRules) -> Self { + Self { + registration: RuleRegistration::from_globals(globals), + lang: self.lang, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::TypeScript; + use crate::{Rule, from_str}; + use thread_ast_engine::Matcher; + use thread_ast_engine::tree_sitter::LanguageExt; + + type Result = std::result::Result; + + fn get_dependent_utils() -> Result<(Rule, DeserializeEnv)> { + let utils = from_str( + " +accessor-name: + matches: member-name + regex: whatever +member-name: + kind: identifier +", + ) + .expect("failed to parse utils"); + let env = DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils)?; + assert_eq!(utils.keys().count(), 2); + let rule = from_str("matches: accessor-name").unwrap(); + Ok(( + env.deserialize_rule(rule).unwrap(), + env, // env is required for weak ref + )) + } + + #[test] + fn test_local_util_matches() -> Result<()> { + let (rule, _env) = get_dependent_utils()?; + let grep = TypeScript::Tsx.ast_grep("whatever"); + assert!(grep.root().find(rule).is_some()); + Ok(()) + } + + #[test] + #[ignore = "TODO, need to figure out potential_kinds"] + fn test_local_util_kinds() -> Result<()> { + // run multiple times to avoid accidental working order due to FastMap randomness + for _ in 0..10 { + let (rule, _env) = get_dependent_utils()?; + assert!(rule.potential_kinds().is_some()); + } + Ok(()) + } + + #[test] + fn test_using_global_rule_in_local() -> Result<()> { + let utils = from_str( + " +local-rule: + matches: global-rule +", + ) + .expect("failed to parse utils"); + // should not panic + DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils)?; + Ok(()) + } + + #[test] + fn test_using_cyclic_local() -> Result<()> { + let utils = from_str( + " +local-rule: + matches: local-rule +", + ) + .expect("failed to parse utils"); + let ret = DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils); + assert!(ret.is_err()); + Ok(()) + } + + #[test] + fn test_using_transitive_cycle() -> Result<()> { + let utils = from_str( + " +local-rule-a: + matches: local-rule-b +local-rule-b: + all: + - matches: local-rule-c +local-rule-c: + any: + - matches: local-rule-a +", + ) + .expect("failed to parse utils"); + let ret = DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils); + assert!(ret.is_err()); + Ok(()) + } + + #[test] + fn test_cyclic_not() -> Result<()> { + let utils = from_str( + " +local-rule-a: + not: {matches: local-rule-b} +local-rule-b: + matches: local-rule-a", + ) + .expect("failed to parse utils"); + let ret = DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils); + assert!(matches!( + ret, + Err(RuleSerializeError::MatchesReference( + ReferentRuleError::CyclicRule(_) + )) + )); + Ok(()) + } +} diff --git a/crates/rule-engine/src/rule/mod.rs b/crates/rule-engine/src/rule/mod.rs new file mode 100644 index 0000000..bfbecdd --- /dev/null +++ b/crates/rule-engine/src/rule/mod.rs @@ -0,0 +1,670 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +mod deserialize_env; +mod nth_child; +mod range; +pub mod referent_rule; +mod relational_rule; +mod stop_by; + +pub use deserialize_env::DeserializeEnv; +pub use relational_rule::Relation; +pub use stop_by::StopBy; + +use crate::maybe::Maybe; +use nth_child::{NthChild, NthChildError, SerializableNthChild}; +use range::{RangeMatcher, RangeMatcherError, SerializableRange}; +use referent_rule::{ReferentRule, ReferentRuleError}; +use relational_rule::{Follows, Has, Inside, Precedes}; + +use thread_ast_engine::language::Language; +use thread_ast_engine::matcher::{KindMatcher, KindMatcherError, RegexMatcher, RegexMatcherError}; +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::{Doc, Node, ops as o}; +use thread_ast_engine::{MatchStrictness, Matcher, Pattern, PatternError}; + +use bit_set::BitSet; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use thiserror::Error; +use thread_utils::RapidSet; + +/// A rule object to find matching AST nodes. We have three categories of rules in ast-grep. +/// +/// * Atomic: the most basic rule to match AST. We have two variants: Pattern and Kind. +/// +/// * Relational: filter matched target according to their position relative to other nodes. +/// +/// * Composite: use logic operation all/any/not to compose the above rules to larger rules. +/// +/// Every rule has it's unique name so we can combine several rules in one object. +#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct SerializableRule { + // avoid embedding AtomicRule/RelationalRule/CompositeRule with flatten here for better error message + + // atomic + /// A pattern string or a pattern object. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub pattern: Maybe, + /// The kind name of the node to match. You can look up code's kind names in playground. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub kind: Maybe, + /// A Rust regular expression to match the node's text. https://docs.rs/regex/latest/regex/#syntax + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub regex: Maybe, + /// `nth_child` accepts number, string or object. + /// It specifies the position in nodes' sibling list. + #[serde(default, skip_serializing_if = "Maybe::is_absent", rename = "nthChild")] + pub nth_child: Maybe, + /// `range` accepts a range object. + /// the target node must exactly appear in the range. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub range: Maybe, + + // relational + /// `inside` accepts a relational rule object. + /// the target node must appear inside of another node matching the `inside` sub-rule. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub inside: Maybe>, + /// `has` accepts a relational rule object. + /// the target node must has a descendant node matching the `has` sub-rule. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub has: Maybe>, + /// `precedes` accepts a relational rule object. + /// the target node must appear before another node matching the `precedes` sub-rule. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub precedes: Maybe>, + /// `follows` accepts a relational rule object. + /// the target node must appear after another node matching the `follows` sub-rule. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub follows: Maybe>, + // composite + /// A list of sub rules and matches a node if all of sub rules match. + /// The meta variables of the matched node contain all variables from the sub-rules. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub all: Maybe>, + /// A list of sub rules and matches a node if any of sub rules match. + /// The meta variables of the matched node only contain those of the matched sub-rule. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub any: Maybe>, + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + /// A single sub-rule and matches a node if the sub rule does not match. + pub not: Maybe>, + /// A utility rule id and matches a node if the utility rule matches. + #[serde(default, skip_serializing_if = "Maybe::is_absent")] + pub matches: Maybe, +} + +struct Categorized { + pub atomic: AtomicRule, + pub relational: RelationalRule, + pub composite: CompositeRule, +} + +impl SerializableRule { + fn categorized(self) -> Categorized { + Categorized { + atomic: AtomicRule { + pattern: self.pattern.into(), + kind: self.kind.into(), + regex: self.regex.into(), + nth_child: self.nth_child.into(), + range: self.range.into(), + }, + relational: RelationalRule { + inside: self.inside.into(), + has: self.has.into(), + precedes: self.precedes.into(), + follows: self.follows.into(), + }, + composite: CompositeRule { + all: self.all.into(), + any: self.any.into(), + not: self.not.into(), + matches: self.matches.into(), + }, + } + } +} + +pub struct AtomicRule { + pub pattern: Option, + pub kind: Option, + pub regex: Option, + pub nth_child: Option, + pub range: Option, +} +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum Strictness { + /// all nodes are matched + Cst, + /// all nodes except source trivial nodes are matched. + Smart, + /// only ast nodes are matched + Ast, + /// ast-nodes excluding comments are matched + Relaxed, + /// ast-nodes excluding comments, without text + Signature, +} + +impl From for Strictness { + fn from(value: MatchStrictness) -> Self { + use MatchStrictness as M; + use Strictness as S; + match value { + M::Cst => S::Cst, + M::Smart => S::Smart, + M::Ast => S::Ast, + M::Relaxed => S::Relaxed, + M::Signature => S::Signature, + } + } +} + +impl From for MatchStrictness { + fn from(value: Strictness) -> Self { + use MatchStrictness as M; + use Strictness as S; + match value { + S::Cst => M::Cst, + S::Smart => M::Smart, + S::Ast => M::Ast, + S::Relaxed => M::Relaxed, + S::Signature => M::Signature, + } + } +} + +/// A String pattern will match one single AST node according to pattern syntax. +/// Or an object with field `context`, `selector` and optionally `strictness`. +#[derive(Serialize, Deserialize, Clone, JsonSchema, Debug)] +#[serde(untagged)] +pub enum PatternStyle { + Str(String), + Contextual { + /// The surrounding code that helps to resolve any ambiguity in the syntax. + context: String, + /// The sub-syntax node kind that is the actual matcher of the pattern. + selector: Option, + /// Strictness of the pattern. More strict pattern matches fewer nodes. + strictness: Option, + }, +} + +pub struct RelationalRule { + pub inside: Option>, + pub has: Option>, + pub precedes: Option>, + pub follows: Option>, +} + +pub struct CompositeRule { + pub all: Option>, + pub any: Option>, + pub not: Option>, + pub matches: Option, +} + +#[derive(Clone, Debug)] +pub enum Rule { + // atomic + Pattern(Pattern), + Kind(KindMatcher), + Regex(RegexMatcher), + NthChild(NthChild), + Range(RangeMatcher), + // relational + Inside(Box), + Has(Box), + Precedes(Box), + Follows(Box), + // composite + All(o::All), + Any(o::Any), + Not(Box>), + Matches(ReferentRule), +} +impl Rule { + /// Check if it has a cyclic referent rule with the id. + pub(crate) fn check_cyclic(&self, id: &str) -> bool { + match self { + Rule::All(all) => all.inner().iter().any(|r| r.check_cyclic(id)), + Rule::Any(any) => any.inner().iter().any(|r| r.check_cyclic(id)), + Rule::Not(not) => not.inner().check_cyclic(id), + Rule::Matches(m) => m.rule_id == id, + _ => false, + } + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + match self { + Rule::Pattern(p) => p.defined_vars(), + Rule::Kind(_) => RapidSet::default(), + Rule::Regex(_) => RapidSet::default(), + Rule::NthChild(n) => n.defined_vars(), + Rule::Range(_) => RapidSet::default(), + Rule::Has(c) => c.defined_vars(), + Rule::Inside(p) => p.defined_vars(), + Rule::Precedes(f) => f.defined_vars(), + Rule::Follows(f) => f.defined_vars(), + Rule::All(sub) => sub.inner().iter().flat_map(|r| r.defined_vars()).collect(), + Rule::Any(sub) => sub.inner().iter().flat_map(|r| r.defined_vars()).collect(), + Rule::Not(sub) => sub.inner().defined_vars(), + // TODO: this is not correct, we are collecting util vars else where + Rule::Matches(_r) => RapidSet::default(), + } + } + + /// check if util rules used are defined + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + match self { + Rule::Pattern(_) => Ok(()), + Rule::Kind(_) => Ok(()), + Rule::Regex(_) => Ok(()), + Rule::NthChild(n) => n.verify_util(), + Rule::Range(_) => Ok(()), + Rule::Has(c) => c.verify_util(), + Rule::Inside(p) => p.verify_util(), + Rule::Precedes(f) => f.verify_util(), + Rule::Follows(f) => f.verify_util(), + Rule::All(sub) => sub.inner().iter().try_for_each(|r| r.verify_util()), + Rule::Any(sub) => sub.inner().iter().try_for_each(|r| r.verify_util()), + Rule::Not(sub) => sub.inner().verify_util(), + Rule::Matches(r) => Ok(r.verify_util()?), + } + } +} + +impl Matcher for Rule { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + use Rule::*; + match self { + // atomic + Pattern(pattern) => pattern.match_node_with_env(node, env), + Kind(kind) => kind.match_node_with_env(node, env), + Regex(regex) => regex.match_node_with_env(node, env), + NthChild(nth_child) => nth_child.match_node_with_env(node, env), + Range(range) => range.match_node_with_env(node, env), + // relational + Inside(parent) => match_and_add_label(&**parent, node, env), + Has(child) => match_and_add_label(&**child, node, env), + Precedes(latter) => match_and_add_label(&**latter, node, env), + Follows(former) => match_and_add_label(&**former, node, env), + // composite + All(all) => all.match_node_with_env(node, env), + Any(any) => any.match_node_with_env(node, env), + Not(not) => not.match_node_with_env(node, env), + Matches(rule) => rule.match_node_with_env(node, env), + } + } + + fn potential_kinds(&self) -> Option { + use Rule::*; + match self { + // atomic + Pattern(pattern) => pattern.potential_kinds(), + Kind(kind) => kind.potential_kinds(), + Regex(regex) => regex.potential_kinds(), + NthChild(nth_child) => nth_child.potential_kinds(), + Range(range) => range.potential_kinds(), + // relational + Inside(parent) => parent.potential_kinds(), + Has(child) => child.potential_kinds(), + Precedes(latter) => latter.potential_kinds(), + Follows(former) => former.potential_kinds(), + // composite + All(all) => all.potential_kinds(), + Any(any) => any.potential_kinds(), + Not(not) => not.potential_kinds(), + Matches(rule) => rule.potential_kinds(), + } + } +} + +/// Rule matches nothing by default. +/// In Math jargon, Rule is vacuously false. +impl Default for Rule { + fn default() -> Self { + Self::Any(o::Any::new(std::iter::empty())) + } +} + +fn match_and_add_label<'tree, D: Doc, M: Matcher>( + inner: &M, + node: Node<'tree, D>, + env: &mut Cow>, +) -> Option> { + let matched = inner.match_node_with_env(node, env)?; + env.to_mut().add_label("secondary", matched.clone()); + Some(matched) +} + +#[derive(Error, Debug)] +pub enum RuleSerializeError { + #[error("Rule must have one positive matcher.")] + MissPositiveMatcher, + #[error("Rule contains invalid kind matcher.")] + InvalidKind(#[from] KindMatcherError), + #[error("Rule contains invalid pattern matcher.")] + InvalidPattern(#[from] PatternError), + #[error("Rule contains invalid nthChild.")] + NthChild(#[from] NthChildError), + #[error("Rule contains invalid regex matcher.")] + WrongRegex(#[from] RegexMatcherError), + #[error("Rule contains invalid matches reference.")] + MatchesReference(#[from] ReferentRuleError), + #[error("Rule contains invalid range matcher.")] + InvalidRange(#[from] RangeMatcherError), + #[error("field is only supported in has/inside.")] + FieldNotSupported, + #[error("Relational rule contains invalid field {0}.")] + InvalidField(String), +} + +// TODO: implement positive/non positive +pub fn deserialize_rule( + serialized: SerializableRule, + env: &DeserializeEnv, +) -> Result { + let mut rules = Vec::with_capacity(1); + use Rule as R; + let categorized = serialized.categorized(); + // ATTENTION, relational_rule should always come at last + // after target node is decided by atomic/composite rule + deserialize_atomic_rule(categorized.atomic, &mut rules, env)?; + deserialize_composite_rule(categorized.composite, &mut rules, env)?; + deserialize_relational_rule(categorized.relational, &mut rules, env)?; + + if rules.is_empty() { + Err(RuleSerializeError::MissPositiveMatcher) + } else if rules.len() == 1 { + Ok(rules.pop().expect("should not be empty")) + } else { + Ok(R::All(o::All::new(rules))) + } +} + +fn deserialize_composite_rule( + composite: CompositeRule, + rules: &mut Vec, + env: &DeserializeEnv, +) -> Result<(), RuleSerializeError> { + use Rule as R; + let convert_rules = |rules: Vec| -> Result<_, RuleSerializeError> { + let mut inner = Vec::with_capacity(rules.len()); + for rule in rules { + inner.push(deserialize_rule(rule, env)?); + } + Ok(inner) + }; + if let Some(all) = composite.all { + rules.push(R::All(o::All::new(convert_rules(all)?))); + } + if let Some(any) = composite.any { + rules.push(R::Any(o::Any::new(convert_rules(any)?))); + } + if let Some(not) = composite.not { + let not = o::Not::new(deserialize_rule(*not, env)?); + rules.push(R::Not(Box::new(not))); + } + if let Some(id) = composite.matches { + let matches = ReferentRule::try_new(id, &env.registration)?; + rules.push(R::Matches(matches)); + } + Ok(()) +} + +fn deserialize_relational_rule( + relational: RelationalRule, + rules: &mut Vec, + env: &DeserializeEnv, +) -> Result<(), RuleSerializeError> { + use Rule as R; + // relational + if let Some(inside) = relational.inside { + rules.push(R::Inside(Box::new(Inside::try_new(*inside, env)?))); + } + if let Some(has) = relational.has { + rules.push(R::Has(Box::new(Has::try_new(*has, env)?))); + } + if let Some(precedes) = relational.precedes { + rules.push(R::Precedes(Box::new(Precedes::try_new(*precedes, env)?))); + } + if let Some(follows) = relational.follows { + rules.push(R::Follows(Box::new(Follows::try_new(*follows, env)?))); + } + Ok(()) +} + +fn deserialize_atomic_rule( + atomic: AtomicRule, + rules: &mut Vec, + env: &DeserializeEnv, +) -> Result<(), RuleSerializeError> { + use Rule as R; + if let Some(pattern) = atomic.pattern { + rules.push(match pattern { + PatternStyle::Str(pat) => R::Pattern(Pattern::try_new(&pat, &env.lang)?), + PatternStyle::Contextual { + context, + selector, + strictness, + } => { + let pattern = if let Some(selector) = selector { + Pattern::contextual(&context, &selector, &env.lang)? + } else { + Pattern::try_new(&context, &env.lang)? + }; + let pattern = if let Some(strictness) = strictness { + pattern.with_strictness(strictness.into()) + } else { + pattern + }; + R::Pattern(pattern) + } + }); + } + if let Some(kind) = atomic.kind { + rules.push(R::Kind(KindMatcher::try_new(&kind, &env.lang)?)); + } + if let Some(regex) = atomic.regex { + rules.push(R::Regex(RegexMatcher::try_new(®ex)?)); + } + if let Some(nth_child) = atomic.nth_child { + rules.push(R::NthChild(NthChild::try_new(nth_child, env)?)); + } + if let Some(range) = atomic.range { + rules.push(R::Range(RangeMatcher::try_new(range.start, range.end)?)); + } + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::test::TypeScript; + use PatternStyle::*; + use thread_ast_engine::tree_sitter::LanguageExt; + + #[test] + fn test_pattern() { + let src = r" +pattern: Test +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(rule.pattern.is_present()); + let src = r" +pattern: + context: class $C { set $B() {} } + selector: method_definition +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(matches!(rule.pattern, Maybe::Present(Contextual { .. }),)); + } + + #[test] + fn test_augmentation() { + let src = r" +pattern: class A {} +inside: + pattern: function() {} +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(rule.inside.is_present()); + assert!(rule.pattern.is_present()); + } + + #[test] + fn test_multi_augmentation() { + let src = r" +pattern: class A {} +inside: + pattern: function() {} +has: + pattern: Some() +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(rule.inside.is_present()); + assert!(rule.has.is_present()); + assert!(rule.follows.is_absent()); + assert!(rule.precedes.is_absent()); + assert!(rule.pattern.is_present()); + } + + #[test] + fn test_maybe_not() { + let src = "not: 123"; + let ret: Result = from_str(src); + assert!(ret.is_err()); + let src = "not:"; + let ret: Result = from_str(src); + assert!(ret.is_err()); + } + + #[test] + fn test_nested_augmentation() { + let src = r" +pattern: class A {} +inside: + pattern: function() {} + inside: + pattern: + context: Some() + selector: ss +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(rule.inside.is_present()); + let inside = rule.inside.unwrap(); + assert!(inside.rule.pattern.is_present()); + assert!(inside.rule.inside.unwrap().rule.pattern.is_present()); + } + + #[test] + fn test_precedes_follows() { + let src = r" +pattern: class A {} +precedes: + pattern: function() {} +follows: + pattern: + context: Some() + selector: ss +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + assert!(rule.precedes.is_present()); + assert!(rule.follows.is_present()); + let follows = rule.follows.unwrap(); + assert!(follows.rule.pattern.is_present()); + assert!(follows.rule.pattern.is_present()); + } + + #[test] + fn test_deserialize_rule() { + let src = r" +pattern: class A {} +kind: class_declaration +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule = deserialize_rule(rule, &env).expect("should deserialize"); + let root = TypeScript::Tsx.ast_grep("class A {}"); + assert!(root.root().find(rule).is_some()); + } + + #[test] + fn test_deserialize_order() { + let src = r" +pattern: class A {} +inside: + kind: class +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule = deserialize_rule(rule, &env).expect("should deserialize"); + assert!(matches!(rule, Rule::All(_))); + } + + #[test] + fn test_defined_vars() { + let src = r" +pattern: var $A = 123 +inside: + pattern: var $B = 456 +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule = deserialize_rule(rule, &env).expect("should deserialize"); + assert_eq!(rule.defined_vars(), ["A", "B"].into_iter().collect()); + } + + #[test] + fn test_issue_1164() { + let src = r" + kind: statement_block + has: + pattern: this.$A = promise() + stopBy: end"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule = deserialize_rule(rule, &env).expect("should deserialize"); + let root = TypeScript::Tsx.ast_grep( + "if (a) { + this.a = b; + this.d = promise() + }", + ); + assert!(root.root().find(rule).is_some()); + } + + #[test] + fn test_issue_1225() { + let src = r" + kind: statement_block + has: + pattern: $A + regex: const"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule = deserialize_rule(rule, &env).expect("should deserialize"); + let root = TypeScript::Tsx.ast_grep( + "{ + let x = 1; + const z = 9; + }", + ); + assert!(root.root().find(rule).is_some()); + } +} diff --git a/crates/rule-engine/src/rule/nth_child.rs b/crates/rule-engine/src/rule/nth_child.rs new file mode 100644 index 0000000..d61ca5a --- /dev/null +++ b/crates/rule-engine/src/rule/nth_child.rs @@ -0,0 +1,430 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::{DeserializeEnv, Rule, RuleSerializeError, SerializableRule}; + +use thread_ast_engine::language::Language; +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::{Doc, Matcher, Node}; + +use std::borrow::Cow; +use thread_utils::RapidSet; + +use bit_set::BitSet; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum NthChildError { + #[error("Illegal character {0} encountered")] + IllegalCharacter(char), + #[error("Invalid syntax")] + InvalidSyntax, + #[error("Invalid ofRule")] + InvalidRule(#[from] Box), +} + +/// A string or number describing the indices of matching nodes in a list of siblings. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(untagged)] +pub enum NthChildSimple { + /// A number indicating the precise element index + Numeric(usize), + /// Functional notation like CSS's An + B + Functional(String), +} + +enum ParseState { + Initial, + N, + Sign(bool), // bool flag: has met n before? + Num(bool), // bool flag: has met n before +} + +fn parse_an_b(input: &str) -> Result { + use ParseState::*; + let mut step_size = 0; + let mut sign = 1; + let mut num = 0; + let mut state = Initial; + for c in input.chars() { + // ignore all white spaces + if c.is_whitespace() { + continue; + } + match state { + Initial => match c { + '+' | '-' => { + state = Sign(false); + sign = if c == '+' { 1 } else { -1 }; + } + '0'..='9' => { + state = Num(false); + num = (c as u8 - b'0') as i32; + } + 'n' | 'N' => { + state = N; + step_size = sign; + } + c => return Err(NthChildError::IllegalCharacter(c)), + }, + Sign(has_n) => match c { + '+' | '-' => return Err(NthChildError::InvalidSyntax), + '0'..='9' => { + state = Num(has_n); + num = (c as u8 - b'0') as i32; + } + 'n' | 'N' => { + if has_n { + return Err(NthChildError::InvalidSyntax); + } + state = N; + step_size = sign; + } + c => return Err(NthChildError::IllegalCharacter(c)), + }, + Num(has_n) => match c { + '+' | '-' => return Err(NthChildError::InvalidSyntax), + '0'..='9' => { + num = num * 10 + (c as u8 - b'0') as i32; + } + 'n' | 'N' => { + if has_n { + return Err(NthChildError::InvalidSyntax); + } + state = N; + step_size = sign * num; + num = 0; + } + c => return Err(NthChildError::IllegalCharacter(c)), + }, + N => match c { + '+' | '-' => { + state = Sign(true); + sign = if c == '+' { 1 } else { -1 }; + num = 0; + } + '0'..='9' => return Err(NthChildError::InvalidSyntax), + 'n' | 'N' => return Err(NthChildError::InvalidSyntax), + c => return Err(NthChildError::IllegalCharacter(c)), + }, + } + } + if matches!(state, Sign(_) | Initial) { + Err(NthChildError::InvalidSyntax) + } else { + Ok(FunctionalPosition { + step_size, + offset: num * sign, + }) + } +} + +impl NthChildSimple { + fn try_parse(&self) -> Result { + match self { + NthChildSimple::Numeric(n) => Ok(FunctionalPosition { + step_size: 0, + offset: *n as i32, + }), + NthChildSimple::Functional(s) => parse_an_b(s), + } + } +} + +/// `nthChild` accepts either a number, a string or an object. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(untagged, rename_all = "camelCase")] +pub enum SerializableNthChild { + /// Simple syntax + Simple(NthChildSimple), + /// Object style syntax + #[serde(rename_all = "camelCase")] + Complex { + /// nth-child syntax + position: NthChildSimple, + /// select the nth node that matches the rule, like CSS's of syntax + of_rule: Option>, + /// matches from the end instead like CSS's nth-last-child + #[serde(default)] + reverse: bool, + }, +} + +/// Corresponds to the CSS syntax An+B +/// See https://developer.mozilla.org/en-US/docs/Web/CSS/:nth-child#functional_notation +#[derive(Clone, Debug)] +struct FunctionalPosition { + step_size: i32, + offset: i32, +} + +impl FunctionalPosition { + /// index is 0-based, but output is 1-based + fn is_matched(&self, index: usize) -> bool { + let index = (index + 1) as i32; // Convert 0-based index to 1-based + let FunctionalPosition { step_size, offset } = self; + if *step_size == 0 { + index == *offset + } else { + let n = index - offset; + n / step_size >= 0 && n % step_size == 0 + } + } +} + +#[derive(Clone, Debug)] +pub struct NthChild { + position: FunctionalPosition, + of_rule: Option>, + reverse: bool, +} + +impl NthChild { + pub fn try_new( + rule: SerializableNthChild, + env: &DeserializeEnv, + ) -> Result { + match rule { + SerializableNthChild::Simple(position) => Ok(NthChild { + position: position.try_parse()?, + of_rule: None, + reverse: false, + }), + SerializableNthChild::Complex { + position, + of_rule, + reverse, + } => Ok(NthChild { + position: position.try_parse()?, + of_rule: of_rule + .map(|r| env.deserialize_rule(*r)) + .transpose() + .map_err(Box::new)? + .map(Box::new), + reverse, + }), + } + } + + fn find_index<'t, D: Doc>( + &self, + node: &Node<'t, D>, + env: &mut Cow>, + ) -> Option { + let parent = node.parent()?; + // only consider named children + let mut children: Vec<_> = if let Some(rule) = &self.of_rule { + // if of_rule is present, only consider children that match the rule + parent + .children() + .filter(|n| n.is_named()) + .filter_map(|child| rule.match_node_with_env(child, env)) + .collect() + } else { + parent.children().filter(|n| n.is_named()).collect() + }; + // count the index from the end if reverse is true + if self.reverse { + children.reverse() + } + children + .iter() + .position(|child| child.node_id() == node.node_id()) + } + pub fn defined_vars(&self) -> RapidSet<&str> { + if let Some(rule) = &self.of_rule { + rule.defined_vars() + } else { + RapidSet::default() + } + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + if let Some(rule) = &self.of_rule { + rule.verify_util() + } else { + Ok(()) + } + } +} + +impl Matcher for NthChild { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let index = self.find_index(&node, env)?; + self.position.is_matched(index).then_some(node) + } + fn potential_kinds(&self) -> Option { + let rule = self.of_rule.as_ref()?; + rule.potential_kinds() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::test::TypeScript as TS; + use thread_ast_engine::matcher::RegexMatcher; + use thread_ast_engine::meta_var::MetaVarEnv; + use thread_ast_engine::tree_sitter::LanguageExt; + + #[test] + fn test_positional() { + let position = FunctionalPosition { + step_size: 0, + offset: 1, + }; + assert!(position.is_matched(0)); + assert!(!position.is_matched(1)); + assert!(!position.is_matched(2)); + } + + #[test] + fn test_positional_an_b() { + let position = FunctionalPosition { + step_size: 2, + offset: -1, + }; + assert!(position.is_matched(0)); + assert!(!position.is_matched(1)); + assert!(position.is_matched(2)); + assert!(!position.is_matched(3)); + assert!(position.is_matched(4)); + } + + fn find_index(rule: Option, reverse: bool) -> Option { + let rule = NthChild { + position: FunctionalPosition { + step_size: 2, + offset: -1, + }, + of_rule: rule.map(Box::new), + reverse, + }; + let mut env = Cow::Owned(MetaVarEnv::new()); + let grep = TS::Tsx.ast_grep("[1,2,3,4]"); + let node = grep.root().find("2").unwrap(); + rule.find_index(&*node, &mut env) + } + + #[test] + fn test_find_index_simple() { + let i = find_index(None, false); + assert_eq!(i, Some(1)); + } + + #[test] + fn test_find_index_reverse() { + let i = find_index(None, true); + assert_eq!(i, Some(2)); + } + + #[test] + fn test_find_of_rule() { + let regex = RegexMatcher::try_new(r"2|3").unwrap(); + let i = find_index(Some(Rule::Regex(regex.clone())), false); + assert_eq!(i, Some(0)); + let i = find_index(Some(Rule::Regex(regex)), true); + assert_eq!(i, Some(1)); + } + + fn parse(s: &str) -> FunctionalPosition { + parse_an_b(s).expect("should parse") + } + fn test_parse(s: &str, step: i32, offset: i32) { + let pos = parse(s); + assert_eq!(pos.step_size, step, "{s}: wrong step"); + assert_eq!(pos.offset, offset, "{s}: wrong offset"); + } + + #[test] + fn test_parse_selector() { + // https://www.w3.org/TR/css-syntax-3/#anb-microsyntax + test_parse("12n + 2", 12, 2); + test_parse("-12n + 21", -12, 21); + test_parse("-12n - 21", -12, -21); + test_parse("2n + 0", 2, 0); + test_parse("-1n + 6", -1, 6); + test_parse("-4n + 10", -4, 10); + test_parse("0n + 5", 0, 5); + test_parse("2", 0, 2); + test_parse("-2", 0, -2); + test_parse("n", 1, 0); + test_parse("-n", -1, 0); + test_parse("N", 1, 0); + test_parse("-N", -1, 0); + test_parse("123 n", 123, 0); + } + + fn parse_error(s: &str, name: &str) { + let Err(err) = parse_an_b(s) else { + panic!("should parse error: {s}"); + }; + match err { + NthChildError::InvalidSyntax => assert_eq!(name, "syntax"), + NthChildError::IllegalCharacter(_) => assert_eq!(name, "character"), + NthChildError::InvalidRule(_) => assert_eq!(name, "rule"), + } + } + + #[test] + fn test_error() { + parse_error("3a + b", "character"); + parse_error("3 - n", "syntax"); + parse_error("3 ++ n", "syntax"); + parse_error("n++", "syntax"); + parse_error("3 + 5", "syntax"); + parse_error("3n +", "syntax"); + parse_error("3n + n", "syntax"); + parse_error("n + 3n", "syntax"); + parse_error("+ n + n", "syntax"); + parse_error("+ n - n", "syntax"); + parse_error("nN", "syntax"); + parse_error("+", "syntax"); + parse_error("-", "syntax"); + parse_error("a", "character"); + parse_error("+a", "character"); + parse_error("na", "character"); + } + + fn deser(src: &str) -> Rule { + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let env = DeserializeEnv::new(TS::Tsx); + env.deserialize_rule(rule).expect("should deserialize") + } + + #[test] + fn test_serialize() { + let root = TS::Tsx.ast_grep("[1,2,3,4]"); + let root = root.root(); + let rule = deser(r"nthChild: 3"); + assert_eq!(root.find(rule).expect("should find").text(), "3"); + let rule = deser(r"nthChild: { position: 2n + 2 }"); + assert_eq!(root.find(rule).expect("should find").text(), "2"); + let rule = deser(r"nthChild: { position: 2n + 2, reverse: true }"); + assert_eq!(root.find(rule).expect("should find").text(), "1"); + let rule = deser(r"nthChild: { position: 2n + 2, ofRule: {regex: '2|3'} }"); + assert_eq!(root.find(rule).expect("should find").text(), "3"); + } + + #[test] + fn test_defined_vars() { + let rule = deser(r"nthChild: { position: 2, ofRule: {pattern: '$A'} }"); + assert_eq!(rule.defined_vars(), vec!["A"].into_iter().collect()); + } + + #[test] + fn test_verify_util() { + let rule = deser(r"nthChild: { position: 2, ofRule: {pattern: '$A'} }"); + assert!(rule.verify_util().is_ok()); + } +} diff --git a/crates/rule-engine/src/rule/range.rs b/crates/rule-engine/src/rule/range.rs new file mode 100644 index 0000000..e3c6d5e --- /dev/null +++ b/crates/rule-engine/src/rule/range.rs @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thread_ast_engine::{Doc, Node, meta_var::MetaVarEnv}; + +/// Represents a zero-based character-wise position in a document +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +pub struct SerializablePosition { + /// 0-based line number in the source code + pub line: usize, + /// 0-based column number in the source code + pub column: usize, +} + +/// Represents a position in source code using 0-based line and column numbers +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +pub struct SerializableRange { + /// start position in the source code + pub start: SerializablePosition, + /// end position in the source code + pub end: SerializablePosition, +} + +use std::borrow::Cow; + +use bit_set::BitSet; +use thiserror::Error; + +use super::Matcher; + +/// Errors that can occur when creating or using a RangeMatcher +#[derive(Clone, Error, Debug)] +pub enum RangeMatcherError { + /// Returned when the range is invalid. This can occur when: + /// - start position is after end position + /// - positions contain invalid line/column values + #[error("The start position must be before the end position.")] + InvalidRange, +} + +#[derive(Clone, Debug)] +pub struct RangeMatcher { + start: SerializablePosition, + end: SerializablePosition, +} + +impl RangeMatcher { + pub fn new(start_pos: SerializablePosition, end_pos: SerializablePosition) -> Self { + Self { + start: start_pos, + end: end_pos, + } + } + + pub fn try_new( + start_pos: SerializablePosition, + end_pos: SerializablePosition, + ) -> Result { + if start_pos.line > end_pos.line + || (start_pos.line == end_pos.line && start_pos.column > end_pos.column) + { + return Err(RangeMatcherError::InvalidRange); + } + + let range = Self::new(start_pos, end_pos); + Ok(range) + } +} + +impl Matcher for RangeMatcher { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + _env: &mut Cow>, + ) -> Option> { + let node_start_pos = node.start_pos(); + let node_end_pos = node.end_pos(); + + // first check line since it is cheaper + if self.start.line != node_start_pos.line() || self.end.line != node_end_pos.line() { + return None; + } + // then check column, this can be expensive for utf-8 encoded files + if self.start.column != node_start_pos.column(&node) + || self.end.column != node_end_pos.column(&node) + { + return None; + } + Some(node) + } + + fn potential_kinds(&self) -> Option { + None + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::TypeScript as TS; + use thread_ast_engine::matcher::MatcherExt; + use thread_ast_engine::tree_sitter::LanguageExt; + + #[test] + fn test_invalid_range() { + let range = RangeMatcher::try_new( + SerializablePosition { + line: 0, + column: 10, + }, + SerializablePosition { line: 0, column: 5 }, + ); + assert!(range.is_err()); + } + + #[test] + fn test_range_match() { + let cand = TS::Tsx.ast_grep("class A { a = 123 }"); + let cand = cand.root(); + let pattern = RangeMatcher::new( + SerializablePosition { + line: 0, + column: 10, + }, + SerializablePosition { + line: 0, + column: 17, + }, + ); + assert!(pattern.find_node(cand).is_some()); + } + + #[test] + fn test_range_non_match() { + let cand = TS::Tsx.ast_grep("class A { a = 123 }"); + let cand = cand.root(); + let pattern = RangeMatcher::new( + SerializablePosition { + line: 0, + column: 10, + }, + SerializablePosition { + line: 0, + column: 15, + }, + ); + assert!(pattern.find_node(cand).is_none(),); + } + + #[test] + fn test_multiline_range() { + let cand = TS::Tsx.ast_grep( + "class A { \n b = () => { \n const c = 1 \n const d = 3 \n return c + d \n } }", + ); + let cand = cand.root(); + let pattern = RangeMatcher::new( + SerializablePosition { line: 1, column: 1 }, + SerializablePosition { line: 5, column: 2 }, + ); + assert!(pattern.find_node(cand).is_some()); + } + + #[test] + fn test_unicode_range() { + let cand = TS::Tsx.ast_grep("let a = 'πŸ¦„'"); + let cand = cand.root(); + let pattern = RangeMatcher::new( + SerializablePosition { line: 0, column: 8 }, + SerializablePosition { + line: 0, + column: 11, + }, + ); + let node = pattern.find_node(cand); + assert!(node.is_some()); + assert_eq!(node.expect("should exist").text(), "'πŸ¦„'"); + } +} diff --git a/crates/rule-engine/src/rule/referent_rule.rs b/crates/rule-engine/src/rule/referent_rule.rs new file mode 100644 index 0000000..235fe6b --- /dev/null +++ b/crates/rule-engine/src/rule/referent_rule.rs @@ -0,0 +1,280 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::{Rule, RuleCore}; + +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::{Doc, Matcher, Node}; + +use bit_set::BitSet; +use thiserror::Error; + +use std::borrow::Cow; +use std::sync::{Arc, Weak}; +use thread_utils::{RapidMap, RapidSet, set_with_capacity}; + +#[derive(Debug)] +pub struct Registration(Arc>); + +impl Clone for Registration { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl Registration { + #[allow(clippy::mut_from_ref)] + fn write(&self) -> &mut RapidMap { + // SAFETY: `write` will only be called during initialization and + // it only insert new item to the RapidMap. It is safe to cast the raw ptr. + unsafe { &mut *(Arc::as_ptr(&self.0) as *mut RapidMap) } + } +} +pub type GlobalRules = Registration; + +impl GlobalRules { + pub fn insert(&self, id: &str, rule: RuleCore) -> Result<(), ReferentRuleError> { + let map = self.write(); + if map.contains_key(id) { + return Err(ReferentRuleError::DuplicateRule(id.into())); + } + map.insert(id.to_string(), rule); + let rule = map.get(id).unwrap(); + // TODO: we can skip check here because insertion order + // is guaranteed in deserialize_env + if rule.check_cyclic(id) { + return Err(ReferentRuleError::CyclicRule(id.to_string())); + } + Ok(()) + } +} + +impl Default for Registration { + fn default() -> Self { + Self(Default::default()) + } +} + +#[derive(Clone, Debug, Default)] +pub struct RuleRegistration { + /// utility rule to every RuleCore, every sub-rule has its own local utility + local: Registration, + /// global rules are shared by all RuleConfigs. It is a singleton. + global: Registration, + /// Every RuleConfig has its own rewriters. But sub-rules share parent's rewriters. + rewriters: Registration, +} + +// these are shit code +impl RuleRegistration { + pub fn get_rewriters(&self) -> &RapidMap { + &self.rewriters.0 + } + + pub fn from_globals(global: &GlobalRules) -> Self { + Self { + local: Default::default(), + global: global.clone(), + rewriters: Default::default(), + } + } + + fn get_ref(&self) -> RegistrationRef { + let local = Arc::downgrade(&self.local.0); + let global = Arc::downgrade(&self.global.0); + RegistrationRef { local, global } + } + + pub(crate) fn insert_local(&self, id: &str, rule: Rule) -> Result<(), ReferentRuleError> { + let map = self.local.write(); + if map.contains_key(id) { + return Err(ReferentRuleError::DuplicateRule(id.into())); + } + map.insert(id.to_string(), rule); + let rule = map.get(id).unwrap(); + // TODO: we can skip check here because insertion order + // is guaranteed in deserialize_env + if rule.check_cyclic(id) { + return Err(ReferentRuleError::CyclicRule(id.to_string())); + } + Ok(()) + } + + pub(crate) fn insert_rewriter(&self, id: &str, rewriter: RuleCore) { + self.rewriters.insert(id, rewriter).expect("should work"); + } + + pub(crate) fn get_local_util_vars(&self) -> RapidSet<&str> { + let utils = &self.local.0; + let size = size_of_val(utils); + if size == 0 { + return RapidSet::default(); + } + // this gets closer to the actual size + let mut ret = set_with_capacity(size); + for rule in utils.values() { + for v in rule.defined_vars() { + ret.insert(v); + } + } + ret + } +} + +/// RegistrationRef must use Weak pointer to avoid +/// cyclic reference in RuleRegistration +#[derive(Clone, Debug)] +struct RegistrationRef { + local: Weak>, + global: Weak>, +} +impl RegistrationRef { + fn get_local(&self) -> Arc> { + self.local + .upgrade() + .expect("Rule Registration must be kept alive") + } + fn get_global(&self) -> Arc> { + self.global + .upgrade() + .expect("Rule Registration must be kept alive") + } +} + +#[derive(Error, Debug)] +pub enum ReferentRuleError { + #[error("Rule `{0}` is not defined.")] + UndefinedUtil(String), + #[error("Duplicate rule id `{0}` is found.")] + DuplicateRule(String), + #[error("Rule `{0}` has a cyclic dependency in its `matches` sub-rule.")] + CyclicRule(String), +} + +#[derive(Clone, Debug)] +pub struct ReferentRule { + pub(crate) rule_id: String, + reg_ref: RegistrationRef, +} + +impl ReferentRule { + pub fn try_new( + rule_id: String, + registration: &RuleRegistration, + ) -> Result { + Ok(Self { + reg_ref: registration.get_ref(), + rule_id, + }) + } + + fn eval_local(&self, func: F) -> Option + where + F: FnOnce(&Rule) -> T, + { + let rules = self.reg_ref.get_local(); + let rule = rules.get(&self.rule_id)?; + Some(func(rule)) + } + + fn eval_global(&self, func: F) -> Option + where + F: FnOnce(&RuleCore) -> T, + { + let rules = self.reg_ref.get_global(); + let rule = rules.get(&self.rule_id)?; + Some(func(rule)) + } + + pub(super) fn verify_util(&self) -> Result<(), ReferentRuleError> { + let rules = self.reg_ref.get_local(); + if rules.contains_key(&self.rule_id) { + return Ok(()); + } + let rules = self.reg_ref.get_global(); + if rules.contains_key(&self.rule_id) { + return Ok(()); + } + Err(ReferentRuleError::UndefinedUtil(self.rule_id.clone())) + } +} + +impl Matcher for ReferentRule { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + self.eval_local(|r| r.match_node_with_env(node.clone(), env)) + .or_else(|| self.eval_global(|r| r.match_node_with_env(node, env))) + .flatten() + } + fn potential_kinds(&self) -> Option { + self.eval_local(|r| { + debug_assert!(!r.check_cyclic(&self.rule_id), "no cyclic rule allowed"); + r.potential_kinds() + }) + .or_else(|| { + self.eval_global(|r| { + debug_assert!(!r.check_cyclic(&self.rule_id), "no cyclic rule allowed"); + r.potential_kinds() + }) + }) + .flatten() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::rule::Rule; + use crate::test::TypeScript as TS; + use thread_ast_engine::Pattern; + use thread_ast_engine::ops as o; + + type Result = std::result::Result<(), ReferentRuleError>; + + #[test] + fn test_cyclic_error() -> Result { + let registration = RuleRegistration::default(); + let rule = ReferentRule::try_new("test".into(), ®istration)?; + let rule = Rule::Matches(rule); + let error = registration.insert_local("test", rule); + assert!(matches!(error, Err(ReferentRuleError::CyclicRule(_)))); + Ok(()) + } + + #[test] + fn test_cyclic_all() -> Result { + let registration = RuleRegistration::default(); + let rule = ReferentRule::try_new("test".into(), ®istration)?; + let rule = Rule::All(o::All::new(std::iter::once(Rule::Matches(rule)))); + let error = registration.insert_local("test", rule); + assert!(matches!(error, Err(ReferentRuleError::CyclicRule(_)))); + Ok(()) + } + + #[test] + fn test_cyclic_not() -> Result { + let registration = RuleRegistration::default(); + let rule = ReferentRule::try_new("test".into(), ®istration)?; + let rule = Rule::Not(Box::new(o::Not::new(Rule::Matches(rule)))); + let error = registration.insert_local("test", rule); + assert!(matches!(error, Err(ReferentRuleError::CyclicRule(_)))); + Ok(()) + } + + #[test] + fn test_success_rule() -> Result { + let registration = RuleRegistration::default(); + let rule = ReferentRule::try_new("test".into(), ®istration)?; + let pattern = Rule::Pattern(Pattern::new("some", TS::Tsx)); + let ret = registration.insert_local("test", pattern); + assert!(ret.is_ok()); + assert!(rule.potential_kinds().is_some()); + Ok(()) + } +} diff --git a/crates/rule-engine/src/rule/relational_rule.rs b/crates/rule-engine/src/rule/relational_rule.rs new file mode 100644 index 0000000..663aded --- /dev/null +++ b/crates/rule-engine/src/rule/relational_rule.rs @@ -0,0 +1,708 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::deserialize_env::DeserializeEnv; +use super::stop_by::{SerializableStopBy, StopBy}; +use crate::rule::{Rule, RuleSerializeError, SerializableRule}; +use thread_ast_engine::language::Language; +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::{Doc, Matcher, Node}; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use thread_utils::RapidSet; + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Relation { + #[serde(flatten)] + pub rule: SerializableRule, + #[serde(default)] + pub stop_by: SerializableStopBy, + pub field: Option, +} + +fn field_name_to_id( + field: Option, + env: &DeserializeEnv, +) -> Result, RuleSerializeError> { + let Some(field) = field else { + return Ok(None); + }; + match env.lang.field_to_id(&field) { + Some(id) => Ok(Some(id)), + None => Err(RuleSerializeError::InvalidField(field)), + } +} + +#[derive(Clone, Debug)] +pub struct Inside { + outer: Rule, + field: Option, + stop_by: StopBy, +} +impl Inside { + pub fn try_new( + relation: Relation, + env: &DeserializeEnv, + ) -> Result { + Ok(Self { + stop_by: StopBy::try_from(relation.stop_by, env)?, + field: field_name_to_id(relation.field, env)?, + outer: env.deserialize_rule(relation.rule)?, // TODO + }) + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + self.outer + .defined_vars() + .union(&self.stop_by.defined_vars()) + .copied() + .collect() + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + self.outer.verify_util()?; + self.stop_by.verify_util() + } +} + +impl Matcher for Inside { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let parent = || node.parent(); + let ancestors = || node.ancestors(); + if let Some(field) = self.field { + let mut last_id = node.node_id(); + let finder = move |moved_node: Node<'tree, D>| { + let expect_id = last_id; + last_id = moved_node.node_id(); + let n = moved_node.child_by_field_id(field)?; + if n.node_id() != expect_id { + None + } else { + self.outer.match_node_with_env(moved_node, env) + } + }; + self.stop_by.find(parent, ancestors, finder) + } else { + let finder = |n| self.outer.match_node_with_env(n, env); + self.stop_by.find(parent, ancestors, finder) + } + } +} + +#[derive(Clone, Debug)] +pub struct Has { + inner: Rule, + stop_by: StopBy, + field: Option, +} +impl Has { + pub fn try_new( + relation: Relation, + env: &DeserializeEnv, + ) -> Result { + Ok(Self { + stop_by: StopBy::try_from(relation.stop_by, env)?, + inner: env.deserialize_rule(relation.rule)?, + field: field_name_to_id(relation.field, env)?, + }) + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + self.inner + .defined_vars() + .union(&self.stop_by.defined_vars()) + .copied() + .collect() + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + self.inner.verify_util()?; + self.stop_by.verify_util() + } +} + +impl Matcher for Has { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + if let Some(field) = self.field { + let child_node = node.child_by_field_id(field)?; + return match &self.stop_by { + StopBy::Neighbor => self.inner.match_node_with_env(child_node, env), + StopBy::End => child_node + .dfs() + .find_map(|n| self.inner.match_node_with_env(n, env)), + StopBy::Rule(matcher) => { + // TODO: use Pre traversal to reduce stack allocation + self.inner + .match_node_with_env(child_node.clone(), env) + .or_else(|| { + if child_node.matches(matcher) { + None + } else { + child_node + .children() + .find_map(|n| self.inner.match_node_with_env(n, env)) + } + }) + } + }; + } + match &self.stop_by { + StopBy::Neighbor => node + .children() + .find_map(|n| self.inner.match_node_with_env(n, env)), + StopBy::End => node + .dfs() + .skip(1) + .find_map(|n| self.inner.match_node_with_env(n, env)), + StopBy::Rule(matcher) => { + // TODO: use Pre traversal to reduce stack allocation + node.children().find_map(|n| { + self.inner.match_node_with_env(n.clone(), env).or_else(|| { + if n.matches(matcher) { + None + } else { + self.match_node_with_env(n, env) + } + }) + }) + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Precedes { + later: Rule, + stop_by: StopBy, +} +impl Precedes { + pub fn try_new( + relation: Relation, + env: &DeserializeEnv, + ) -> Result { + if relation.field.is_some() { + return Err(RuleSerializeError::FieldNotSupported); + } + Ok(Self { + stop_by: StopBy::try_from(relation.stop_by, env)?, + later: env.deserialize_rule(relation.rule)?, + }) + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + self.later + .defined_vars() + .union(&self.stop_by.defined_vars()) + .copied() + .collect() + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + self.later.verify_util()?; + self.stop_by.verify_util() + } +} +impl Matcher for Precedes { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let next = || node.next(); + let next_all = || node.next_all(); + let finder = |n| self.later.match_node_with_env(n, env); + self.stop_by.find(next, next_all, finder) + } +} + +#[derive(Clone, Debug)] +pub struct Follows { + former: Rule, + stop_by: StopBy, +} +impl Follows { + pub fn try_new( + relation: Relation, + env: &DeserializeEnv, + ) -> Result { + if relation.field.is_some() { + return Err(RuleSerializeError::FieldNotSupported); + } + Ok(Self { + stop_by: StopBy::try_from(relation.stop_by, env)?, + former: env.deserialize_rule(relation.rule)?, + }) + } + pub fn defined_vars(&self) -> RapidSet<&str> { + self.former + .defined_vars() + .union(&self.stop_by.defined_vars()) + .copied() + .collect() + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + self.former.verify_util()?; + self.stop_by.verify_util() + } +} +impl Matcher for Follows { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + let prev = || node.prev(); + let prev_all = || node.prev_all(); + let finder = |n| self.former.match_node_with_env(n, env); + self.stop_by.find(prev, prev_all, finder) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::TypeScript as TS; + use thread_ast_engine::Pattern; + use thread_ast_engine::matcher::KindMatcher; + use thread_ast_engine::ops as o; + use thread_ast_engine::{Language, tree_sitter::LanguageExt}; + + fn find_rule(src: &str, matcher: M) -> Option { + let grep = TS::Tsx.ast_grep(src); + grep.root().find(matcher).map(|s| s.text().to_string()) + } + + fn test_found(found_list: &[&str], matcher: M) { + for found in found_list { + assert!(find_rule(found, &matcher).is_some()); + } + } + + fn test_not_found(not_found_list: &[&str], matcher: M) { + for found in not_found_list { + assert!(find_rule(found, &matcher).is_none()); + } + } + + fn make_rule(target: &str, relation: Rule) -> impl Matcher { + o::All::new(vec![Rule::Pattern(Pattern::new(target, TS::Tsx)), relation]) + } + + #[test] + fn test_precedes_operator() { + let precedes = Precedes { + later: Rule::Pattern(Pattern::new("var a = 1", TS::Tsx)), + stop_by: StopBy::End, + }; + let rule = make_rule("var b = 2", Rule::Precedes(Box::new(precedes))); + test_found( + &[ + "var b = 2; var a = 1;", + "var b = 2; alert(b); var a = 1;", + "var b = 2; var a = 1", + "var b = 2\n var a = 1", + ], + &rule, + ); + test_not_found( + &[ + "var a = 1", + "var b = 2; var a = 2;", + "var a = 1; var b = 2;", + "{ var a = 1 }", + "var b = 2; { var a = 1 }", + ], + &rule, + ); + } + + #[test] + fn test_precedes_immediate() { + let precedes = Precedes { + later: Rule::Pattern(Pattern::new("var a = 1", TS::Tsx)), + stop_by: StopBy::Neighbor, + }; + let rule = make_rule("var b = 2", Rule::Precedes(Box::new(precedes))); + test_found( + &[ + "var b = 2; var a = 1;", + "var b = 2; var a = 1", + "var b = 2\n var a = 1", + "{ var b = 2; var a = 1 }", + "function test() { var b = 2; var a = 1 }", + ], + &rule, + ); + test_not_found( + &[ + "var a = 1", + "var b = 2; var a = 2;", + "var a = 1; var b = 2;", + "var b = 2; alert(b); var a = 1;", + "{ var b = 2 } var a = 1;", + ], + &rule, + ); + } + + #[test] + fn test_follows_operator() { + let follows = Follows { + former: Rule::Pattern(Pattern::new("var b = 2", TS::Tsx)), + stop_by: StopBy::End, + }; + let rule = make_rule("var a = 1", Rule::Follows(Box::new(follows))); + test_found( + &[ + "var b = 2; var a = 1;", + "var b = 2; var a = 1", + "var b = 2; alert(b); var a = 1", + "var b = 2\n var a = 1", + "alert(b); var b = 2; var a = 1", + "{var b = 2; var a = 1;}", // inside block + ], + &rule, + ); + test_not_found( + &[ + "var a = 1", + "var b = 2", + "var a = 1; var b = 2;", + "var a = 1; alert(b) ;var b = 2;", + "var a = 1\n var b = 2;", + "{var b = 2;} var a = 1;", // inside block + ], + &rule, + ); + } + + #[test] + fn test_follows_immediate() { + let follows = Follows { + former: Rule::Pattern(Pattern::new("var b = 2", TS::Tsx)), + stop_by: StopBy::Neighbor, + }; + let rule = make_rule("var a = 1", Rule::Follows(Box::new(follows))); + test_found( + &[ + "var b = 2; var a = 1;", + "var b = 2; var a = 1", + "var b = 2\n var a = 1", + "alert(b); var b = 2; var a = 1", + "{var b = 2; var a = 1;}", // inside block + ], + &rule, + ); + test_not_found( + &[ + "var a = 1", + "var b = 2", + "var a = 1; var b = 2;", + "var a = 1; alert(b) ;var b = 2;", + "var a = 1\n var b = 2;", + "var b = 2; alert(b); var a = 1", // not immediate + "{var b = 2;} var a = 1;", // inside block + ], + &rule, + ); + } + + #[test] + fn test_has_rule() { + let has = Has { + stop_by: StopBy::End, + inner: Rule::Pattern(Pattern::new("var a = 1", TS::Tsx)), + field: None, + }; + let rule = make_rule("function test() { $$$ }", Rule::Has(Box::new(has))); + test_found( + &[ + "function test() { var a = 1 }", + "function test() { var a = 1; var b = 2 }", + "function test() { function nested() { var a = 1 } }", + "function test() { if (nested) { var a = 1 } }", + ], + &rule, + ); + test_not_found( + &[ + "var test = function () { var a = 2 }", + "function test() { var a = 2 }", + "function test() { let a = 1; var b = 2 }", + "if (test) { { var a = 1 } }", + ], + &rule, + ); + } + + #[test] + fn test_has_until_should_not_abort_prematurely() { + let has = Has { + stop_by: StopBy::Rule(Rule::Kind(KindMatcher::new( + "function_declaration", + TS::Tsx, + ))), + inner: Rule::Pattern(Pattern::new("var a = 1", TS::Tsx)), + field: None, + }; + let rule = make_rule("function test() { $$$ }", Rule::Has(Box::new(has))); + test_found( + &[ + "function test() { var a = 1}", + "function test() { function inner() { var a = 1 }; var a = 1}", + ], + &rule, + ); + test_not_found( + &[ + "function test() { var a = 2}", + "function test() { function inner() { var a = 1 }}", + ], + &rule, + ); + } + + #[test] + fn test_has_until_should_be_inclusive() { + let has = Has { + stop_by: StopBy::Rule(Rule::Kind(KindMatcher::new( + "function_declaration", + TS::Tsx, + ))), + inner: Rule::Pattern(Pattern::new("function inner() {$$$}", TS::Tsx)), + field: None, + }; + let rule = make_rule("function test() { $$$ }", Rule::Has(Box::new(has))); + test_found( + &[ + "function test() { function inner() { var a = 1 };}", + "function test() { var a = 1; function inner() { var a = 1 };}", + "function test() { if (false) { function inner() { var a = 1 };} }", + ], + &rule, + ); + test_not_found( + &[ + "function test() { var a = 2}", + "function test() { function bbb() { function inner() { var a = 1 } }}", + ], + &rule, + ); + } + + #[test] + fn test_has_immediate() { + let has = Has { + stop_by: StopBy::Neighbor, + inner: Rule::Pattern(Pattern::new("var a = 1", TS::Tsx)), + field: None, + }; + let rule = o::All::new(vec![ + Rule::Pattern(Pattern::new("{ $$$ }", TS::Tsx)), + Rule::Inside(Box::new(Inside { + outer: Rule::Pattern(Pattern::new("function test() { $$$ }", TS::Tsx)), + stop_by: StopBy::Neighbor, + field: None, + })), + Rule::Has(Box::new(has)), + ]); + test_found( + &[ + "function test() { var a = 1 }", + "function test() { var a = 1; var b = 2 }", + ], + &rule, + ); + test_not_found( + &[ + "var test = function () { var a = 2 }", + "function test() { var a = 2 }", + "function test() { let a = 1; var b = 2 }", + "if (test) { { var a = 1 } }", + // nested + "function test() { if (nested) { var a = 1 } }", + "function test() { function nested() { var a = 1 } }", + ], + &rule, + ); + } + + #[test] + fn test_inside_rule() { + let inside = Inside { + stop_by: StopBy::End, + outer: Rule::Pattern(Pattern::new("function test() { $$$ }", TS::Tsx)), + field: None, + }; + let rule = make_rule("var a = 1", Rule::Inside(Box::new(inside))); + test_found( + &[ + "function test() { var a = 1 }", + "function test() { var a = 1; var b = 2 }", + "function test() { function nested() { var a = 1 } }", + "function test() { if (nested) { var a = 1 } }", + ], + &rule, + ); + test_not_found( + &[ + "var test = function () { var a = 2 }", + "function test() { var a = 2 }", + "function test() { let a = 1; var b = 2 }", + "if (test) { { var a = 1 } }", + ], + &rule, + ); + } + + #[test] + fn test_inside_inclusive() { + let inside = Inside { + stop_by: StopBy::Rule(Rule::Kind(KindMatcher::new( + "function_declaration", + TS::Tsx, + ))), + outer: Rule::Pattern(Pattern::new("function test() { $$$ }", TS::Tsx)), + field: None, + }; + let rule = make_rule("var a = 1", Rule::Inside(Box::new(inside))); + test_found( + &[ + "function test() { var a = 1 }", + "function test() { var a = 1; var b = 2 }", + "function test() { if (nested) { var a = 1 } }", + "function test() { var b = function(nested) { var a = 1 } }", + ], + &rule, + ); + test_not_found( + &[ + "function test() { function nested() { var a = 1 } }", + "var test = function () { var a = 2 }", + "function test() { var a = 2 }", + "function test() { let a = 1; var b = 2 }", + ], + &rule, + ); + } + + #[test] + fn test_inside_immediate() { + let inside = Inside { + stop_by: StopBy::Neighbor, + outer: Rule::All(o::All::new(vec![ + Rule::Pattern(Pattern::new("{ $$$ }", TS::Tsx)), + Rule::Inside(Box::new(Inside { + outer: Rule::Pattern(Pattern::new("function test() { $$$ }", TS::Tsx)), + stop_by: StopBy::Neighbor, + field: None, + })), + ])), + field: None, + }; + let rule = make_rule("var a = 1", Rule::Inside(Box::new(inside))); + test_found( + &[ + "function test() { var a = 1 }", + "function test() { var a = 1; var b = 2 }", + ], + &rule, + ); + test_not_found( + &[ + "var test = function () { var a = 2 }", + "function test() { var a = 2 }", + "function test() { let a = 1; var b = 2 }", + "if (test) { { var a = 1 } }", + // nested + "function test() { function nested() { var a = 1 } }", + "function test() { if (nested) { var a = 1 } }", + ], + &rule, + ); + } + + #[test] + fn test_inside_field() { + let inside = Inside { + stop_by: StopBy::End, + outer: Rule::Kind(KindMatcher::new("for_statement", TS::Tsx)), + field: TS::Tsx.field_to_id("condition"), + }; + let rule = make_rule("a = 1", Rule::Inside(Box::new(inside))); + test_found(&["for (;a = 1;) {}"], &rule); + test_not_found(&["for (;; a = 1) {}"], &rule); + } + + #[test] + fn test_has_field() { + let has = Has { + stop_by: StopBy::End, + inner: Rule::Pattern(Pattern::new("a = 1", TS::Tsx)), + field: TS::Tsx.field_to_id("condition"), + }; + let rule = o::All::new(vec![ + Rule::Kind(KindMatcher::new("for_statement", TS::Tsx)), + Rule::Has(Box::new(has)), + ]); + test_found(&["for (;a = 1;) {}"], &rule); + test_not_found(&["for (;; a = 1) {}", "for (;;) { a = 1}"], &rule); + } + + #[test] + fn test_invalid_field() { + let env = DeserializeEnv::new(TS::Tsx); + let relation = Relation { + rule: crate::from_str("pattern: test").unwrap(), + stop_by: SerializableStopBy::End, + field: Some("invalid_field".to_string()), + }; + let inside = Inside::try_new(relation, &env); + assert!(inside.is_err()); + match inside { + Err(RuleSerializeError::InvalidField(_)) => {} + _ => panic!("expected InvalidField error"), + } + } + + #[test] + fn test_defined_vars() { + let precedes = Precedes { + later: Rule::Pattern(Pattern::new("var a = $A", TS::Tsx)), + stop_by: StopBy::Rule(Rule::Pattern(Pattern::new("var b = $B", TS::Tsx))), + }; + assert_eq!(precedes.defined_vars(), ["A", "B"].into_iter().collect()); + let follows = Follows { + former: Rule::Pattern(Pattern::new("var a = 123", TS::Tsx)), + stop_by: StopBy::Rule(Rule::Pattern(Pattern::new("var b = $B", TS::Tsx))), + }; + assert_eq!(follows.defined_vars(), ["B"].into_iter().collect()); + let inside = Inside { + stop_by: StopBy::Rule(Rule::Pattern(Pattern::new("var $C", TS::Tsx))), + outer: Rule::Pattern(Pattern::new("var a = $A", TS::Tsx)), + field: TS::Tsx.field_to_id("condition"), + }; + assert_eq!(inside.defined_vars(), ["A", "C"].into_iter().collect()); + let has = Has { + stop_by: StopBy::Rule(Rule::Kind(KindMatcher::new("for_statement", TS::Tsx))), + inner: Rule::Pattern(Pattern::new("var a = $A", TS::Tsx)), + field: TS::Tsx.field_to_id("condition"), + }; + assert_eq!(has.defined_vars(), ["A"].into_iter().collect()); + } +} diff --git a/crates/rule-engine/src/rule/stop_by.rs b/crates/rule-engine/src/rule/stop_by.rs new file mode 100644 index 0000000..a5c5381 --- /dev/null +++ b/crates/rule-engine/src/rule/stop_by.rs @@ -0,0 +1,259 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::deserialize_env::DeserializeEnv; +use crate::rule::{Rule, RuleSerializeError, SerializableRule}; + +use thread_ast_engine::language::Language; +use thread_ast_engine::{Doc, Node}; + +use schemars::JsonSchema; +use serde::de::{self, Deserializer, MapAccess, Visitor}; +use serde::{Deserialize, Serialize}; + +use std::fmt; +use thread_utils::RapidSet; + +// NB StopBy's JsonSchema is changed in xtask/schema.rs +// revise schema is easier than manually implementation +#[derive(Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum SerializableStopBy { + #[default] + Neighbor, + End, + Rule(Box), +} + +impl SerializableStopBy { + /// String key used for serializing the Neighbor variant + const NEIGHBOR_KEY: &str = "neighbor"; + /// String key used for serializing the End variant + const END_KEY: &str = "end"; +} + +struct StopByVisitor; +impl<'de> Visitor<'de> for StopByVisitor { + type Value = SerializableStopBy; + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("`neighbor`, `end` or a rule object") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + match value { + SerializableStopBy::NEIGHBOR_KEY => Ok(SerializableStopBy::Neighbor), + SerializableStopBy::END_KEY => Ok(SerializableStopBy::End), + v => Err(de::Error::custom(format!( + "unknown variant `{v}`, expected `{}`, `{}` or a rule object", + SerializableStopBy::NEIGHBOR_KEY, + SerializableStopBy::END_KEY, + ))), + } + } + + fn visit_map
(self, map: A) -> Result + where + A: MapAccess<'de>, + { + let rule = Deserialize::deserialize(de::value::MapAccessDeserializer::new(map))?; + Ok(SerializableStopBy::Rule(rule)) + } +} + +impl<'de> Deserialize<'de> for SerializableStopBy { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(StopByVisitor) + } +} + +impl Serialize for SerializableStopBy { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + SerializableStopBy::Neighbor => { + serializer.serialize_str(SerializableStopBy::NEIGHBOR_KEY) + } + SerializableStopBy::End => serializer.serialize_str(SerializableStopBy::END_KEY), + SerializableStopBy::Rule(rule) => rule.serialize(serializer), + } + } +} + +#[derive(Clone, Debug)] +pub enum StopBy { + Neighbor, + End, + Rule(Rule), +} + +impl StopBy { + pub(crate) fn try_from( + relation: SerializableStopBy, + env: &DeserializeEnv, + ) -> Result { + use SerializableStopBy as S; + Ok(match relation { + S::Neighbor => StopBy::Neighbor, + S::End => StopBy::End, + S::Rule(r) => StopBy::Rule(env.deserialize_rule(*r)?), + }) + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + match self { + StopBy::Rule(rule) => rule.defined_vars(), + StopBy::End => RapidSet::default(), + StopBy::Neighbor => RapidSet::default(), + } + } + + pub fn verify_util(&self) -> Result<(), RuleSerializeError> { + match self { + StopBy::Rule(rule) => rule.verify_util(), + StopBy::End => Ok(()), + StopBy::Neighbor => Ok(()), + } + } +} + +impl StopBy { + // TODO: document this monster method + pub(crate) fn find<'t, O, M, I, F, D>( + &self, + once: O, + multi: M, + mut finder: F, + ) -> Option> + where + D: Doc, + I: Iterator>, + O: FnOnce() -> Option>, + M: FnOnce() -> I, + F: FnMut(Node<'t, D>) -> Option>, + { + match self { + StopBy::Neighbor => finder(once()?), + StopBy::End => { + let mut iter = multi(); + iter.find_map(finder) + } + StopBy::Rule(stop) => { + let iter = multi(); + iter.take_while(inclusive_until(stop)).find_map(finder) + } + } + } +} + +fn inclusive_until<'t, D: Doc>(rule: &Rule) -> impl FnMut(&Node<'t, D>) -> bool + '_ { + let mut matched = false; + move |n| { + if matched { + false + } else { + matched = n.matches(rule); + true + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::test::TypeScript; + + #[test] + fn test_relational() { + let src = r" +inside: + pattern: class A {} + stopBy: neighbor +"; + let rule: SerializableRule = from_str(src).expect("cannot parse rule"); + let stop_by = rule.inside.unwrap().stop_by; + assert!(matches!(stop_by, SerializableStopBy::Neighbor)); + } + + fn to_stop_by(src: &str) -> Result { + from_str(src) + } + + #[test] + fn test_stop_by_ok() { + let stop = to_stop_by("'neighbor'").expect("cannot parse stopBy"); + assert!(matches!(stop, SerializableStopBy::Neighbor)); + let stop = to_stop_by("'end'").expect("cannot parse stopBy"); + assert!(matches!(stop, SerializableStopBy::End)); + let stop = to_stop_by("kind: some-kind").expect("cannot parse stopBy"); + assert!(matches!(stop, SerializableStopBy::Rule(_))); + } + + macro_rules! cast_err { + ($reg: expr) => { + match $reg { + Err(a) => a, + _ => panic!("non-matching variant"), + } + }; + } + + #[test] + fn test_stop_by_err() { + let err = cast_err!(to_stop_by("'ddd'")).to_string(); + assert!(err.contains("unknown variant")); + assert!(err.contains("ddd")); + let err = cast_err!(to_stop_by("pattern: 1233")); + assert!(err.to_string().contains("variant")); + } + + fn parse_stop_by(src: &str) -> StopBy { + let stop_by = to_stop_by(src).expect("cannot parse stopBy"); + StopBy::try_from(stop_by, &DeserializeEnv::new(TypeScript::Tsx)).expect("cannot convert") + } + + #[test] + fn test_stop_by_no_defined_vars() { + let stop_by = parse_stop_by("neighbor"); + assert!(stop_by.defined_vars().is_empty()); + let stop_by = parse_stop_by("end"); + assert!(stop_by.defined_vars().is_empty()); + } + + #[test] + fn test_stop_by_defined_vars() { + let stop_by = parse_stop_by("kind: class"); + assert_eq!(stop_by.defined_vars(), RapidSet::default()); + let stop_by = parse_stop_by("pattern: $A"); + assert_eq!(stop_by.defined_vars(), ["A"].into_iter().collect()); + } + + #[test] + fn test_serialization_deserialization_symmetry() { + let stop = to_stop_by("'neighbor'").expect("cannot parse stopBy"); + let serialized = serde_yaml::to_string(&stop).expect("cannot serialize stopBy"); + let deserialized = to_stop_by(&serialized).expect("cannot parse stopBy"); + assert!(matches!(deserialized, SerializableStopBy::Neighbor)); + + let stop = to_stop_by("'end'").expect("cannot parse stopBy"); + let serialized = serde_yaml::to_string(&stop).expect("cannot serialize stopBy"); + let deserialized = to_stop_by(&serialized).expect("cannot parse stopBy"); + assert!(matches!(deserialized, SerializableStopBy::End)); + + let stop = to_stop_by("kind: some-kind").expect("cannot parse stopBy"); + let serialized = serde_yaml::to_string(&stop).expect("cannot serialize stopBy"); + let deserialized = to_stop_by(&serialized).expect("cannot parse stopBy"); + assert!(matches!(deserialized, SerializableStopBy::Rule(_))); + } +} diff --git a/crates/rule-engine/src/rule_collection.rs b/crates/rule-engine/src/rule_collection.rs new file mode 100644 index 0000000..7c0920c --- /dev/null +++ b/crates/rule-engine/src/rule_collection.rs @@ -0,0 +1,291 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::{RuleConfig, Severity}; +use globset::{Glob, GlobSet, GlobSetBuilder}; +use std::path::Path; +use thread_ast_engine::language::Language; + +/// RuleBucket stores rules of the same language id. +/// Rules for different language will stay in separate buckets. +pub struct RuleBucket { + rules: Vec>, + lang: L, +} + +impl RuleBucket { + fn new(lang: L) -> Self { + Self { + rules: vec![], + lang, + } + } + pub fn add(&mut self, rule: RuleConfig) { + self.rules.push(rule); + } +} + +struct ContingentRule { + rule: RuleConfig, + files_globs: Option, + ignore_globs: Option, +} + +fn build_glob_set(paths: &Vec) -> Result { + let mut builder = GlobSetBuilder::new(); + for path in paths { + builder.add(Glob::new(path)?); + } + builder.build() +} + +impl TryFrom> for ContingentRule +where + L: Language, +{ + type Error = globset::Error; + fn try_from(rule: RuleConfig) -> Result { + let files_globs = rule.files.as_ref().map(build_glob_set).transpose()?; + let ignore_globs = rule.ignores.as_ref().map(build_glob_set).transpose()?; + Ok(Self { + rule, + files_globs, + ignore_globs, + }) + } +} + +impl ContingentRule { + pub fn matches_path>(&self, path: P) -> bool { + if let Some(ignore_globs) = &self.ignore_globs { + if ignore_globs.is_match(&path) { + return false; + } + } + if let Some(files_globs) = &self.files_globs { + return files_globs.is_match(path); + } + true + } +} + +/// A collection of rules to run one round of scanning. +/// Rules will be grouped together based on their language, path globbing and pattern rule. +pub struct RuleCollection { + // use vec since we don't have many languages + /// a list of rule buckets grouped by languages. + /// Tenured rules will always run against a file of that language type. + tenured: Vec>, + /// contingent rules will run against a file if it matches file/ignore glob. + contingent: Vec>, +} + +impl RuleCollection { + pub fn try_new(configs: Vec>) -> Result { + let mut tenured = vec![]; + let mut contingent = vec![]; + for config in configs { + if matches!(config.severity, Severity::Off) { + continue; + } else if config.files.is_none() && config.ignores.is_none() { + Self::add_tenured_rule(&mut tenured, config); + } else { + contingent.push(ContingentRule::try_from(config)?); + } + } + Ok(Self { + tenured, + contingent, + }) + } + + pub fn get_rule_from_lang(&self, path: &Path, lang: L) -> Vec<&RuleConfig> { + let mut all_rules = vec![]; + for rule in &self.tenured { + if rule.lang == lang { + all_rules = rule.rules.iter().collect(); + break; + } + } + all_rules.extend(self.contingent.iter().filter_map(|cont| { + if cont.rule.language == lang && cont.matches_path(path) { + Some(&cont.rule) + } else { + None + } + })); + all_rules + } + + pub fn for_path>(&self, path: P) -> Vec<&RuleConfig> { + let path = path.as_ref(); + let Some(lang) = L::from_path(path) else { + return vec![]; + }; + let mut ret = self.get_rule_from_lang(path, lang); + ret.sort_unstable_by_key(|r| &r.id); + ret + } + + pub fn get_rule(&self, id: &str) -> Option<&RuleConfig> { + for rule in &self.tenured { + for r in &rule.rules { + if r.id == id { + return Some(r); + } + } + } + for rule in &self.contingent { + if rule.rule.id == id { + return Some(&rule.rule); + } + } + None + } + + pub fn total_rule_count(&self) -> usize { + let mut ret = self.tenured.iter().map(|bucket| bucket.rules.len()).sum(); + ret += self.contingent.len(); + ret + } + + pub fn for_each_rule(&self, mut f: impl FnMut(&RuleConfig)) { + for bucket in &self.tenured { + for rule in &bucket.rules { + f(rule); + } + } + for rule in &self.contingent { + f(&rule.rule); + } + } + + fn add_tenured_rule(tenured: &mut Vec>, rule: RuleConfig) { + let lang = rule.language.clone(); + for bucket in tenured.iter_mut() { + if bucket.lang == lang { + bucket.add(rule); + return; + } + } + let mut bucket = RuleBucket::new(lang); + bucket.add(rule); + tenured.push(bucket); + } +} + +impl Default for RuleCollection { + fn default() -> Self { + Self { + tenured: vec![], + contingent: vec![], + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::GlobalRules; + use crate::from_yaml_string; + use crate::test::TypeScript; + + fn make_rule(files: &str) -> RuleCollection { + let globals = GlobalRules::default(); + let rule_config = from_yaml_string( + &format!( + r" +id: test +message: test rule +severity: info +language: Tsx +rule: + all: [kind: number] +{files}" + ), + &globals, + ) + .unwrap() + .pop() + .unwrap(); + RuleCollection::try_new(vec![rule_config]).expect("should parse") + } + + fn assert_match_path(collection: &RuleCollection, path: &str) { + let rules = collection.for_path(path); + assert_eq!(rules.len(), 1); + assert_eq!(rules[0].id, "test"); + } + + fn assert_ignore_path(collection: &RuleCollection, path: &str) { + let rules = collection.for_path(path); + assert!(rules.is_empty()); + } + + #[test] + fn test_ignore_rule() { + let src = r#" +ignores: + - ./manage.py + - "**/test*" +"#; + let collection = make_rule(src); + assert_ignore_path(&collection, "./manage.py"); + assert_ignore_path(&collection, "./src/test.py"); + assert_match_path(&collection, "./src/app.py"); + } + + #[test] + fn test_files_rule() { + let src = r#" +files: + - ./manage.py + - "**/test*" +"#; + let collection = make_rule(src); + assert_match_path(&collection, "./manage.py"); + assert_match_path(&collection, "./src/test.py"); + assert_ignore_path(&collection, "./src/app.py"); + } + + #[test] + fn test_files_with_ignores_rule() { + let src = r#" +files: + - ./src/**/*.py +ignores: + - ./src/excluded/*.py +"#; + let collection = make_rule(src); + assert_match_path(&collection, "./src/test.py"); + assert_match_path(&collection, "./src/some_folder/test.py"); + assert_ignore_path(&collection, "./src/excluded/app.py"); + } + + #[test] + fn test_rule_collection_get_contingent_rule() { + let src = r#" +files: + - ./manage.py + - "**/test*" +"#; + let collection = make_rule(src); + assert!(collection.get_rule("test").is_some()); + } + + #[test] + fn test_rule_collection_get_tenured_rule() { + let src = r#""#; + let collection = make_rule(src); + assert!(collection.get_rule("test").is_some()); + } + + #[test] + #[ignore] + fn test_rules_for_path() { + todo!() + } +} diff --git a/crates/rule-engine/src/rule_config.rs b/crates/rule-engine/src/rule_config.rs new file mode 100644 index 0000000..d2886a8 --- /dev/null +++ b/crates/rule-engine/src/rule_config.rs @@ -0,0 +1,776 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::GlobalRules; + +use crate::check_var::{CheckHint, check_rewriters_in_transform}; +use crate::fixer::Fixer; +use crate::label::{Label, LabelConfig, get_default_labels, get_labels_from_config}; +use crate::rule::DeserializeEnv; +use crate::rule_core::{RuleCore, RuleCoreError, SerializableRuleCore}; + +use thread_ast_engine::language::Language; +use thread_ast_engine::replacer::Replacer; +use thread_ast_engine::source::Content; +use thread_ast_engine::{Doc, Matcher, NodeMatch}; + +use schemars::{JsonSchema, Schema, SchemaGenerator}; +use serde::{Deserialize, Serialize}; +use serde_yaml::Error as YamlError; +use serde_yaml::{Deserializer, with::singleton_map_recursive::deserialize}; +use thiserror::Error; + +use std::borrow::Cow; +use std::ops::{Deref, DerefMut}; +use thread_utils::RapidMap; + +#[derive(Serialize, Deserialize, Clone, Debug, Default, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum Severity { + #[default] + /// A kind reminder for code with potential improvement. + Hint, + /// A suggestion that code can be improved or optimized. + Info, + /// A warning that code might produce bugs or does not follow best practice. + Warning, + /// An error that code produces bugs or has logic errors. + Error, + /// Turns off the rule. + Off, +} + +#[derive(Error, Debug)] +pub enum RuleConfigError { + #[error("Fail to parse yaml as RuleConfig")] + Yaml(#[from] YamlError), + #[error("Fail to parse yaml as Rule.")] + Core(#[from] RuleCoreError), + #[error("Rewriter rule `{1}` is not configured correctly.")] + Rewriter(#[source] RuleCoreError, String), + #[error("Undefined rewriter `{0}` used in transform.")] + UndefinedRewriter(String), + #[error("Rewriter rule `{0}` should have `fix`.")] + NoFixInRewriter(String), + #[error("Label meta-variable `{0}` must be defined in `rule` or `constraints`.")] + LabelVariable(String), + #[error("Rule must specify a set of AST kinds to match. Try adding `kind` rule.")] + MissingPotentialKinds, +} + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +pub struct SerializableRewriter { + #[serde(flatten)] + pub core: SerializableRuleCore, + /// Unique, descriptive identifier, e.g., no-unused-variable + pub id: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +pub struct SerializableRuleConfig { + #[serde(flatten)] + pub core: SerializableRuleCore, + /// Unique, descriptive identifier, e.g., no-unused-variable + pub id: String, + /// Specify the language to parse and the file extension to include in matching. + pub language: L, + /// Rewrite rules for `rewrite` transformation + pub rewriters: Option>, + /// Main message highlighting why this rule fired. It should be single line and concise, + /// but specific enough to be understood without additional context. + #[serde(default)] + pub message: String, + /// Additional notes to elaborate the message and provide potential fix to the issue. + /// `notes` can contain markdown syntax, but it cannot reference meta-variables. + pub note: Option, + /// One of: hint, info, warning, or error + #[serde(default)] + pub severity: Severity, + /// Custom label dictionary to configure reporting. Key is the meta-variable name and + /// value is the label message and label style. + pub labels: Option>, + /// Glob patterns to specify that the rule only applies to matching files + pub files: Option>, + /// Glob patterns that exclude rules from applying to files + pub ignores: Option>, + /// Documentation link to this rule + pub url: Option, + /// Extra information for the rule + pub metadata: Option, +} + +/// A trivial wrapper around a FastMap to work around +/// the limitation of `serde_yaml::Value` not implementing `JsonSchema`. +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Metadata(RapidMap); + +impl JsonSchema for Metadata { + fn schema_name() -> Cow<'static, str> { + "Metadata".into() + } + fn schema_id() -> Cow<'static, str> { + concat!(module_path!(), "::Metadata").into() + } + fn json_schema(generator: &mut SchemaGenerator) -> Schema { + schemars::json_schema!({ + "type": "object", + "additionalProperties": generator.subschema_for::() + }) + } +} + +impl SerializableRuleConfig { + pub fn get_matcher(&self, globals: &GlobalRules) -> Result { + // every RuleConfig has one rewriters, and the rewriter is shared between sub-rules + // all RuleConfigs has one common globals + // every sub-rule has one util + let env = DeserializeEnv::new(self.language.clone()).with_globals(globals); + let rule = self.core.get_matcher(env.clone())?; + self.register_rewriters(&rule, env)?; + self.check_labels(&rule)?; + Ok(rule) + } + + fn check_labels(&self, rule: &RuleCore) -> Result<(), RuleConfigError> { + let Some(labels) = &self.labels else { + return Ok(()); + }; + // labels var must be vars with node, transform var cannot be used + let vars = rule.defined_node_vars(); + for var in labels.keys() { + if !vars.contains(var.as_str()) { + return Err(RuleConfigError::LabelVariable(var.clone())); + } + } + Ok(()) + } + + fn register_rewriters( + &self, + rule: &RuleCore, + env: DeserializeEnv, + ) -> Result<(), RuleConfigError> { + let Some(ser) = &self.rewriters else { + return Ok(()); + }; + let reg = &env.registration; + let vars = rule.defined_vars(); + for val in ser { + if val.core.fix.is_none() { + return Err(RuleConfigError::NoFixInRewriter(val.id.clone())); + } + let rewriter = val + .core + .get_matcher_with_hint(env.clone(), CheckHint::Rewriter(&vars)) + .map_err(|e| RuleConfigError::Rewriter(e, val.id.clone()))?; + reg.insert_rewriter(&val.id, rewriter); + } + check_rewriters_in_transform(rule, reg.get_rewriters())?; + Ok(()) + } +} + +impl Deref for SerializableRuleConfig { + type Target = SerializableRuleCore; + fn deref(&self) -> &Self::Target { + &self.core + } +} + +impl DerefMut for SerializableRuleConfig { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.core + } +} + +#[derive(Clone, Debug)] +pub struct RuleConfig { + inner: SerializableRuleConfig, + pub matcher: RuleCore, +} + +impl RuleConfig { + pub fn try_from( + inner: SerializableRuleConfig, + globals: &GlobalRules, + ) -> Result { + let matcher = inner.get_matcher(globals)?; + if matcher.potential_kinds().is_none() { + return Err(RuleConfigError::MissingPotentialKinds); + } + Ok(Self { inner, matcher }) + } + + pub fn deserialize<'de>( + deserializer: Deserializer<'de>, + globals: &GlobalRules, + ) -> Result + where + L: Deserialize<'de>, + { + let inner: SerializableRuleConfig = deserialize(deserializer)?; + Self::try_from(inner, globals) + } + + pub fn get_message(&self, node: &NodeMatch) -> String + where + D: Doc, + { + let env = self.matcher.get_env(self.language.clone()); + let parsed = + Fixer::with_transform(&self.message, &env, &self.transform).expect("should work"); + let bytes = parsed.generate_replacement(node); + ::encode_bytes(&bytes).to_string() + } + pub fn get_fixer(&self) -> Result, RuleConfigError> { + if let Some(fix) = &self.fix { + let env = self.matcher.get_env(self.language.clone()); + let parsed = Fixer::parse(fix, &env, &self.transform).map_err(RuleCoreError::Fixer)?; + Ok(parsed) + } else { + Ok(vec![]) + } + } + pub fn get_labels<'t, D: Doc>(&self, node: &NodeMatch<'t, D>) -> Vec> { + if let Some(labels_config) = &self.labels { + get_labels_from_config(labels_config, node) + } else { + get_default_labels(node) + } + } +} +impl Deref for RuleConfig { + type Target = SerializableRuleConfig; + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for RuleConfig { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::rule::SerializableRule; + use crate::test::TypeScript; + use thread_ast_engine::tree_sitter::LanguageExt; + + fn ts_rule_config(rule: SerializableRule) -> SerializableRuleConfig { + let core = SerializableRuleCore { + rule, + constraints: None, + transform: None, + utils: None, + fix: None, + }; + SerializableRuleConfig { + core, + id: "".into(), + language: TypeScript::Tsx, + rewriters: None, + message: "".into(), + note: None, + severity: Severity::Hint, + labels: None, + files: None, + ignores: None, + url: None, + metadata: None, + } + } + + #[test] + fn test_rule_message() { + let globals = GlobalRules::default(); + let rule = from_str("pattern: class $A {}").expect("cannot parse rule"); + let mut config = ts_rule_config(rule); + config.id = "test".into(); + config.message = "Found $A".into(); + let config = RuleConfig::try_from(config, &Default::default()).expect("should work"); + let grep = TypeScript::Tsx.ast_grep("class TestClass {}"); + let node_match = grep + .root() + .find(config.get_matcher(&globals).unwrap()) + .expect("should find match"); + assert_eq!(config.get_message(&node_match), "Found TestClass"); + } + + #[test] + fn test_augmented_rule() { + let globals = GlobalRules::default(); + let rule = from_str( + " +pattern: console.log($A) +inside: + stopBy: end + pattern: function test() { $$$ } +", + ) + .expect("should parse"); + let config = ts_rule_config(rule); + let grep = TypeScript::Tsx.ast_grep("console.log(1)"); + let matcher = config.get_matcher(&globals).unwrap(); + assert!(grep.root().find(&matcher).is_none()); + let grep = TypeScript::Tsx.ast_grep("function test() { console.log(1) }"); + assert!(grep.root().find(&matcher).is_some()); + } + + #[test] + fn test_multiple_augment_rule() { + let globals = GlobalRules::default(); + let rule = from_str( + " +pattern: console.log($A) +inside: + stopBy: end + pattern: function test() { $$$ } +has: + stopBy: end + pattern: '123' +", + ) + .expect("should parse"); + let config = ts_rule_config(rule); + let grep = TypeScript::Tsx.ast_grep("function test() { console.log(1) }"); + let matcher = config.get_matcher(&globals).unwrap(); + assert!(grep.root().find(&matcher).is_none()); + let grep = TypeScript::Tsx.ast_grep("function test() { console.log(123) }"); + assert!(grep.root().find(&matcher).is_some()); + } + + #[test] + fn test_rule_env() { + let globals = GlobalRules::default(); + let rule = from_str( + " +all: + - pattern: console.log($A) + - inside: + stopBy: end + pattern: function $B() {$$$} +", + ) + .expect("should parse"); + let config = ts_rule_config(rule); + let grep = TypeScript::Tsx.ast_grep("function test() { console.log(1) }"); + let node_match = grep + .root() + .find(config.get_matcher(&globals).unwrap()) + .expect("should found"); + let env = node_match.get_env(); + let a = env.get_match("A").expect("should exist").text(); + assert_eq!(a, "1"); + let b = env.get_match("B").expect("should exist").text(); + assert_eq!(b, "test"); + } + + #[test] + fn test_transform() { + let globals = GlobalRules::default(); + let rule = from_str("pattern: console.log($A)").expect("should parse"); + let mut config = ts_rule_config(rule); + let transform = from_str( + " +B: + substring: + source: $A + startChar: 1 + endChar: -1 +", + ) + .expect("should parse"); + config.transform = Some(transform); + let grep = TypeScript::Tsx.ast_grep("function test() { console.log(123) }"); + let node_match = grep + .root() + .find(config.get_matcher(&globals).unwrap()) + .expect("should found"); + let env = node_match.get_env(); + let a = env.get_match("A").expect("should exist").text(); + assert_eq!(a, "123"); + let b = env.get_transformed("B").expect("should exist"); + assert_eq!(b, b"2"); + } + + fn get_matches_config() -> SerializableRuleConfig { + let rule = from_str( + " +matches: test-rule +", + ) + .unwrap(); + let utils = from_str( + " +test-rule: + pattern: some($A) +", + ) + .unwrap(); + let mut ret = ts_rule_config(rule); + ret.utils = Some(utils); + ret + } + + #[test] + fn test_utils_rule() { + let globals = GlobalRules::default(); + let config = get_matches_config(); + let matcher = config.get_matcher(&globals).unwrap(); + let grep = TypeScript::Tsx.ast_grep("some(123)"); + assert!(grep.root().find(&matcher).is_some()); + let grep = TypeScript::Tsx.ast_grep("some()"); + assert!(grep.root().find(&matcher).is_none()); + } + #[test] + fn test_get_fixer() { + let globals = GlobalRules::default(); + let mut config = get_matches_config(); + config.fix = Some(from_str("string!!").unwrap()); + let rule = RuleConfig::try_from(config, &globals).unwrap(); + let fixer = rule.get_fixer().unwrap().remove(0); + let grep = TypeScript::Tsx.ast_grep("some(123)"); + let nm = grep.root().find(&rule.matcher).unwrap(); + let replacement = fixer.generate_replacement(&nm); + assert_eq!(String::from_utf8_lossy(&replacement), "string!!"); + } + + #[test] + fn test_add_rewriters() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +transform: + B: + rewrite: + rewriters: [re] + source: $A +rewriters: +- id: re + rule: {kind: number} + fix: yjsnp + ", + ) + .expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("work"); + let grep = TypeScript::Tsx.ast_grep("a = 123"); + let nm = grep.root().find(&rule.matcher).unwrap(); + let b = nm.get_env().get_transformed("B").expect("should have"); + assert_eq!(String::from_utf8_lossy(b), "yjsnp"); + } + + #[test] + fn test_rewriters_access_utils() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +utils: + num: { kind: number } +transform: + B: + rewrite: + rewriters: [re] + source: $A +rewriters: +- id: re + rule: {matches: num, pattern: $NOT} + fix: yjsnp + ", + ) + .expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("work"); + let grep = TypeScript::Tsx.ast_grep("a = 456"); + let nm = grep.root().find(&rule.matcher).unwrap(); + let b = nm.get_env().get_transformed("B").expect("should have"); + assert!(nm.get_env().get_match("NOT").is_none()); + assert_eq!(String::from_utf8_lossy(b), "yjsnp"); + } + + #[test] + fn test_rewriter_utils_should_not_pollute_registration() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {matches: num} +language: Tsx +transform: + B: + rewrite: + rewriters: [re] + source: $B +rewriters: +- id: re + rule: {matches: num} + fix: yjsnp + utils: + num: { kind: number } + ", + ) + .expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + assert!(matches!(ret, Err(RuleConfigError::Core(_)))); + } + + #[test] + fn test_rewriter_should_have_fix() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {kind: number} +language: Tsx +rewriters: +- id: wrong + rule: {matches: num}", + ) + .expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + match ret { + Err(RuleConfigError::NoFixInRewriter(name)) => assert_eq!(name, "wrong"), + _ => panic!("unexpected error"), + } + } + + #[test] + fn test_utils_in_rewriter_should_work() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +transform: + B: + rewrite: + rewriters: [re] + source: $A +rewriters: +- id: re + rule: {matches: num} + fix: yjsnp + utils: + num: { kind: number } + ", + ) + .expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("work"); + let grep = TypeScript::Tsx.ast_grep("a = 114514"); + let nm = grep.root().find(&rule.matcher).unwrap(); + let b = nm.get_env().get_transformed("B").expect("should have"); + assert_eq!(String::from_utf8_lossy(b), "yjsnp"); + } + + #[test] + fn test_use_rewriter_recursive() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +transform: + B: { rewrite: { rewriters: [re], source: $A } } +rewriters: +- id: handle-num + rule: {regex: '114'} + fix: '1919810' +- id: re + rule: {kind: number, pattern: $A} + transform: + B: { rewrite: { rewriters: [handle-num], source: $A } } + fix: $B + ", + ) + .expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("work"); + let grep = TypeScript::Tsx.ast_grep("a = 114514"); + let nm = grep.root().find(&rule.matcher).unwrap(); + let b = nm.get_env().get_transformed("B").expect("should have"); + assert_eq!(String::from_utf8_lossy(b), "1919810"); + } + + fn make_undefined_error(src: &str) -> String { + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let err = RuleConfig::try_from(rule, &Default::default()); + match err { + Err(RuleConfigError::UndefinedRewriter(name)) => name, + _ => panic!("unexpected parsing result"), + } + } + + #[test] + fn test_undefined_rewriter() { + let undefined = make_undefined_error( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +transform: + B: { rewrite: { rewriters: [not-defined], source: $A } } +rewriters: +- id: re + rule: {kind: number, pattern: $A} + fix: hah + ", + ); + assert_eq!(undefined, "not-defined"); + } + #[test] + fn test_wrong_rewriter() { + let rule: SerializableRuleConfig = from_str( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +rewriters: +- id: wrong + rule: {kind: '114'} + fix: '1919810' + ", + ) + .expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + match ret { + Err(RuleConfigError::Rewriter(_, name)) => assert_eq!(name, "wrong"), + _ => panic!("unexpected error"), + } + } + + #[test] + fn test_undefined_rewriter_in_transform() { + let undefined = make_undefined_error( + r" +id: test +rule: {pattern: 'a = $A'} +language: Tsx +transform: + B: { rewrite: { rewriters: [re], source: $A } } +rewriters: +- id: re + rule: {kind: number, pattern: $A} + transform: + C: { rewrite: { rewriters: [nested-undefined], source: $A } } + fix: hah + ", + ); + assert_eq!(undefined, "nested-undefined"); + } + + #[test] + fn test_rewriter_use_upper_var() { + let src = r" +id: test +rule: {pattern: '$B = $A'} +language: Tsx +transform: + D: { rewrite: { rewriters: [re], source: $A } } +rewriters: +- id: re + rule: {kind: number, pattern: $C} + fix: $B.$C + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + assert!(ret.is_ok()); + } + + #[test] + fn test_rewriter_use_undefined_var() { + let src = r" +id: test +rule: {pattern: '$B = $A'} +language: Tsx +transform: + B: { rewrite: { rewriters: [re], source: $A } } +rewriters: +- id: re + rule: {kind: number, pattern: $C} + fix: $D.$C + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + assert!(ret.is_err()); + } + + #[test] + fn test_get_message_transform() { + let src = r" +id: test-rule +language: Tsx +rule: { kind: string, pattern: $ARG } +transform: + TEST: { replace: { replace: 'a', by: 'b', source: $ARG, } } +message: $TEST + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("should work"); + let grep = TypeScript::Tsx.ast_grep("a = '123'"); + let nm = grep.root().find(&rule.matcher).unwrap(); + assert_eq!(rule.get_message(&nm), "'123'"); + } + + #[test] + fn test_get_message_transform_string() { + let src = r" +id: test-rule +language: Tsx +rule: { kind: string, pattern: $ARG } +transform: + TEST: replace($ARG, replace=a, by=b) +message: $TEST + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("should work"); + let grep = TypeScript::Tsx.ast_grep("a = '123'"); + let nm = grep.root().find(&rule.matcher).unwrap(); + assert_eq!(rule.get_message(&nm), "'123'"); + } + + #[test] + fn test_complex_metadata() { + let src = r" +id: test-rule +language: Tsx +rule: { kind: string } +metadata: + test: [1, 2, 3] + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let rule = RuleConfig::try_from(rule, &Default::default()).expect("should work"); + let grep = TypeScript::Tsx.ast_grep("a = '123'"); + let nm = grep.root().find(&rule.matcher); + assert!(nm.is_some()); + } + + #[test] + fn test_label() { + let src = r" +id: test-rule +language: Tsx +rule: { pattern: Some($A) } +labels: + A: { style: primary, message: 'var label' } + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + assert!(ret.is_ok()); + let src = r" +id: test-rule +language: Tsx +rule: { pattern: Some($A) } +labels: + B: { style: primary, message: 'var label' } + "; + let rule: SerializableRuleConfig = from_str(src).expect("should parse"); + let ret = RuleConfig::try_from(rule, &Default::default()); + assert!(matches!(ret, Err(RuleConfigError::LabelVariable(_)))); + } +} diff --git a/crates/rule-engine/src/rule_core.rs b/crates/rule-engine/src/rule_core.rs new file mode 100644 index 0000000..0f7702f --- /dev/null +++ b/crates/rule-engine/src/rule_core.rs @@ -0,0 +1,460 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use crate::DeserializeEnv; +use crate::check_var::{CheckHint, check_rule_with_hint}; +use crate::fixer::{Fixer, FixerError, SerializableFixer}; +use crate::rule::Rule; +use crate::rule::referent_rule::RuleRegistration; +use crate::rule::{RuleSerializeError, SerializableRule}; +use crate::transform::{Transform, TransformError, Transformation}; + +use serde::{Deserialize, Serialize}; +use serde_yaml::Error as YamlError; +use thread_ast_engine::language::Language; +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::{Doc, Matcher, Node}; + +use bit_set::BitSet; +use schemars::JsonSchema; +use thiserror::Error; + +use std::borrow::Cow; +use std::ops::Deref; +use thread_utils::{RapidMap, RapidSet}; + +#[derive(Error, Debug)] +pub enum RuleCoreError { + #[error("Fail to parse yaml as RuleConfig")] + Yaml(#[from] YamlError), + #[error("`utils` is not configured correctly.")] + Utils(#[source] RuleSerializeError), + #[error("`rule` is not configured correctly.")] + Rule(#[from] RuleSerializeError), + #[error("`constraints` is not configured correctly.")] + Constraints(#[source] RuleSerializeError), + #[error("`transform` is not configured correctly.")] + Transform(#[from] TransformError), + #[error("`fix` pattern is invalid.")] + Fixer(#[from] FixerError), + #[error("Undefined meta var `{0}` used in `{1}`.")] + UndefinedMetaVar(String, &'static str), +} + +type RResult = std::result::Result; + +/// Used for global rules, rewriters, and pyo3/napi +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +pub struct SerializableRuleCore { + /// A rule object to find matching AST nodes + pub rule: SerializableRule, + /// Additional meta variables pattern to filter matching + pub constraints: Option>, + /// Utility rules that can be used in `matches` + pub utils: Option>, + /// A dictionary for metavariable manipulation. Dict key is the new variable name. + /// Dict value is a [transformation] that specifies how meta var is processed. + /// See [transformation doc](https://ast-grep.github.io/reference/yaml/transformation.html). + pub transform: Option>, + /// A pattern string or a FixConfig object to auto fix the issue. + /// It can reference metavariables appeared in rule. + /// See details in fix [object reference](https://ast-grep.github.io/reference/yaml/fix.html#fixconfig). + pub fix: Option, +} + +impl SerializableRuleCore { + /// This function assumes env's local is empty. + fn get_deserialize_env( + &self, + env: DeserializeEnv, + ) -> RResult> { + if let Some(utils) = &self.utils { + let env = env.with_utils(utils).map_err(RuleCoreError::Utils)?; + Ok(env) + } else { + Ok(env) + } + } + + fn get_constraints( + &self, + env: &DeserializeEnv, + ) -> RResult> { + let mut constraints = RapidMap::default(); + let Some(serde_cons) = &self.constraints else { + return Ok(constraints); + }; + for (key, ser) in serde_cons { + let constraint = env + .deserialize_rule(ser.clone()) + .map_err(RuleCoreError::Constraints)?; + constraints.insert(key.to_string(), constraint); + } + Ok(constraints) + } + + fn get_fixer(&self, env: &DeserializeEnv) -> RResult> { + if let Some(fix) = &self.fix { + let parsed = Fixer::parse(fix, env, &self.transform)?; + Ok(parsed) + } else { + Ok(vec![]) + } + } + + fn get_matcher_from_env(&self, env: &DeserializeEnv) -> RResult { + let rule = env.deserialize_rule(self.rule.clone())?; + let constraints = self.get_constraints(env)?; + let transform = self + .transform + .as_ref() + .map(|t| Transform::deserialize(t, env)) + .transpose()?; + let fixer = self.get_fixer(env)?; + Ok(RuleCore::new(rule) + .with_matchers(constraints) + .with_registration(env.registration.clone()) + .with_transform(transform) + .with_fixer(fixer)) + } + + pub fn get_matcher(&self, env: DeserializeEnv) -> RResult { + self.get_matcher_with_hint(env, CheckHint::Normal) + } + + pub(crate) fn get_matcher_with_hint( + &self, + env: DeserializeEnv, + hint: CheckHint, + ) -> RResult { + let env = self.get_deserialize_env(env)?; + let ret = self.get_matcher_from_env(&env)?; + check_rule_with_hint( + &ret.rule, + &ret.registration, + &ret.constraints, + &ret.transform, + &ret.fixer, + hint, + )?; + Ok(ret) + } +} + +#[derive(Clone, Debug)] +pub struct RuleCore { + rule: Rule, + constraints: RapidMap, + kinds: Option, + pub(crate) transform: Option, + pub fixer: Vec, + // this is required to hold util rule reference + registration: RuleRegistration, +} + +impl RuleCore { + #[inline] + pub fn new(rule: Rule) -> Self { + let kinds = rule.potential_kinds(); + Self { + rule, + kinds, + ..Default::default() + } + } + + #[inline] + pub fn with_matchers(self, constraints: RapidMap) -> Self { + Self { + constraints, + ..self + } + } + + #[inline] + pub fn with_registration(self, registration: RuleRegistration) -> Self { + Self { + registration, + ..self + } + } + + #[inline] + pub fn with_transform(self, transform: Option) -> Self { + Self { transform, ..self } + } + + #[inline] + pub fn with_fixer(self, fixer: Vec) -> Self { + Self { fixer, ..self } + } + + pub fn get_env(&self, lang: L) -> DeserializeEnv { + DeserializeEnv { + lang, + registration: self.registration.clone(), + } + } + /// Get the meta variables that have real ast node matches + /// that is, meta vars defined in the rules and constraints + pub(crate) fn defined_node_vars(&self) -> RapidSet<&str> { + let mut ret = self.rule.defined_vars(); + for v in self.registration.get_local_util_vars() { + ret.insert(v); + } + for constraint in self.constraints.values() { + for var in constraint.defined_vars() { + ret.insert(var); + } + } + ret + } + + pub fn defined_vars(&self) -> RapidSet<&str> { + let mut ret = self.defined_node_vars(); + if let Some(trans) = &self.transform { + for key in trans.keys() { + ret.insert(key); + } + } + ret + } + + pub(crate) fn do_match<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + enclosing_env: Option<&MetaVarEnv<'tree, D>>, + ) -> Option> { + if let Some(kinds) = &self.kinds { + if !kinds.contains(node.kind_id().into()) { + return None; + } + } + let ret = self.rule.match_node_with_env(node, env)?; + if !env.to_mut().match_constraints(&self.constraints) { + return None; + } + if let Some(trans) = &self.transform { + let rewriters = self.registration.get_rewriters(); + let env = env.to_mut(); + if let Some(enclosing) = enclosing_env { + trans.apply_transform(env, rewriters, enclosing); + } else { + let enclosing = env.clone(); + trans.apply_transform(env, rewriters, &enclosing); + }; + } + Some(ret) + } +} +impl Deref for RuleCore { + type Target = Rule; + fn deref(&self) -> &Self::Target { + &self.rule + } +} + +impl Default for RuleCore { + #[inline] + fn default() -> Self { + Self { + rule: Rule::default(), + constraints: RapidMap::default(), + kinds: None, + transform: None, + fixer: vec![], + registration: RuleRegistration::default(), + } + } +} + +impl Matcher for RuleCore { + fn match_node_with_env<'tree, D: Doc>( + &self, + node: Node<'tree, D>, + env: &mut Cow>, + ) -> Option> { + self.do_match(node, env, None) + } + + fn potential_kinds(&self) -> Option { + self.rule.potential_kinds() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::rule::referent_rule::{ReferentRule, ReferentRuleError}; + use crate::test::TypeScript; + use thread_ast_engine::matcher::{Pattern, RegexMatcher}; + use thread_ast_engine::tree_sitter::LanguageExt; + + fn get_matcher(src: &str) -> RResult { + let env = DeserializeEnv::new(TypeScript::Tsx); + let rule: SerializableRuleCore = from_str(src).expect("should word"); + rule.get_matcher(env) + } + + #[test] + fn test_rule_error() { + let ret = get_matcher(r"rule: {kind: bbb}"); + assert!(matches!(ret, Err(RuleCoreError::Rule(_)))); + } + + #[test] + fn test_utils_error() { + let ret = get_matcher( + r" +rule: { kind: number } +utils: { testa: {kind: bbb} } + ", + ); + assert!(matches!(ret, Err(RuleCoreError::Utils(_)))); + } + + #[test] + fn test_undefined_utils_error() { + let ret = get_matcher(r"rule: { kind: number, matches: undefined-util }"); + match ret { + Err(RuleCoreError::Rule(RuleSerializeError::MatchesReference( + ReferentRuleError::UndefinedUtil(name), + ))) => { + assert_eq!(name, "undefined-util"); + } + _ => panic!("wrong error"), + } + } + + #[test] + fn test_cyclic_transform_error() { + let ret = get_matcher( + r" +rule: { kind: number } +transform: + A: {substring: {source: $B}} + B: {substring: {source: $A}}", + ); + assert!(matches!( + ret, + Err(RuleCoreError::Transform(TransformError::Cyclic(_))) + )); + } + + #[test] + fn test_rule_reg_with_utils() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = + from_str("{rule: {matches: test}, utils: {test: {kind: number}} }") + .expect("should deser"); + let rule = ReferentRule::try_new("test".into(), &env.registration).expect("should work"); + let not = ReferentRule::try_new("test2".into(), &env.registration).expect("should work"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + let grep = TypeScript::Tsx.ast_grep("a = 123"); + assert!(grep.root().find(&matcher).is_some()); + assert!(grep.root().find(&rule).is_some()); + assert!(grep.root().find(¬).is_none()); + let grep = TypeScript::Tsx.ast_grep("a = '123'"); + assert!(grep.root().find(&matcher).is_none()); + assert!(grep.root().find(&rule).is_none()); + assert!(grep.root().find(¬).is_none()); + } + + #[test] + fn test_rule_with_constraints() { + let mut constraints = RapidMap::default(); + constraints.insert( + "A".to_string(), + Rule::Regex(RegexMatcher::try_new("a").unwrap()), + ); + let rule = RuleCore::new(Rule::Pattern(Pattern::new("$A", TypeScript::Tsx))) + .with_matchers(constraints); + let grep = TypeScript::Tsx.ast_grep("a"); + assert!(grep.root().find(&rule).is_some()); + let grep = TypeScript::Tsx.ast_grep("bbb"); + assert!(grep.root().find(&rule).is_none()); + } + + #[test] + fn test_constraints_inheriting_env() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = + from_str("{rule: {pattern: $A = $B}, constraints: {A: {pattern: $B}} }") + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + let grep = TypeScript::Tsx.ast_grep("a = a"); + assert!(grep.root().find(&matcher).is_some()); + let grep = TypeScript::Tsx.ast_grep("a = b"); + assert!(grep.root().find(&matcher).is_none()); + } + + #[test] + fn test_constraints_writing_to_env() { + let env = DeserializeEnv::new(TypeScript::Tsx); + let ser_rule: SerializableRuleCore = + from_str("{rule: {pattern: $A = $B}, constraints: {B: {pattern: $C + $D}} }") + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + let grep = TypeScript::Tsx.ast_grep("a = a"); + assert!(grep.root().find(&matcher).is_none()); + let grep = TypeScript::Tsx.ast_grep("a = 1 + 2"); + let nm = grep.root().find(&matcher).expect("should match"); + let env = nm.get_env(); + let matched = env.get_match("C").expect("should match C").text(); + assert_eq!(matched, "1"); + let matched = env.get_match("D").expect("should match D").text(); + assert_eq!(matched, "2"); + } + + fn get_rewriters() -> (&'static str, RuleCore) { + // NOTE: initialize a DeserializeEnv here is not 100% correct + // it does not inherit global rules or local rules + let env = DeserializeEnv::new(TypeScript::Tsx); + let rewriter: SerializableRuleCore = + from_str("{rule: {kind: number, pattern: $REWRITE}, fix: yjsnp}") + .expect("should parse"); + let rewriter = rewriter.get_matcher(env).expect("should work"); + ("re", rewriter) + } + + #[test] + fn test_rewriter_writing_to_env() { + let (id, rewriter) = get_rewriters(); + let env = DeserializeEnv::new(TypeScript::Tsx); + env.registration.insert_rewriter(id, rewriter); + let ser_rule: SerializableRuleCore = from_str( + r" +rule: {pattern: $A = $B} +transform: + C: + rewrite: + source: $B + rewriters: [re]", + ) + .expect("should deser"); + let matcher = ser_rule.get_matcher(env).expect("should parse"); + let grep = TypeScript::Tsx.ast_grep("a = 1 + 2"); + let nm = grep.root().find(&matcher).expect("should match"); + let env = nm.get_env(); + let matched = env.get_match("B").expect("should match").text(); + assert_eq!(matched, "1 + 2"); + let matched = env.get_match("A").expect("should match").text(); + assert_eq!(matched, "a"); + let transformed = env.get_transformed("C").expect("should transform"); + assert_eq!(String::from_utf8_lossy(transformed), "yjsnp + yjsnp"); + assert!(env.get_match("REWRITE").is_none()); + + let grep = TypeScript::Tsx.ast_grep("a = a"); + let nm = grep.root().find(&matcher).expect("should match"); + let env = nm.get_env(); + let matched = env.get_match("B").expect("should match").text(); + assert_eq!(matched, "a"); + let transformed = env.get_transformed("C").expect("should transform"); + assert_eq!(String::from_utf8_lossy(transformed), "a"); + } +} diff --git a/crates/rule-engine/src/transform/mod.rs b/crates/rule-engine/src/transform/mod.rs new file mode 100644 index 0000000..5e01abd --- /dev/null +++ b/crates/rule-engine/src/transform/mod.rs @@ -0,0 +1,186 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +mod parse; +mod rewrite; +mod string_case; +mod trans; + +use crate::{DeserializeEnv, RuleCore}; + +use thread_ast_engine::Doc; +use thread_ast_engine::Language; +use thread_ast_engine::meta_var::MetaVarEnv; +use thread_ast_engine::meta_var::MetaVariable; + +use parse::ParseTransError; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use thread_utils::RapidMap; + +pub use trans::Trans; + +#[derive(Serialize, Deserialize, Clone, JsonSchema, Debug)] +#[serde(untagged)] +pub enum Transformation { + Simplied(String), + Object(Trans), +} + +impl Transformation { + pub fn parse(&self, lang: &L) -> Result, TransformError> { + match self { + Transformation::Simplied(s) => { + let t: Trans = s.parse()?; + t.parse(lang) + } + Transformation::Object(t) => t.parse(lang), + } + } +} + +#[derive(Error, Debug)] +pub enum TransformError { + #[error("Cannot parse transform string.")] + Parse(#[from] ParseTransError), + #[error("`{0}` has a cyclic dependency.")] + Cyclic(String), + #[error("Transform var `{0}` has already defined.")] + AlreadyDefined(String), + #[error("source `{0}` should be $-prefixed.")] + MalformedVar(String), +} + +#[derive(Clone, Debug)] +pub struct Transform { + transforms: Vec<(String, Trans)>, +} + +impl Transform { + pub fn deserialize( + map: &RapidMap, + env: &DeserializeEnv, + ) -> Result { + let map: Result<_, _> = map + .iter() + .map(|(key, val)| val.parse(&env.lang).map(|t| (key.to_string(), t))) + .collect(); + let map = map?; + let order = env + .get_transform_order(&map) + .map_err(TransformError::Cyclic)?; + let transforms = order + .iter() + .map(|&key| (key.to_string(), map[key].clone())) + .collect(); + Ok(Self { transforms }) + } + + pub fn apply_transform<'c, D: Doc>( + &self, + env: &mut MetaVarEnv<'c, D>, + rewriters: &RapidMap, + enclosing_env: &MetaVarEnv<'c, D>, + ) { + let mut ctx = Ctx { + env, + rewriters, + enclosing_env, + }; + for (key, tr) in &self.transforms { + tr.insert(key, &mut ctx); + } + } + + pub(crate) fn keys(&self) -> impl Iterator { + self.transforms.iter().map(|t| &t.0) + } + + pub(crate) fn values(&self) -> impl Iterator> { + self.transforms.iter().map(|t| &t.1) + } +} + +// two lifetime to represent env root lifetime and lang/trans lifetime +struct Ctx<'b, 'c, D: Doc> { + rewriters: &'b RapidMap, + env: &'b mut MetaVarEnv<'c, D>, + enclosing_env: &'b MetaVarEnv<'c, D>, +} + +#[cfg(test)] +mod test { + use super::*; + use crate::from_str; + use crate::test::TypeScript; + use thread_ast_engine::tree_sitter::LanguageExt; + + #[test] + fn test_transform_str() {} + + #[test] + fn test_single_cyclic_transform() { + let mut trans = RapidMap::default(); + let trans_a = from_str("substring: {source: $A}").unwrap(); + trans.insert("A".into(), trans_a); + let env = DeserializeEnv::new(TypeScript::Tsx); + match Transform::deserialize(&trans, &env) { + Err(TransformError::Cyclic(a)) => assert_eq!(a, "A"), + _ => panic!("unexpected error"), + } + } + + #[test] + fn test_cyclic_transform() { + let mut trans = RapidMap::default(); + let trans_a = from_str("substring: {source: $B}").unwrap(); + trans.insert("A".into(), trans_a); + let trans_b = from_str("substring: {source: $A}").unwrap(); + trans.insert("B".into(), trans_b); + let env = DeserializeEnv::new(TypeScript::Tsx); + let ret = Transform::deserialize(&trans, &env); + assert!(matches!(ret, Err(TransformError::Cyclic(_)))); + } + + #[test] + fn test_transform_use_matched() { + let mut trans = RapidMap::default(); + let trans_a = from_str("substring: {source: $C}").unwrap(); + trans.insert("A".into(), trans_a); + let trans_b = from_str("substring: {source: $A}").unwrap(); + trans.insert("B".into(), trans_b); + let env = DeserializeEnv::new(TypeScript::Tsx); + let ret = Transform::deserialize(&trans, &env); + assert!(ret.is_ok()); + } + + #[test] + fn test_transform_indentation() { + let src = " +if (true) { + let a = { + b: 123 + } +} +"; + let expected = "{ + b: 123 +}"; + let mut trans = RapidMap::default(); + let tr = from_str("{ substring: { source: $A } }").expect("should work"); + trans.insert("TR".into(), tr); + let grep = TypeScript::Tsx.ast_grep(src); + let root = grep.root(); + let mut nm = root.find("let a = $A").expect("should find"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let trans = Transform::deserialize(&trans, &env).expect("should deserialize"); + trans.apply_transform(nm.get_env_mut(), &Default::default(), &Default::default()); + let actual = nm.get_env().get_transformed("TR").expect("should have TR"); + let actual = std::str::from_utf8(actual).expect("should work"); + assert_eq!(actual, expected); + } +} diff --git a/crates/rule-engine/src/transform/parse.rs b/crates/rule-engine/src/transform/parse.rs new file mode 100644 index 0000000..7722791 --- /dev/null +++ b/crates/rule-engine/src/transform/parse.rs @@ -0,0 +1,259 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::Trans; +use super::rewrite::Rewrite; +use super::trans::{Convert, Replace, Substring}; +use serde_yaml::from_str as yaml_from_str; +use std::str::FromStr; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ParseTransError { + #[error("`{0}` has syntax error.")] + Syntax(String), + #[error("`{0}` is not a valid transformation.")] + InvalidTransform(String), + #[error("`{0}` is not a valid argument.")] + InvalidArg(String), + #[error("Argument `{0}` is required.")] + RequiredArg(&'static str), + #[error("Invalid argument value.")] + ArgValue(#[from] serde_yaml::Error), +} + +impl FromStr for Trans { + type Err = ParseTransError; + + fn from_str(s: &str) -> Result { + let decomposed = decompose_str(s)?; + let trans = match decomposed.func { + "convert" => Trans::Convert(to_convert(decomposed)?), + "replace" => Trans::Replace(to_replace(decomposed)?), + "substring" => Trans::Substring(to_substring(decomposed)?), + "rewrite" => Trans::Rewrite(to_rewrite(decomposed)?), + invalid => return Err(ParseTransError::InvalidTransform(invalid.to_string())), + }; + Ok(trans) + } +} + +struct DecomposedTransString<'a> { + func: &'a str, + source: &'a str, + args: Vec<(&'a str, &'a str)>, +} + +fn decompose_str(input: &str) -> Result, ParseTransError> { + let error = || ParseTransError::Syntax(input.to_string()); + let input = input.trim(); + let (func, rest) = input.split_once('(').ok_or_else(error)?; + let func = func.trim(); + let rest = rest.trim_end_matches(')'); + let (source, rest) = rest.split_once(',').ok_or_else(error)?; + let source = source.trim(); + let args = decompose_args(rest.trim()).ok_or_else(error)?; + Ok(DecomposedTransString { func, source, args }) +} + +fn decompose_args(mut rest: &str) -> Option> { + let mut args = Vec::new(); + while !rest.is_empty() { + let (key, next) = rest.split_once('=')?; + let next = next.trim_start(); + let end_index = if next.starts_with(['\'', '"', '[']) { + let end_char = match next.as_bytes()[0] { + b'[' => ']', + b => b as char, + }; + next[1..].find(end_char)? + 1 + } else { + next.find(',').unwrap_or(next.len()) - 1 + }; + let (val, next) = next.split_at(end_index + 1); + // value should not be trimmed + args.push((key.trim(), val)); + rest = next.trim_start().trim_start_matches(',').trim(); + } + Some(args) +} + +fn to_convert(decomposed: DecomposedTransString) -> Result, ParseTransError> { + debug_assert_eq!(decomposed.func, "convert"); + let mut to_case = None; + let mut separated_by = None; + for (key, value) in decomposed.args { + match key { + "toCase" => to_case = Some(value), + "separatedBy" => separated_by = Some(value), + _ => return Err(ParseTransError::InvalidArg(key.to_string())), + } + } + let to_case = to_case.ok_or(ParseTransError::RequiredArg("to_case"))?; + let to_case = yaml_from_str(to_case)?; + let separated_by = separated_by.map(yaml_from_str).transpose()?; + Ok(Convert { + source: decomposed.source.to_string(), + to_case, + separated_by, + }) +} + +fn to_replace(decomposed: DecomposedTransString) -> Result, ParseTransError> { + debug_assert_eq!(decomposed.func, "replace"); + let mut replace = None; + let mut by = None; + for (key, value) in decomposed.args { + match key { + "replace" => replace = Some(value), + "by" => by = Some(value), + _ => return Err(ParseTransError::InvalidArg(key.to_string())), + } + } + let replace = replace.ok_or(ParseTransError::RequiredArg("replace"))?; + let by = by.ok_or(ParseTransError::RequiredArg("by"))?; + Ok(Replace::new( + decomposed.source.to_string(), + serde_yaml::from_str(replace)?, + serde_yaml::from_str(by)?, + )) +} +fn to_substring(decomposed: DecomposedTransString) -> Result, ParseTransError> { + debug_assert_eq!(decomposed.func, "substring"); + let mut start_char = None; + let mut end_char = None; + for (key, value) in decomposed.args { + match key { + "startChar" => start_char = Some(value), + "endChar" => end_char = Some(value), + _ => return Err(ParseTransError::InvalidArg(key.to_string())), + } + } + let start_char = start_char.map(yaml_from_str).transpose()?; + let end_char = end_char.map(yaml_from_str).transpose()?; + Ok(Substring { + source: decomposed.source.to_string(), + start_char, + end_char, + }) +} +fn to_rewrite(decomposed: DecomposedTransString) -> Result, ParseTransError> { + debug_assert_eq!(decomposed.func, "rewrite"); + let mut rewriters = None; + let mut join_by = None; + for (key, value) in decomposed.args { + match key { + "rewriters" => rewriters = Some(value), + "joinBy" => join_by = Some(value), + _ => return Err(ParseTransError::InvalidArg(key.to_string())), + } + } + let rewriters = rewriters.ok_or(ParseTransError::RequiredArg("rewriters"))?; + let rewriters = yaml_from_str(rewriters)?; + Ok(Rewrite { + source: decomposed.source.to_string(), + rewriters, + join_by: join_by.map(yaml_from_str).transpose()?, + }) +} + +#[cfg(test)] +mod test { + use crate::transform::string_case::StringCase; + + use super::*; + + #[test] + fn test_decompose_str() { + let input = "substring($A, startChar=1, endChar=2)"; + let decomposed = decompose_str(input).expect("should parse"); + assert_eq!(decomposed.func, "substring"); + assert_eq!(decomposed.source, "$A"); + assert_eq!(decomposed.args.len(), 2); + assert_eq!(decomposed.args[0], ("startChar", "1")); + assert_eq!(decomposed.args[1], ("endChar", "2")); + } + const SUBSTRING_CASE: &str = "substring($A, startChar=1, endChar=2)"; + const REPLACE_CASE: &str = "replace($A, replace= ^.+, by=', ')"; + const CONVERT_CASE: &str = "convert($A, toCase=camelCase, separatedBy=[underscore, dash])"; + const REWRITE_CASE: &str = "rewrite($A, rewriters=[rule1, rule2], joinBy = ',,,,')"; + + #[test] + fn test_decompose_cases() { + let cases = [SUBSTRING_CASE, REPLACE_CASE, CONVERT_CASE, REWRITE_CASE]; + for case in cases { + let decomposed = decompose_str(case).expect("should parse"); + match decomposed.func { + "convert" => assert_eq!(decomposed.args.len(), 2), + "replace" => assert_eq!(decomposed.args.len(), 2), + "substring" => assert_eq!(decomposed.args.len(), 2), + "rewrite" => assert_eq!(decomposed.args.len(), 2), + _ => panic!("Unexpected function: {}", decomposed.func), + } + } + } + + #[test] + fn test_valid_transform() { + let cases = [ + "convert($A, toCase=camelCase, separatedBy=[])", + "replace($A, replace= ^.+, by = '[')", + "substring( $A, startChar=1)", + "substring( $A,)", + "rewrite($A, rewriters=[rule1, rule2])", + ]; + for case in cases { + Trans::from_str(case).expect("should parse convert"); + } + } + + #[test] + fn test_parse_convert() { + let convert = Trans::from_str(CONVERT_CASE).expect("should parse convert"); + let Trans::Convert(convert) = convert else { + panic!("Expected Convert transformation"); + }; + assert_eq!(convert.source, "$A"); + assert_eq!(convert.separated_by.map(|v| v.len()), Some(2)); + assert!(matches!(convert.to_case, StringCase::CamelCase)); + } + + #[test] + fn test_parse_replace() { + let replace = Trans::from_str(REPLACE_CASE).expect("should parse replace"); + let Trans::Replace(replace) = replace else { + panic!("Expected Replace transformation"); + }; + assert_eq!(replace.source, "$A"); + assert_eq!(replace.replace, "^.+"); + assert_eq!(replace.by, ", "); + } + + #[test] + fn test_parse_substring() { + let substring = Trans::from_str(SUBSTRING_CASE).expect("should parse substring"); + let Trans::Substring(substring) = substring else { + panic!("Expected Substring transformation"); + }; + assert_eq!(substring.source, "$A"); + assert_eq!(substring.start_char, Some(1)); + assert_eq!(substring.end_char, Some(2)); + } + + #[test] + fn test_parse_rewrite() { + let rewrite = Trans::from_str(REWRITE_CASE).expect("should parse rewrite"); + let Trans::Rewrite(rewrite) = rewrite else { + panic!("Expected Rewrite transformation"); + }; + assert_eq!(rewrite.source, "$A"); + assert_eq!( + rewrite.rewriters, + vec!["rule1".to_owned(), "rule2".to_owned()] + ); + assert_eq!(rewrite.join_by, Some(",,,,".into())); + } +} diff --git a/crates/rule-engine/src/transform/rewrite.rs b/crates/rule-engine/src/transform/rewrite.rs new file mode 100644 index 0000000..846a21b --- /dev/null +++ b/crates/rule-engine/src/transform/rewrite.rs @@ -0,0 +1,372 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::Ctx; +use super::{TransformError, trans::parse_meta_var}; +use crate::rule_core::RuleCore; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use thread_ast_engine::meta_var::MetaVariable; +use thread_ast_engine::source::{Content, Edit}; +use thread_ast_engine::{Doc, Language, Node, NodeMatch}; + +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Rewrite { + pub source: T, + pub rewriters: Vec, + // do we need this? + // sort_by: Option, + pub join_by: Option, +} + +fn get_nodes_from_env<'b, D: Doc>(var: &MetaVariable, ctx: &Ctx<'_, 'b, D>) -> Vec> { + match var { + MetaVariable::MultiCapture(n) => ctx.env.get_multiple_matches(n), + MetaVariable::Capture(m, _) => { + if let Some(n) = ctx.env.get_match(m) { + vec![n.clone()] + } else { + vec![] + } + } + _ => vec![], + } +} +impl Rewrite { + pub fn parse(&self, lang: &L) -> Result, TransformError> { + let source = parse_meta_var(&self.source, lang)?; + Ok(Rewrite { + source, + rewriters: self.rewriters.clone(), + join_by: self.join_by.clone(), + }) + } +} + +impl Rewrite { + pub(super) fn compute(&self, ctx: &mut Ctx<'_, '_, D>) -> Option { + let var = &self.source; + let nodes = get_nodes_from_env(var, ctx); + if nodes.is_empty() { + return None; + } + let rewriters = ctx.rewriters; + let start = nodes[0].range().start; + let bytes = ctx.env.get_var_bytes(var)?; + let rules: Vec<_> = self + .rewriters + .iter() + .filter_map(|id| rewriters.get(id)) // NOTE: rewriter must be defined + .collect(); + let edits = find_and_make_edits(nodes, &rules, ctx); + let rewritten = if let Some(joiner) = &self.join_by { + let mut ret = vec![]; + let mut edits = edits.into_iter(); + if let Some(first) = edits.next() { + let mut pos = first.position - start + first.deleted_length; + ret.extend(first.inserted_text); + let joiner = D::Source::decode_str(joiner); + for edit in edits { + let p = edit.position - start; + // skip overlapping edits + if pos > p { + continue; + } + ret.extend_from_slice(&joiner); + ret.extend(edit.inserted_text); + pos = p + edit.deleted_length; + } + ret + } else { + ret + } + } else { + make_edit::(bytes, edits, start) + }; + Some(D::Source::encode_bytes(&rewritten).to_string()) + } +} + +type Bytes = [<::Source as Content>::Underlying]; +fn find_and_make_edits<'n, D: Doc>( + nodes: Vec>, + rules: &[&RuleCore], + ctx: &Ctx<'_, 'n, D>, +) -> Vec> { + nodes + .into_iter() + .flat_map(|n| replace_one(n, rules, ctx)) + .collect() +} + +fn replace_one<'n, D: Doc>( + node: Node<'n, D>, + rules: &[&RuleCore], + ctx: &Ctx<'_, 'n, D>, +) -> Vec> { + let mut edits = Vec::with_capacity(16); // pre-allocate to avoid reallocations + + for child in node.dfs() { + for rule in rules { + let mut env = std::borrow::Cow::Borrowed(ctx.enclosing_env); + // NOTE: we inherit meta_var_env from enclosing rule + // but match env will NOT inherited recursively! + // e.g. $B is matched in parent linter and it is inherited. + // $C is matched in rewriter but is NOT inherited in recursive rewriter + // this is to enable recursive rewriter to match sub nodes + // in future, we can use the explicit `expose` to control env inheritance + if let Some(n) = rule.do_match(child.clone(), &mut env, Some(ctx.enclosing_env)) { + let nm = NodeMatch::new(n, env.into_owned()); + edits.push(nm.make_edit(rule, rule.fixer.first().expect("rewriter must have fix"))); + // stop at first fix, skip duplicate fix + break; + } + } + } + edits +} + +fn make_edit( + old_content: &Bytes, + edits: Vec>, + offset: usize, +) -> Vec<<::Source as Content>::Underlying> { + let mut new_content = vec![]; + let mut start = 0; + for edit in edits { + let pos = edit.position - offset; + // skip overlapping edits + if start > pos { + continue; + } + new_content.extend_from_slice(&old_content[start..pos]); + new_content.extend_from_slice(&edit.inserted_text); + start = pos + edit.deleted_length; + } + // add trailing statements + new_content.extend_from_slice(&old_content[start..]); + new_content +} + +#[cfg(test)] +mod test { + use super::*; + use crate::check_var::CheckHint; + use crate::from_str; + use crate::rule::DeserializeEnv; + use crate::rule::referent_rule::RuleRegistration; + use crate::rule_core::SerializableRuleCore; + use crate::test::TypeScript; + use thread_utils::RapidSet; + + fn apply_transformation( + rewrite: Rewrite, + src: &str, + pat: &str, + rewriters: RuleRegistration, + ) -> String { + compute_rewritten(src, pat, rewrite, rewriters).expect("should have transforms") + } + + macro_rules! str_vec { + ( $($a: expr),* ) => { vec![ $($a.to_string()),* ] }; + } + + fn make_rewriters(pairs: &[(&str, &str)]) -> RuleRegistration { + make_rewriter_reg(pairs, Default::default()) + } + + fn make_rewriter_reg(pairs: &[(&str, &str)], vars: RapidSet<&str>) -> RuleRegistration { + let env = DeserializeEnv::new(TypeScript::Tsx); + for (key, ser) in pairs { + let serialized: SerializableRuleCore = from_str(ser).unwrap(); + let rule = serialized + .get_matcher_with_hint(env.clone(), CheckHint::Rewriter(&vars)) + .unwrap(); + env.registration.insert_rewriter(key, rule); + } + env.registration + } + + #[test] + fn test_perform_one_rewrite() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["rewrite"], + join_by: None, + }; + let rewriters = make_rewriters(&[("rewrite", "{rule: {kind: number}, fix: '810'}")]); + let ret = apply_transformation(rewrite, "log(t(1, 2, 3))", "log($A)", rewriters); + assert_eq!(ret, "t(810, 810, 810)"); + } + + #[test] + fn test_perform_multiple_rewriters() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re1", "re2"], + join_by: None, + }; + let reg = make_rewriters(&[ + ("re1", "{rule: {regex: '^1$'}, fix: '810'}"), + ("re2", "{rule: {regex: '^2$'}, fix: '1919'}"), + ]); + let ret = apply_transformation(rewrite, "log(t(1, 2, 3))", "log($A)", reg); + assert_eq!(ret, "t(810, 1919, 3)"); + } + + #[test] + fn test_ignore_unused_rewriters() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re1"], + join_by: None, + }; + let reg = make_rewriters(&[ + ("ignored", "{rule: {regex: '^2$'}, fix: '1919'}"), + ("re1", "{rule: {kind: number}, fix: '810'}"), + ]); + let ret = apply_transformation(rewrite, "log(t(1, 2, 3))", "log($A)", reg); + assert_eq!(ret, "t(810, 810, 810)"); + } + + #[test] + fn test_rewriters_order() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re2", "re1"], + join_by: None, + }; + // first match wins the rewrite + let reg = make_rewriters(&[ + ("re2", "{rule: {regex: '^2$'}, fix: '1919'}"), + ("re1", "{rule: {kind: number}, fix: '810'}"), + ]); + let ret = apply_transformation(rewrite, "log(t(1, 2, 3))", "log($A)", reg); + assert_eq!(ret, "t(810, 1919, 810)"); + } + + #[test] + fn test_rewriters_overlapping() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re1", "re2"], + join_by: None, + }; + // parent node wins fix, even if rule comes later + let reg = make_rewriters(&[ + ("re1", "{rule: {kind: number}, fix: '810'}"), + ("re2", "{rule: {kind: array}, fix: '1919'}"), + ]); + let ret = apply_transformation(rewrite, "[1, 2, 3]", "$A", reg); + assert_eq!(ret, "1919"); + } + + #[test] + fn test_rewriters_join_by() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re1"], + join_by: Some(" + ".into()), + }; + let reg = make_rewriters(&[("re1", "{rule: {kind: number}, fix: '810'}")]); + let ret = apply_transformation(rewrite, "log(t(1, 2, 3))", "log($A)", reg); + assert_eq!(ret, "810 + 810 + 810"); + } + + #[test] + fn test_recursive_rewriters() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re1"], + join_by: None, + }; + let rule = r#" +rule: {pattern: '[$$$C]'} +transform: + D: + rewrite: + source: $$$C + rewriters: [re1] +fix: $D + "#; + let reg = make_rewriters(&[("re1", rule)]); + let ret = apply_transformation(rewrite, "[1, [2, [3, [4]]]]", "$A", reg); + assert_eq!(ret, "1, 2, 3, 4"); + } + + #[test] + fn test_should_inherit_match_env() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re"], + join_by: None, + }; + let reg = make_rewriters(&[("re", "{rule: {pattern: $C}, fix: '123'}")]); + let ret = apply_transformation(rewrite.clone(), "[1, 2]", "[$A, $B]", reg.clone()); + assert_eq!(ret, "123"); + let ret = apply_transformation(rewrite.clone(), "[1, 1]", "[$A, $C]", reg.clone()); + assert_eq!(ret, "123"); + // should not match $C so no rewrite + let ret = apply_transformation(rewrite, "[1, 2]", "[$A, $C]", reg); + assert_eq!(ret, "1"); + } + + #[test] + fn test_node_not_found() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re"], + join_by: None, + }; + let rewriters = make_rewriters(&[("re", "{rule: {pattern: $B}, fix: '123'}")]); + let ret = compute_rewritten("[1, 2]", "[$B, $C]", rewrite, rewriters); + assert_eq!(ret, None); + } + + #[test] + fn test_rewrite_use_enclosing_env() { + let rewrite = Rewrite { + source: "$A".into(), + rewriters: str_vec!["re"], + join_by: None, + }; + let mut vars = RapidSet::default(); + vars.insert("C"); + let reg = make_rewriter_reg(&[("re", "{rule: {pattern: $B}, fix: '$B == $C'}")], vars); + let ret = apply_transformation(rewrite, "[1, 2]", "[$A, $C]", reg); + assert_eq!(ret, "1 == 2"); + } + + fn compute_rewritten( + src: &str, + pat: &str, + rewrite: Rewrite, + reg: RuleRegistration, + ) -> Option { + use thread_ast_engine::tree_sitter::LanguageExt; + let grep = TypeScript::Tsx.ast_grep(src); + let root = grep.root(); + let mut nm = root.find(pat).expect("should find"); + let before_vars: Vec<_> = nm.get_env().get_matched_variables().collect(); + let env = nm.get_env_mut(); + let enclosing = env.clone(); + let rewriters = reg.get_rewriters(); + let mut ctx = Ctx { + env, + rewriters, + enclosing_env: &enclosing, + }; + let after_vars: Vec<_> = ctx.env.get_matched_variables().collect(); + assert_eq!( + before_vars, after_vars, + "rewrite should not write back to env" + ); + rewrite.parse(&TypeScript::Tsx).ok()?.compute(&mut ctx) + } +} diff --git a/crates/rule-engine/src/transform/string_case.rs b/crates/rule-engine/src/transform/string_case.rs new file mode 100644 index 0000000..23285b2 --- /dev/null +++ b/crates/rule-engine/src/transform/string_case.rs @@ -0,0 +1,291 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::ops::Range; + +fn capitalize(string: &str) -> String { + let mut chars = string.chars(); + if let Some(c) = chars.next() { + c.to_uppercase().chain(chars).collect() + } else { + string.to_string() + } +} + +/// An enumeration representing different cases for strings. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum StringCase { + LowerCase, + UpperCase, + Capitalize, + CamelCase, + SnakeCase, + KebabCase, + PascalCase, +} + +use StringCase::*; + +impl StringCase { + pub fn apply(&self, s: &str, seps: Option<&[Separator]>) -> String { + match &self { + LowerCase => s.to_lowercase(), + UpperCase => s.to_uppercase(), + Capitalize => capitalize(s), + CamelCase => join_camel_case(split(s, seps)), + SnakeCase => join(split(s, seps), '_'), + KebabCase => join(split(s, seps), '-'), + PascalCase => split(s, seps).map(capitalize).collect(), + } + } +} + +#[derive(Serialize, Deserialize, Clone, Copy, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +/// Separator to split string. e.g. `user_accountName` -> `user`, `accountName` +/// It will be rejoin according to `StringCase`. +pub enum Separator { + CaseChange, + Dash, + Dot, + Slash, + Space, + Underscore, +} + +impl From<&[Separator]> for Delimiter { + fn from(value: &[Separator]) -> Self { + use Separator::*; + let mut delimiter = vec![]; + let mut state = CaseState::IgnoreCase; + value.iter().for_each(|v| match v { + CaseChange => state = CaseState::Lower, + Dash => delimiter.push('-'), + Dot => delimiter.push('.'), + Slash => delimiter.push('/'), + Space => delimiter.push(' '), + Underscore => delimiter.push('_'), + }); + Self { + left: 0, + right: 0, + state, + delimiter, + } + } +} + +#[derive(PartialEq, Eq, Debug, Clone)] +/// CaseState is used to record the case change between two characters. +/// It will be used if separator is CaseChange. +enum CaseState { + Lower, + OneUpper, + /// MultiUpper records consecutive uppercase characters. + /// char is the last uppercase char, used to calculate the split range. + MultiUpper(char), + IgnoreCase, +} + +#[derive(Debug)] +struct Delimiter { + left: usize, + right: usize, + state: CaseState, + delimiter: Vec, +} +impl Delimiter { + fn all() -> Delimiter { + Delimiter { + left: 0, + right: 0, + state: CaseState::Lower, + delimiter: vec!['-', '.', '/', ' ', '_'], + } + } + fn delimit(&mut self, c: char) -> Option> { + let Self { + left, + right, + state, + delimiter, + } = self; + use CaseState::*; + // normal delimiter + if delimiter.contains(&c) { + let range = *left..*right; + *left = *right + 1; + *right = *left; + if *state != IgnoreCase { + self.state = Lower; + } + return Some(range); + } + // case delimiter, from lowercase to uppercase + if *state == Lower && c.is_uppercase() { + let range = *left..*right; + *left = *right; + *right = *left + c.len_utf8(); + self.state = OneUpper; + return Some(range); + } + // case 2, consecutive UpperCases followed by lowercase + // e.g. XMLHttp -> XML Http + if let MultiUpper(last_char) = state { + if c.is_lowercase() { + let new_left = *right - last_char.len_utf8(); + let range = *left..new_left; + *left = new_left; + *right += c.len_utf8(); + self.state = Lower; + return Some(range); + } + } + *right += c.len_utf8(); + if *state == CaseState::IgnoreCase { + return None; + } else if c.is_lowercase() { + self.state = Lower; + } else if *state == Lower { + self.state = OneUpper; + } else { + self.state = MultiUpper(c); + } + None + } + fn conclude(&mut self, len: usize) -> Option> { + let Self { left, right, .. } = self; + if left < right && *right <= len { + let range = *left..*right; + *left = *right; + Some(range) + } else { + None + } + } +} + +/** + Split string by Separator +*/ +fn split<'a>(s: &'a str, seps: Option<&[Separator]>) -> impl Iterator { + let mut chars = s.chars(); + let mut delimiter = if let Some(seps) = seps { + Delimiter::from(seps) + } else { + Delimiter::all() + }; + std::iter::from_fn(move || { + for c in chars.by_ref() { + if let Some(range) = delimiter.delimit(c) { + if range.start != range.end { + return Some(&s[range]); + } + } + } + let range = delimiter.conclude(s.len())?; + if range.start != range.end { + Some(&s[range]) + } else { + None + } + }) +} + +fn join<'a, I>(mut words: I, sep: char) -> String +where + I: Iterator, +{ + let mut result = String::new(); + if let Some(w) = words.next() { + result.push_str(&w.to_lowercase()); + } + for w in words { + result.push(sep); + result.push_str(&w.to_lowercase()); + } + result +} + +fn join_camel_case<'a, I>(words: I) -> String +where + I: Iterator, +{ + let mut result = String::new(); + for (i, word) in words.enumerate() { + if i == 0 { + result.push_str(&word.to_lowercase()); + } else { + result.push_str(&capitalize(word)); + } + } + result +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_case_conversions() { + assert_eq!(StringCase::LowerCase.apply("aBc", None), "abc"); + assert_eq!(StringCase::UpperCase.apply("aBc", None), "ABC"); + assert_eq!(StringCase::Capitalize.apply("aBc", None), "ABc"); + } + const CAMEL: &str = "camelsLiveInTheDesert"; + const SNAKE: &str = "snakes_live_in_forests"; + const KEBAB: &str = "kebab-is-a-delicious-food"; + const PASCAL: &str = "PascalIsACoolGuy"; + const PATH: &str = "path/is/a/slashed/string"; + const DOT: &str = "www.dot.com"; + const URL: &str = "x.com/hd_nvim"; + + fn assert_split(s: &str, v: &[&str]) { + let actual: Vec<_> = split(s, None).collect(); + assert_eq!(v, actual) + } + + #[test] + fn test_split() { + assert_split(CAMEL, &["camels", "Live", "In", "The", "Desert"]); + assert_split(SNAKE, &["snakes", "live", "in", "forests"]); + assert_split(KEBAB, &["kebab", "is", "a", "delicious", "food"]); + assert_split(PASCAL, &["Pascal", "Is", "A", "Cool", "Guy"]); + assert_split(PATH, &["path", "is", "a", "slashed", "string"]); + assert_split(DOT, &["www", "dot", "com"]); + assert_split(URL, &["x", "com", "hd", "nvim"]); + assert_split("XMLHttpRequest", &["XML", "Http", "Request"]); + assert_split("whatHTML", &["what", "HTML"]); + } + + fn assert_split_sep(s: &str, seps: &[Separator], v: &[&str]) { + let actual: Vec<_> = split(s, Some(seps)).collect(); + assert_eq!(v, actual) + } + + #[test] + fn test_split_by_separator() { + use Separator::*; + assert_split_sep("user_accountName", &[Underscore], &["user", "accountName"]); + assert_split_sep("user_accountName", &[Space], &["user_accountName"]); + assert_split_sep("user_accountName", &[CaseChange], &["user_account", "Name"]); + } + + fn assert_format(fmt: StringCase, src: &str, expected: &str) { + assert_eq!(fmt.apply(src, None), expected) + } + + #[test] + fn test_format() { + assert_format(SnakeCase, CAMEL, "camels_live_in_the_desert"); + assert_format(KebabCase, CAMEL, "camels-live-in-the-desert"); + assert_format(PascalCase, KEBAB, "KebabIsADeliciousFood"); + assert_format(PascalCase, SNAKE, "SnakesLiveInForests"); + } +} diff --git a/crates/rule-engine/src/transform/trans.rs b/crates/rule-engine/src/transform/trans.rs new file mode 100644 index 0000000..80ff2ea --- /dev/null +++ b/crates/rule-engine/src/transform/trans.rs @@ -0,0 +1,555 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::rewrite::Rewrite; +use super::{Ctx, TransformError, string_case}; +use thread_ast_engine::meta_var::MetaVariable; +use thread_ast_engine::source::Content; +use thread_ast_engine::{Doc, Language}; +use thread_utils::is_ascii_simd; + +use regex::Regex; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::sync::OnceLock; + +use string_case::{Separator, StringCase}; + +#[inline] +fn get_text_from_env(var: &MetaVariable, ctx: &mut Ctx<'_, '_, D>) -> Option { + // TODO: check if topological sort has resolved transform dependency + let bytes = ctx.env.get_var_bytes(var)?; + Some(::encode_bytes(bytes).into_owned()) +} + +/// Extracts a substring from the meta variable's text content. +/// +/// Both `start_char` and `end_char` support negative indexing, +/// which counts character from the end of an array, moving backwards. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Substring { + /// source meta variable to be transformed + pub source: T, + /// optional starting character index of the substring, defaults to 0. + pub start_char: Option, + /// optional ending character index of the substring, defaults to the end of the string. + pub end_char: Option, +} + +impl Substring { + /// Computes the substring based on the provided character indices. + #[inline] + fn compute(&self, ctx: &mut Ctx<'_, '_, D>) -> Option { + let text = get_text_from_env(&self.source, ctx)?; + + // SIMD-optimized ASCII check for better performance on longer strings + if is_ascii_simd(&text) { + return self.compute_ascii(&text); + } + // Fallback to standard ASCII check + if text.is_ascii() { + return self.compute_ascii(&text); + } + + // UTF-8 path using char indices for boundary-safe slicing + self.compute_unicode(&text) + } + + /// Optimized substring for ASCII strings + #[inline] + fn compute_ascii(&self, text: &str) -> Option { + let len = text.len() as i32; + let start = resolve_char(&self.start_char, 0, len); + let end = resolve_char(&self.end_char, len, len); + + if start > end || start >= text.len() || end > text.len() { + return Some(String::new()); + } + + Some(text[start..end].to_string()) + } + + /// UTF-8 aware substring using char boundaries + #[inline] + fn compute_unicode(&self, text: &str) -> Option { + let char_count = text.chars().count() as i32; + let start_idx = resolve_char(&self.start_char, 0, char_count); + let end_idx = resolve_char(&self.end_char, char_count, char_count); + + if start_idx > end_idx || start_idx >= char_count as usize { + return Some(String::new()); + } + + // Use char_indices for efficient boundary detection + let mut char_indices = text.char_indices(); + let start_byte = char_indices + .nth(start_idx) + .map(|(i, _)| i) + .unwrap_or(text.len()); + + if end_idx >= char_count as usize { + return Some(text[start_byte..].to_string()); + } + + let end_byte = char_indices + .nth(end_idx - start_idx - 1) + .map(|(i, _)| i) + .unwrap_or(text.len()); + + Some(text[start_byte..end_byte].to_string()) + } +} + +/// resolve relative negative char index to absolute index +/// e.g. -1 => len - 1, n > len => n +#[inline] +fn resolve_char(opt: &Option, dft: i32, len: i32) -> usize { + let c = *opt.as_ref().unwrap_or(&dft); + if c >= len { + len as usize + } else if c >= 0 { + c as usize + } else if len + c < 0 { + 0 + } else { + debug_assert!(c < 0); + (len + c) as usize + } +} + +/// Replaces a substring in the meta variable's text content with another string. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Replace { + /// source meta variable to be transformed + pub source: T, + /// a regex to find substring to be replaced + pub replace: String, + /// the replacement string + pub by: String, + /// Cached compiled regex for performance + #[serde(skip)] + #[schemars(skip)] + compiled_regex: OnceLock>, +} + +impl Replace { + /// Create a new Replace with empty cache + #[inline] + pub fn new(source: T, replace: String, by: String) -> Self { + Self { + source, + replace, + by, + compiled_regex: OnceLock::new(), + } + } + + /// Get the cached compiled regex, compiling it if necessary + #[inline] + fn get_regex(&self) -> Option<&Regex> { + let result = self + .compiled_regex + .get_or_init(|| Regex::new(&self.replace).map_err(|e| e.to_string())); + result.as_ref().ok() + } +} + +impl Replace { + /// Computes the replacement of the matched text. + #[inline] + fn compute(&self, ctx: &mut Ctx<'_, '_, D>) -> Option { + let text = get_text_from_env(&self.source, ctx)?; + let re = self.get_regex()?; + Some(re.replace_all(&text, &self.by).into_owned()) + } +} + +/// Converts the source meta variable's text content to a specified case format. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Convert { + /// source meta variable to be transformed + pub source: T, + /// the target case format to convert the text content to + pub to_case: StringCase, + /// optional separators to specify how to separate word + pub separated_by: Option>, +} + +impl Convert { + #[inline] + fn compute(&self, ctx: &mut Ctx<'_, '_, D>) -> Option { + let text = get_text_from_env(&self.source, ctx)?; + Some(self.to_case.apply(&text, self.separated_by.as_deref())) + } +} + +/// Represents a transformation that can be applied to a matched AST node. +/// Available transformations are `substring`, `replace` and `convert`. +#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub enum Trans { + Substring(Substring), + Replace(Replace), + Convert(Convert), + Rewrite(Rewrite), +} + +impl Trans { + fn source(&self) -> &T { + use Trans as T; + match self { + T::Replace(r) => &r.source, + T::Substring(s) => &s.source, + T::Convert(c) => &c.source, + T::Rewrite(r) => &r.source, + } + } +} + +pub(crate) fn parse_meta_var( + src: &str, + lang: &L, +) -> Result { + let source = lang.pre_process_pattern(src); + if let Some(var) = lang.extract_meta_var(&source) { + Ok(var) + } else { + Err(TransformError::MalformedVar(src.to_string())) + } +} + +impl Trans { + pub fn parse(&self, lang: &L) -> Result, TransformError> { + use Trans as T; + Ok(match self { + T::Replace(r) => T::Replace(Replace::new( + parse_meta_var(&r.source, lang)?, + r.replace.clone(), + r.by.clone(), + )), + T::Substring(s) => T::Substring(Substring { + source: parse_meta_var(&s.source, lang)?, + start_char: s.start_char, + end_char: s.end_char, + }), + T::Convert(c) => T::Convert(Convert { + source: parse_meta_var(&c.source, lang)?, + to_case: c.to_case, + separated_by: c.separated_by.clone(), + }), + T::Rewrite(r) => T::Rewrite(r.parse(lang)?), + }) + } +} +impl Trans { + pub(super) fn insert(&self, key: &str, ctx: &mut Ctx<'_, '_, D>) { + let src = self.source(); + // TODO: add this debug assertion back + // debug_assert!(ctx.env.get_transformed(key).is_none()); + // avoid cyclic + ctx.env.insert_transformation(src, key, vec![]); + let opt = self.compute(ctx); + let bytes = if let Some(s) = opt { + ::decode_str(&s).to_vec() + } else { + vec![] + }; + ctx.env.insert_transformation(src, key, bytes); + } + fn compute(&self, ctx: &mut Ctx<'_, '_, D>) -> Option { + use Trans as T; + match self { + T::Replace(r) => r.compute(ctx), + T::Substring(s) => s.compute(ctx), + T::Convert(c) => c.compute(ctx), + T::Rewrite(r) => r.compute(ctx), + } + } + + pub fn used_rewriters(&self) -> &[String] { + use Trans as T; + match self { + T::Replace(_) => &[], + T::Substring(_) => &[], + T::Convert(_) => &[], + T::Rewrite(r) => &r.rewriters, + } + } + pub fn used_vars(&self) -> &str { + let s = self.source(); + use MetaVariable as MV; + match s { + MV::Capture(v, _) => v, + MV::MultiCapture(v) => v, + MV::Dropped(_) | MV::Multiple => panic!("transform var must be named"), + } + } +} + +#[cfg(test)] +mod test { + use super::super::Transform; + use super::*; + use crate::test::TypeScript; + use crate::{DeserializeEnv, Transformation}; + use serde_yaml::with::singleton_map_recursive; + use thread_ast_engine::tree_sitter::LanguageExt; + use thread_utils::RapidMap; + + type R = std::result::Result<(), ()>; + + fn get_transformed(src: &str, pat: &str, trans: &Trans) -> Option { + let grep = TypeScript::Tsx.ast_grep(src); + let root = grep.root(); + let mut nm = root.find(pat).expect("should find"); + let mut ctx = Ctx { + env: nm.get_env_mut(), + rewriters: &Default::default(), + enclosing_env: &Default::default(), + }; + trans.parse(&TypeScript::Tsx).ok()?.compute(&mut ctx) + } + + fn parse(trans: &str) -> Result, ()> { + let deserializer = serde_yaml::Deserializer::from_str(trans); + singleton_map_recursive::deserialize(deserializer).map_err(|_| ()) + } + + #[test] + fn test_transform_parse_error() { + let str_trans = parse(r#"substring: { source: WRONG }"#).expect("should work"); + match str_trans.parse(&TypeScript::Tsx) { + Err(TransformError::MalformedVar(n)) => assert_eq!(n, "WRONG"), + _ => panic!("should be malformed var"), + } + } + + #[test] + fn test_simple_replace() -> R { + let trans = parse( + r#" + substring: + source: "$A" + startChar: 1 + endChar: -1 + "#, + )?; + let actual = get_transformed("let a = 123", "let a= $A", &trans).ok_or(())?; + assert_eq!(actual, "2"); + Ok(()) + } + + #[test] + fn test_no_end_char() -> R { + let trans = parse( + r#" + substring: + source: "$A" + startChar: 1 + "#, + )?; + let actual = get_transformed("let a = 123", "let a= $A", &trans).ok_or(())?; + assert_eq!(actual, "23"); + Ok(()) + } + #[test] + fn test_no_start_char() -> R { + let trans = parse( + r#" + substring: + source: "$A" + endChar: -1 + "#, + )?; + let actual = get_transformed("let a = 123", "let a= $A", &trans).ok_or(())?; + assert_eq!(actual, "12"); + Ok(()) + } + + #[test] + fn test_replace() -> R { + let trans = parse( + r#" + replace: + source: "$A" + replace: \d + by: "b" + "#, + )?; + let actual = get_transformed("let a = 123", "let a= $A", &trans).ok_or(())?; + assert_eq!(actual, "bbb"); + Ok(()) + } + + #[test] + fn test_wrong_rule() { + let parsed = parse( + r#" + replace: + source: "$A" + "#, + ); + assert!(parsed.is_err()); + } + + fn transform_env(trans: RapidMap>) -> RapidMap { + let grep = TypeScript::Tsx.ast_grep("let a = 123"); + let root = grep.root(); + let trans = trans + .into_iter() + .map(|(k, v)| (k, Transformation::Object(v))) + .collect(); + let mut nm = root.find("let a = $A").expect("should find"); + let env = DeserializeEnv::new(TypeScript::Tsx); + let trans = Transform::deserialize(&trans, &env).expect("should deserialize"); + trans.apply_transform(nm.get_env_mut(), &Default::default(), &Default::default()); + nm.get_env().clone().into() + } + + #[test] + fn test_insert_env() -> R { + let tr1 = parse( + r#" + replace: + source: "$A" + replace: \d + by: "b" + "#, + )?; + let tr2 = parse( + r#" + substring: + source: "$A" + startChar: 1 + endChar: -1 + "#, + )?; + let mut map = RapidMap::default(); + map.insert("TR1".into(), tr1); + map.insert("TR2".into(), tr2); + let env = transform_env(map); + assert_eq!(env["TR1"], "bbb"); + assert_eq!(env["TR2"], "2"); + Ok(()) + } + + #[test] + fn test_dependent_trans() -> R { + let rep = parse( + r#" + replace: + source: "$A" + replace: \d + by: "b" + "#, + )?; + let sub = parse( + r#" + substring: + source: "$REP" + startChar: 1 + endChar: -1 + "#, + )?; + let up = parse( + r#" + convert: + source: "$SUB" + toCase: upperCase + "#, + )?; + let mut map = RapidMap::default(); + map.insert("REP".into(), rep); + map.insert("SUB".into(), sub); + map.insert("UP".into(), up); + let env = transform_env(map); + assert_eq!(env["REP"], "bbb"); + assert_eq!(env["SUB"], "b"); + assert_eq!(env["UP"], "B"); + Ok(()) + } + + #[test] + fn test_uppercase_convert() -> R { + let trans = parse( + r#" + convert: + source: "$A" + toCase: upperCase + "#, + )?; + let actual = get_transformed("let a = real_quiet_now", "let a = $A", &trans).ok_or(())?; + assert_eq!(actual, "REAL_QUIET_NOW"); + Ok(()) + } + + #[test] + fn test_capitalize_convert() -> R { + let trans = parse( + r#" + convert: + source: "$A" + toCase: capitalize + "#, + )?; + let actual = get_transformed("let a = snugglebunny", "let a = $A", &trans).ok_or(())?; + assert_eq!(actual, "Snugglebunny"); + Ok(()) + } + + #[test] + fn test_lowercase_convert() -> R { + let trans = parse( + r#" + convert: + source: "$A" + toCase: lowerCase + "#, + )?; + let actual = get_transformed("let a = SCREAMS", "let a = $A", &trans).ok_or(())?; + assert_eq!(actual, "screams"); + Ok(()) + } + + #[test] + fn test_separation_convert() -> R { + let trans = parse( + r#" + convert: + source: "$A" + toCase: snakeCase + separatedBy: [underscore] + "#, + )?; + let actual = get_transformed("let a = camelCase_Not", "let a = $A", &trans).ok_or(())?; + assert_eq!(actual, "camelcase_not"); + Ok(()) + } + + #[test] + fn test_transform_indentation_with_insertion() -> R { + let src = " +if (true) { + let a = { + b: 123 + } +} +"; + // note the indentation + let expected = "{ + b: 123 + }"; + let tr = parse("{ substring: { source: $A } }")?; + let actual = get_transformed(src, "let a = $A", &tr).ok_or(())?; + assert_eq!(actual, expected); + Ok(()) + } + + // TODO: add a symbolic test for Rewrite +} diff --git a/crates/rule-engine/test_data/sample_javascript.js b/crates/rule-engine/test_data/sample_javascript.js new file mode 100644 index 0000000..0c199f3 --- /dev/null +++ b/crates/rule-engine/test_data/sample_javascript.js @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +// Sample JavaScript code for benchmarking +function testFunction() { + console.log("Hello World"); + console.log('test string'); + console.log(`template ${variable}`); +} + +class TestClass { + constructor() { + this.value = 42; + } + + method() { + console.log(this.value); + } +} + +let variable = "test"; +const constant = 123; +var oldVar = true; + +import { Component } from 'react'; +import * as React from 'react'; +import defaultExport from './module'; + +async function asyncFunction() { + const result = await fetch('/api/data'); + return result.json(); +} + +function recursion() { + recursion(); +} + +let recursion2 = () => { + recursion2(); +}; + +export default TestClass; diff --git a/crates/rule-engine/test_data/sample_python.py b/crates/rule-engine/test_data/sample_python.py new file mode 100644 index 0000000..7b79517 --- /dev/null +++ b/crates/rule-engine/test_data/sample_python.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# Sample Python code for benchmarking +def test_function(): + print("Hello World") + print('test string') + print(f"template {variable}") + +class TestClass: + def __init__(self): + self.value = 42 + + def method(self): + print(self.value) + +variable = "test" +constant = 123 +old_var = True + +import os +from typing import List, Dict +import asyncio + +async def async_function(): + result = await fetch_data() + return result + +def recursion(): + recursion() + +recursion2 = lambda: recursion2() + +if __name__ == "__main__": + test_function() diff --git a/crates/rule-engine/test_data/sample_rust.rs b/crates/rule-engine/test_data/sample_rust.rs new file mode 100644 index 0000000..30124b1 --- /dev/null +++ b/crates/rule-engine/test_data/sample_rust.rs @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +// Sample Rust code for benchmarking +fn test_function() { + println!("Hello World"); + println!("test string"); + println!("template {}", variable); +} + +struct TestStruct { + value: i32, +} + +impl TestStruct { + fn new() -> Self { + Self { value: 42 } + } + + fn method(&self) { + println!("{}", self.value); + } +} + +use std::collections::HashMap; +use std::fs::File; + +static VARIABLE: &str = "test"; +const CONSTANT: i32 = 123; + +async fn async_function() -> Result> { + let result = fetch_data().await?; + Ok(result) +} + +fn recursion() { + recursion(); +} + +fn main() { + test_function(); +} diff --git a/crates/rule-engine/test_data/sample_typescript.ts b/crates/rule-engine/test_data/sample_typescript.ts new file mode 100644 index 0000000..83b7297 --- /dev/null +++ b/crates/rule-engine/test_data/sample_typescript.ts @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +// Sample TypeScript code for benchmarking +function testFunction() { + console.log("Hello World"); + console.log('test string'); + console.log(`template ${variable}`); +} + +class TestClass { + constructor() { + this.value = 42; + } + + method() { + console.log(this.value); + } +} + +let variable = "test"; +const constant = 123; +var oldVar = true; + +import { Component } from 'react'; +import * as React from 'react'; +import defaultExport from './module'; + +async function asyncFunction() { + const result = await fetch('/api/data'); + return result.json(); +} + +function recursion() { + recursion(); +} + +let recursion2 = () => { + recursion2(); +}; + +export default TestClass; diff --git a/crates/services/Cargo.toml b/crates/services/Cargo.toml new file mode 100644 index 0000000..e250211 --- /dev/null +++ b/crates/services/Cargo.toml @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 + +[package] +name = "thread-services" +description = "Service layer interfaces for Thread" +keywords = ["ast", "pattern", "services", "interface"] +version = "0.1.0" +categories = ["services", "interface", "ast", "pattern"] +readme = "README.md" +authors.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +include.workspace = true + +[dependencies] +thread-ast-engine = { workspace = true, default-features = false } +thread-rule-engine = { workspace = true, default-features = false } +thread-language = { workspace = true, default-features = false } +thread-utils = { workspace = true, default-features = false, features = [ + "hashers", +] } + + +serde = { workspace = true, optional = true } +thiserror = { workspace = true } + +[features] +default = [] +serialization = ["dep:serde"] diff --git a/crates/services/README.md b/crates/services/README.md new file mode 100644 index 0000000..2e23dca --- /dev/null +++ b/crates/services/README.md @@ -0,0 +1,6 @@ + diff --git a/crates/services/src/lib.rs b/crates/services/src/lib.rs new file mode 100644 index 0000000..ed361e2 --- /dev/null +++ b/crates/services/src/lib.rs @@ -0,0 +1,127 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: AGPL-3.0-or-later +/*! +This module defines the service layer interfaces for Thread. + +It provides abstract traits and execution contexts that decouple the core +functionality from specific I/O, configuration, and execution environments. +This allows the same core logic to be used in CLI tools, WASM environments, +cloud services, and other contexts. +*/ + +use std::path::Path; +use thiserror::Error; + +/// Error types for service operations +#[derive(Error, Debug)] +pub enum ServiceError { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("Configuration error: {0}")] + Config(String), + #[error("Execution error: {0}")] + Execution(String), +} + +/// Abstract execution context that can provide code from various sources +pub trait ExecutionContext { + /// Read content from a source (could be file, memory, network, etc.) + fn read_content(&self, source: &str) -> Result; + + /// Write content to a destination + fn write_content(&self, destination: &str, content: &str) -> Result<(), ServiceError>; + + /// List available sources (files, URLs, etc.) + fn list_sources(&self) -> Result, ServiceError>; +} + +/// File system based execution context +pub struct FileSystemContext { + base_path: std::path::PathBuf, +} + +impl FileSystemContext { + pub fn new>(base_path: P) -> Self { + Self { + base_path: base_path.as_ref().to_path_buf(), + } + } +} + +impl ExecutionContext for FileSystemContext { + fn read_content(&self, source: &str) -> Result { + let path = self.base_path.join(source); + Ok(std::fs::read_to_string(path)?) + } + + fn write_content(&self, destination: &str, content: &str) -> Result<(), ServiceError> { + let path = self.base_path.join(destination); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + Ok(std::fs::write(path, content)?) + } + + fn list_sources(&self) -> Result, ServiceError> { + // Basic implementation - can be enhanced with glob patterns, etc. + let mut sources = Vec::new(); + for entry in std::fs::read_dir(&self.base_path)? { + let entry = entry?; + if entry.file_type()?.is_file() { + if let Some(name) = entry.file_name().to_str() { + sources.push(name.to_string()); + } + } + } + Ok(sources) + } +} + +/// In-memory execution context for testing and WASM environments +pub struct MemoryContext { + content: thread_utils::RapidMap, +} + +impl MemoryContext { + pub fn new() -> Self { + Self { + content: thread_utils::RapidMap::default(), + } + } + + pub fn add_content(&mut self, name: String, content: String) { + self.content.insert(name, content); + } +} + +impl Default for MemoryContext { + fn default() -> Self { + Self::new() + } +} + +impl ExecutionContext for MemoryContext { + fn read_content(&self, source: &str) -> Result { + self.content + .get(source) + .cloned() + .ok_or_else(|| ServiceError::Execution(format!("Source not found: {source}"))) + } + + fn write_content(&self, _destination: &str, _content: &str) -> Result<(), ServiceError> { + // For read-only memory context, we could store writes separately + // or return an error. For now, we'll just succeed silently. + Ok(()) + } + + fn list_sources(&self) -> Result, ServiceError> { + Ok(self.content.keys().cloned().collect()) + } +} + +// Service trait definitions will be added here in future iterations +// For example: +// pub trait ScanService { ... } +// pub trait FixService { ... } +// pub trait RuleValidationService { ... } diff --git a/crates/thread-cli/Cargo.toml b/crates/thread-cli/Cargo.toml deleted file mode 100644 index 7d2d709..0000000 --- a/crates/thread-cli/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "thread-cli" -version = "0.0.1" -edition.workspace = true -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -authors.workspace = true - -[dependencies] - -[lints] -workspace = true diff --git a/crates/thread-cli/README.md b/crates/thread-cli/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-cli/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-cli/src/main.rs b/crates/thread-cli/src/main.rs deleted file mode 100644 index e7a11a9..0000000 --- a/crates/thread-cli/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/crates/thread-core/Cargo.toml b/crates/thread-core/Cargo.toml deleted file mode 100644 index 4f72a87..0000000 --- a/crates/thread-core/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "thread-core" -version = "0.0.1" -edition.workspace = true -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -authors.workspace = true - -[dependencies] -# string-interner.workspace = true - -ast-grep-core = { version = "0.38.6" } # core library for AST manipulation -ast-grep-dynamic = { version = "0.38.6" } # dynamic language loading at runtime -ast-grep-language = { version = "0.38.6" } # for language-specific AST manipulation - -[lints] -workspace = true diff --git a/crates/thread-core/README.md b/crates/thread-core/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-core/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-core/src/engine.rs b/crates/thread-core/src/engine.rs deleted file mode 100644 index 16b6a60..0000000 --- a/crates/thread-core/src/engine.rs +++ /dev/null @@ -1,220 +0,0 @@ -// crates/thread-core/src/engine.rs -use petgraph::{Graph, NodeIndex}; -use std::collections::HashMap; -use std::path::Path; - -// Your single source of truth -type CodeGraph = Graph; - -#[derive(Debug, Clone)] -pub struct CodeNode { - pub id: u64, // rapidhash of content - pub kind: NodeKind, - pub name: String, - pub line: usize, - pub column: usize, - pub text: String, -} - -#[derive(Debug, Clone)] -pub enum NodeKind { - Function, - Struct, - Import, - Variable, - FunctionCall, -} - -#[derive(Debug, Clone)] -pub enum CodeEdge { - Calls, - Imports, - Defines, - References, -} - -pub struct ThreadEngine { - graph: CodeGraph, - parser: thread_parse::Parser, - store: thread_store::ContentStore, - editor: Option, - node_index: DashMap, // Fast lookups -} - -impl ThreadEngine { - pub fn new() -> Self { - Self { - graph: Graph::new(), - parser: thread_parse::Parser::new(), - store: thread_store::ContentStore::new(), - editor: None, - node_index: HashMap::new(), - } - } - - pub fn analyze_file>(&mut self, path: P) -> Result { - let path = path.as_ref(); - let content = std::fs::read_to_string(path)?; - - // Step 1: Detect language and parse - let language = self.parser.detect_language(path)?; - let ast_elements = self.parser.parse(&content, language)?; - - // Step 2: Store content with deduplication - let content_hash = self.store.intern(&content); - - // Step 3: Build graph from AST elements - let mut nodes_added = Vec::new(); - - for element in ast_elements { - let node = CodeNode { - id: element.content_hash, - kind: element.kind, - name: element.name, - line: element.line, - column: element.column, - text: element.text, - }; - - let node_idx = self.graph.add_node(node.clone()); - self.node_index.insert(node.id, node_idx); - nodes_added.push(node_idx); - } - - // Step 4: Add relationships (calls, imports, etc.) - self.add_relationships(&ast_elements)?; - - Ok(AnalysisResult { - path: path.to_path_buf(), - content_hash, - nodes_count: nodes_added.len(), - graph_size: self.graph.node_count(), - }) - } - - pub fn update_file>(&mut self, path: P, new_content: &str) -> Result<(), ThreadError> { - // Step 1: Find what changed using ropey - if let Some(ref mut editor) = self.editor { - let changes = editor.compute_changes(path.as_ref(), new_content)?; - - // Step 2: Incrementally update only changed parts - for change in changes { - self.update_graph_region(change)?; - } - } else { - // Fallback: re-analyze entire file - self.analyze_file(path)?; - } - - Ok(()) - } - - pub fn find_function(&self, name: &str) -> Vec<&CodeNode> { - self.graph - .node_weights() - .filter(|node| matches!(node.kind, NodeKind::Function) && node.name == name) - .collect() - } - - pub fn get_dependencies(&self, node_id: u64) -> Option> { - let node_idx = self.node_index.get(&node_id)?; - - Some( - self.graph - .neighbors(*node_idx) - .map(|idx| &self.graph[idx]) - .collect() - ) - } - - fn add_relationships(&mut self, elements: &[AstElement]) -> Result<(), ThreadError> { - // Find function calls, imports, etc. and add edges - for element in elements { - if let Some(calls) = &element.calls { - for call in calls { - if let (Some(&caller_idx), Some(&callee_idx)) = - (self.node_index.get(&element.content_hash), - self.node_index.get(&call.target_hash)) { - self.graph.add_edge(caller_idx, callee_idx, CodeEdge::Calls); - } - } - } - } - - Ok(()) - } - - fn update_graph_region(&mut self, change: thread_edit::Change) -> Result<(), ThreadError> { - // Remove old nodes in changed region - let old_nodes: Vec<_> = self.graph - .node_indices() - .filter(|&idx| { - let node = &self.graph[idx]; - node.line >= change.start_line && node.line <= change.end_line - }) - .collect(); - - for node_idx in old_nodes { - let node_id = self.graph[node_idx].id; - self.graph.remove_node(node_idx); - self.node_index.remove(&node_id); - } - - // Re-parse changed region and add new nodes - let new_elements = self.parser.parse_region(&change.new_content, change.start_line)?; - - for element in new_elements { - let node = CodeNode { - id: element.content_hash, - kind: element.kind, - name: element.name, - line: element.line, - column: element.column, - text: element.text, - }; - - let node_idx = self.graph.add_node(node); - self.node_index.insert(element.content_hash, node_idx); - } - - Ok(()) - } -} - -#[derive(Debug)] -pub struct AnalysisResult { - pub path: std::path::PathBuf, - pub content_hash: u64, - pub nodes_count: usize, - pub graph_size: usize, -} - -#[derive(Debug)] -pub struct AstElement { - pub content_hash: u64, - pub kind: NodeKind, - pub name: String, - pub line: usize, - pub column: usize, - pub text: String, - pub calls: Option>, -} - -#[derive(Debug)] -pub struct FunctionCall { - pub target_hash: u64, - pub name: String, -} - -#[derive(Debug)] -pub enum ThreadError { - Io(std::io::Error), - Parse(String), - NotFound(String), -} - -impl From for ThreadError { - fn from(e: std::io::Error) -> Self { - ThreadError::Io(e) - } -} diff --git a/crates/thread-core/src/error.rs b/crates/thread-core/src/error.rs deleted file mode 100644 index 5781f67..0000000 --- a/crates/thread-core/src/error.rs +++ /dev/null @@ -1,60 +0,0 @@ -// thread-core/src/error.rs -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum ThreadError { - #[error("Parser error: {0}")] - ParseError(String), - - #[error("Language not supported: {0}")] - UnsupportedLanguage(String), - - #[error("File too large: {size_mb}MB exceeds limit of {limit_mb}MB")] - FileTooLarge { size_mb: usize, limit_mb: usize }, - - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - - #[error("Tree-sitter error: {0}")] - TreeSitter(String), - - #[error("Serialization error: {0}")] - Serialization(#[from] serde_json::Error), -} - -pub type Result = std::result::Result; - -// thread-core/src/hash.rs -use blake3::Hasher; -use serde::{Deserialize, Serialize}; - -/// Content-addressable hash for deduplication -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ContentHash(String); - -impl ContentHash { - /// Create hash from content string - pub fn from_content(content: &str) -> Self { - let mut hasher = Hasher::new(); - hasher.update(content.as_bytes()); - Self(hasher.finalize().to_hex().to_string()) - } - - /// Create hash from bytes - pub fn from_bytes(bytes: &[u8]) -> Self { - let mut hasher = Hasher::new(); - hasher.update(bytes); - Self(hasher.finalize().to_hex().to_string()) - } - - /// Get the hash as a string - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl std::fmt::Display for ContentHash { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} diff --git a/crates/thread-core/src/lib.rs b/crates/thread-core/src/lib.rs deleted file mode 100644 index 1b2d3c9..0000000 --- a/crates/thread-core/src/lib.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! Core traits and types for thread code analysis -//! -//! This crate defines the fundamental abstractions that make thread -//! language-agnostic and highly extensible. - -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::Path; - -pub mod error; -pub mod hash; -pub mod location; - -pub use error::*; -pub use hash::*; -pub use location::*; - -/// A parsed code element with its metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CodeElement { - pub id: ElementId, - pub kind: ElementKind, - pub name: String, - pub signature: String, - pub location: SourceLocation, - pub content_hash: ContentHash, - pub dependencies: Vec, - pub metadata: ElementMetadata, -} - -/// Unique identifier for a code element -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ElementId(pub String); - -/// Types of code elements we can extract -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub enum ElementKind { - Function, - Method, - Class, - Struct, - Enum, - Interface, - Trait, - Constant, - Variable, - Module, - Import, - Export, - Type, - Macro, - // Extensible for language-specific elements - Custom(String), -} - -/// Language-agnostic metadata for code elements -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ElementMetadata { - pub visibility: Option, - pub is_async: bool, - pub is_generic: bool, - pub docstring: Option, - pub annotations: Vec, - pub return_type: Option, - pub parameters: Vec, - // Extensible key-value store for language-specific data - pub extra: HashMap, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum Visibility { - Public, - Private, - Protected, - Internal, - Package, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Parameter { - pub name: String, - pub type_annotation: Option, - pub default_value: Option, - pub is_optional: bool, -} - -/// Result of parsing a single file -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileParseResult { - pub file_path: &'static Path, - pub language: &'static str, - pub elements: &'static [CodeElement], - pub imports: &'static [Import], - pub exports: &'static [Export], - pub content_hash: ContentHash, - pub parse_time_ms: &'static u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Import { - pub module: String, - pub items: Vec, - pub alias: Option, - pub location: SourceLocation, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Export { - pub name: String, - pub kind: ElementKind, - pub location: SourceLocation, -} - -/// Result of parsing an entire project -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProjectParseResult { - pub project_id: String, - pub files: Vec, - pub dependency_graph: DependencyGraph, - pub total_parse_time_ms: u64, - pub statistics: ParseStatistics, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DependencyGraph { - pub nodes: Vec, // file paths - pub edges: Vec<(String, String)>, // (from_file, to_file) -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ParseStatistics { - pub total_files: usize, - pub total_elements: usize, - pub elements_by_kind: HashMap, - pub files_by_language: HashMap, -} - -/// Core trait for language-specific parsers -/// -/// Implementing this trait is all that's needed to add support for a new language. -pub trait LanguageParser: Send + Sync { - /// Unique identifier for this language (e.g., "rust", "python", "typescript") - fn language_id(&self) -> &'static str; - - /// File extensions this parser handles (e.g., [".rs", ".rust"]) - fn file_extensions(&self) -> &'static [&'static str]; - - /// Parse a single file and extract code elements - fn parse_file(&self, content: &str, file_path: &Path) -> Result; - - /// Parse incrementally if the parser supports it - /// Default implementation falls back to full parse - fn parse_incremental( - &self, - old_content: &str, - new_content: &str, - file_path: &Path, - ) -> Result { - self.parse_file(new_content, file_path) - } - - /// Extract dependencies from content (imports, includes, etc.) - fn extract_dependencies(&self, content: &str, file_path: &Path) -> Result>; - - /// Check if this parser can handle the given file - fn can_parse(&self, file_path: &Path) -> bool { - if let Some(ext) = file_path.extension().and_then(|e| e.to_str()) { - let ext_with_dot = format!(".{}", ext); - self.file_extensions().contains(&ext_with_dot.as_str()) - } else { - false - } - } -} - -/// Configuration for parsing operations -#[derive(Debug, Clone)] -pub struct ParseConfig { - pub include_private: bool, - pub include_tests: bool, - pub max_file_size_mb: usize, - pub parallel_parsing: bool, - pub incremental_mode: bool, - pub extract_docstrings: bool, - pub language_specific: HashMap, -} - -impl Default for ParseConfig { - fn default() -> Self { - Self { - include_private: true, - include_tests: false, - max_file_size_mb: 10, - parallel_parsing: true, - incremental_mode: true, - extract_docstrings: true, - language_specific: HashMap::new(), - } - } -} - -/// Trait for custom query extractors -/// -/// This allows users to define custom patterns to extract from parsed trees -pub trait QueryExtractor: Send + Sync { - /// Unique name for this extractor - fn name(&self) -> &'static str; - - /// Extract custom data from a parsed tree - fn extract(&self, tree: &tree_sitter::Tree, source: &str) -> Result>; - - /// Languages this extractor supports - fn supported_languages(&self) -> &'static [&'static str]; -} diff --git a/crates/thread-core/src/location.rs b/crates/thread-core/src/location.rs deleted file mode 100644 index 38158bf..0000000 --- a/crates/thread-core/src/location.rs +++ /dev/null @@ -1,70 +0,0 @@ - - -// knitli-core/src/location.rs -use serde::{Deserialize, Serialize}; - -/// Source location with line and column information -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct SourceLocation { - pub file_path: String, - pub start_line: usize, - pub end_line: usize, - pub start_column: usize, - pub end_column: usize, - pub start_byte: usize, - pub end_byte: usize, -} - -impl SourceLocation { - pub fn new( - file_path: String, - start_line: usize, - end_line: usize, - start_column: usize, - end_column: usize, - start_byte: usize, - end_byte: usize, - ) -> Self { - Self { - file_path, - start_line, - end_line, - start_column, - end_column, - start_byte, - end_byte, - } - } - - /// Create from tree-sitter node - pub fn from_node(node: &tree_sitter::Node, file_path: String, source: &str) -> Self { - let start_point = node.start_position(); - let end_point = node.end_position(); - - Self::new( - file_path, - start_point.row + 1, // Convert to 1-based - end_point.row + 1, - start_point.column + 1, - end_point.column + 1, - node.start_byte(), - node.end_byte(), - ) - } - - /// Check if this location contains another location - pub fn contains(&self, other: &SourceLocation) -> bool { - self.file_path == other.file_path - && self.start_byte <= other.start_byte - && self.end_byte >= other.end_byte - } - - /// Get the line range as a string (e.g., "42-67") - pub fn line_range(&self) -> String { - if self.start_line == self.end_line { - self.start_line.to_string() - } else { - format!("{}-{}", self.start_line, self.end_line) - } - } -} diff --git a/crates/thread-core/src/nodes.rs b/crates/thread-core/src/nodes.rs deleted file mode 100644 index e69de29..0000000 diff --git a/crates/thread-diff/Cargo.toml b/crates/thread-diff/Cargo.toml deleted file mode 100644 index 83bd6f2..0000000 --- a/crates/thread-diff/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "thread-diff" -version = "0.1.0" -edition = "2024" -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -homepage.workspace = true -authors.workspace = true - -[dependencies] - -[lints] -workspace = true diff --git a/crates/thread-diff/README.md b/crates/thread-diff/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-diff/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-diff/src/lib.rs b/crates/thread-diff/src/lib.rs deleted file mode 100644 index b93cf3f..0000000 --- a/crates/thread-diff/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/crates/thread-engine/Cargo.toml b/crates/thread-engine/Cargo.toml deleted file mode 100644 index bc3debb..0000000 --- a/crates/thread-engine/Cargo.toml +++ /dev/null @@ -1,29 +0,0 @@ -[package] -name = "thread-engine" -version.workspace = true -edition.workspace = true -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -homepage.workspace = true -authors.workspace = true -description = "Analysis engine implementation for Thread code analysis" -keywords = ["parsing", "code-analysis", "static-analysis", "graph-algorithms"] -categories = ["development-tools", "parser-implementations"] - -[dependencies] -thread-core = { path = "../thread-core" } -thread-parser = { path = "../thread-parse" } -thread-store = { path = "../thread-store" } - -# Core dependencies -anyhow.workspace = true -thiserror.workspace = true -serde.workspace = true - -# Graph processing -petgraph = "0.6" - -# Performance -rayon = "1.10" \ No newline at end of file diff --git a/crates/thread-engine/README.md b/crates/thread-engine/README.md deleted file mode 100644 index 043149f..0000000 --- a/crates/thread-engine/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# thread-engine - -Analysis engine implementation for Thread code analysis. - -This crate provides the main analysis engine that orchestrates parsing, graph building, and query operations. - -## Architecture - -- **ThreadEngine**: Main engine that manages the code graph -- **Analyzer**: High-level orchestration of analysis pipeline -- **GraphBuilder**: Constructs petgraph from parsed code elements -- **QueryEngine**: Extracts context and relationships from the graph - -## Usage - -```rust -use thread_engine::Analyzer; - -let mut analyzer = Analyzer::new(); -let result = analyzer.analyze_rust_file(content, file_path)?; -println!("Found {} elements", result.elements_found); -``` - -## Day 2 Target - -The immediate goal is implementing `analyze_rust_file()` to: -1. Parse Rust code with thread-parse -2. Extract functions and basic relationships -3. Build a queryable graph representation -4. Return analysis results \ No newline at end of file diff --git a/crates/thread-engine/src/analyzer.rs b/crates/thread-engine/src/analyzer.rs deleted file mode 100644 index f73cbc0..0000000 --- a/crates/thread-engine/src/analyzer.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! High-level analysis orchestration - -use crate::*; -use thread_core::*; -use std::path::Path; - -/// High-level analyzer that orchestrates the full analysis pipeline -pub struct Analyzer { - engine: ThreadEngine, - config: AnalysisConfig, -} - -impl Analyzer { - /// Create a new analyzer with default configuration - pub fn new() -> Self { - Self { - engine: ThreadEngine::new(), - config: AnalysisConfig::default(), - } - } - - /// Create a new analyzer with custom configuration - pub fn with_config(config: AnalysisConfig) -> Self { - Self { - engine: ThreadEngine::new(), - config, - } - } - - /// Analyze a single Rust file (Day 2 MVP target) - pub fn analyze_rust_file(&mut self, content: &str, file_path: &Path) -> Result { - // TODO: This is the Day 2 deliverable target - // 1. Use thread-parse to extract functions from Rust content - // 2. Build graph representation - // 3. Return analysis result - - self.engine.analyze_file(file_path, content) - } - - /// Get current engine statistics - pub fn stats(&self) -> EngineStats { - self.engine.stats() - } - - /// Get a reference to the underlying engine - pub fn engine(&self) -> &ThreadEngine { - &self.engine - } - - /// Get a mutable reference to the underlying engine - pub fn engine_mut(&mut self) -> &mut ThreadEngine { - &mut self.engine - } -} - -impl Default for Analyzer { - fn default() -> Self { - Self::new() - } -} - -/// Configuration for the analysis process -#[derive(Debug, Clone)] -pub struct AnalysisConfig { - pub include_private: bool, - pub include_tests: bool, - pub max_file_size_mb: usize, - pub extract_docstrings: bool, -} - -impl Default for AnalysisConfig { - fn default() -> Self { - Self { - include_private: true, - include_tests: false, - max_file_size_mb: 10, - extract_docstrings: true, - } - } -} \ No newline at end of file diff --git a/crates/thread-engine/src/graph.rs b/crates/thread-engine/src/graph.rs deleted file mode 100644 index 10477ed..0000000 --- a/crates/thread-engine/src/graph.rs +++ /dev/null @@ -1,74 +0,0 @@ -//! Graph building and manipulation utilities - -use crate::*; -use thread_core::*; -use petgraph::Graph; -use std::collections::HashMap; - -/// Graph builder for constructing code graphs from parsed elements -pub struct GraphBuilder { - graph: Graph, - node_index: HashMap, -} - -impl GraphBuilder { - /// Create a new graph builder - pub fn new() -> Self { - Self { - graph: Graph::new(), - node_index: HashMap::new(), - } - } - - /// Add a code element as a node in the graph - pub fn add_element(&mut self, element: CodeElement) -> petgraph::graph::NodeIndex { - let node = CodeNode { element: element.clone() }; - let index = self.graph.add_node(node); - self.node_index.insert(element.id, index); - index - } - - /// Add a relationship between two elements - pub fn add_relationship( - &mut self, - from: &ElementId, - to: &ElementId, - kind: EdgeKind, - metadata: EdgeMetadata, - ) -> Result<()> { - let from_index = self.node_index.get(from) - .ok_or_else(|| Error::ElementNotFound(from.clone()))?; - let to_index = self.node_index.get(to) - .ok_or_else(|| Error::ElementNotFound(to.clone()))?; - - let edge = CodeEdge { kind, metadata }; - self.graph.add_edge(*from_index, *to_index, edge); - Ok(()) - } - - /// Build the final graph (consumes the builder) - pub fn build(self) -> (Graph, HashMap) { - (self.graph, self.node_index) - } - - /// Get current graph statistics - pub fn stats(&self) -> GraphStats { - GraphStats { - node_count: self.graph.node_count(), - edge_count: self.graph.edge_count(), - } - } -} - -impl Default for GraphBuilder { - fn default() -> Self { - Self::new() - } -} - -/// Statistics about a code graph -#[derive(Debug, Clone)] -pub struct GraphStats { - pub node_count: usize, - pub edge_count: usize, -} \ No newline at end of file diff --git a/crates/thread-engine/src/lib.rs b/crates/thread-engine/src/lib.rs deleted file mode 100644 index 63415ca..0000000 --- a/crates/thread-engine/src/lib.rs +++ /dev/null @@ -1,110 +0,0 @@ -//! Thread analysis engine implementation -//! -//! This crate provides the main analysis engine that orchestrates parsing, -//! graph building, and query operations for Thread code analysis. - -use thread_core::*; -use petgraph::Graph; -use std::collections::HashMap; -use std::path::Path; - -pub mod analyzer; -pub mod graph; -pub mod query; - -pub use analyzer::*; -pub use graph::*; -pub use query::*; - -/// Main analysis engine for Thread -pub struct ThreadEngine { - graph: Graph, - node_index: HashMap, -} - -impl ThreadEngine { - /// Create a new Thread analysis engine - pub fn new() -> Self { - Self { - graph: Graph::new(), - node_index: HashMap::new(), - } - } - - /// Analyze a single file and integrate into the graph - pub fn analyze_file(&mut self, file_path: &Path, content: &str) -> Result { - // TODO: Implement file analysis - // 1. Parse with thread-parse - // 2. Build graph nodes and edges - // 3. Store in content store - // 4. Return analysis result - - Ok(AnalysisResult { - file_path: file_path.to_path_buf(), - elements_found: 0, - relationships_found: 0, - }) - } - - /// Get the current graph statistics - pub fn stats(&self) -> EngineStats { - EngineStats { - total_nodes: self.graph.node_count(), - total_edges: self.graph.edge_count(), - total_files: 0, // TODO: Track files - } - } -} - -impl Default for ThreadEngine { - fn default() -> Self { - Self::new() - } -} - -/// Graph node representing a code element -#[derive(Debug, Clone)] -pub struct CodeNode { - pub element: CodeElement, -} - -/// Graph edge representing a relationship between code elements -#[derive(Debug, Clone)] -pub struct CodeEdge { - pub kind: EdgeKind, - pub metadata: EdgeMetadata, -} - -/// Types of relationships between code elements -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum EdgeKind { - Calls, - Imports, - Implements, - Extends, - Contains, - References, -} - -/// Metadata for graph edges -#[derive(Debug, Clone, Default)] -pub struct EdgeMetadata { - pub line_number: Option, - pub confidence: f32, -} - -/// Result of analyzing a single file -#[derive(Debug, Clone)] -pub struct AnalysisResult { - pub file_path: std::path::PathBuf, - pub elements_found: usize, - pub relationships_found: usize, -} - -/// Statistics about the analysis engine state -#[derive(Debug, Clone)] -pub struct EngineStats { - pub total_nodes: usize, - pub total_edges: usize, - pub total_files: usize, -} \ No newline at end of file diff --git a/crates/thread-engine/src/query.rs b/crates/thread-engine/src/query.rs deleted file mode 100644 index 9deb497..0000000 --- a/crates/thread-engine/src/query.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Graph query operations for extracting context - -use crate::*; -use thread_core::*; -use petgraph::Graph; -use std::collections::HashMap; - -/// Query engine for extracting context from code graphs -pub struct QueryEngine<'a> { - graph: &'a Graph, - node_index: &'a HashMap, -} - -impl<'a> QueryEngine<'a> { - /// Create a new query engine - pub fn new( - graph: &'a Graph, - node_index: &'a HashMap, - ) -> Self { - Self { graph, node_index } - } - - /// Find a function by name - pub fn find_function(&self, name: &str) -> Vec<&CodeElement> { - self.graph - .node_weights() - .filter(|node| { - matches!(node.element.kind, ElementKind::Function | ElementKind::Method) - && node.element.name == name - }) - .map(|node| &node.element) - .collect() - } - - /// Get all functions that call the given function - pub fn get_callers(&self, function_id: &ElementId) -> Result> { - let target_index = self.node_index.get(function_id) - .ok_or_else(|| Error::ElementNotFound(function_id.clone()))?; - - let callers = self.graph - .neighbors_directed(*target_index, petgraph::Direction::Incoming) - .filter_map(|idx| self.graph.node_weight(idx)) - .map(|node| &node.element) - .collect(); - - Ok(callers) - } - - /// Get all functions called by the given function - pub fn get_callees(&self, function_id: &ElementId) -> Result> { - let source_index = self.node_index.get(function_id) - .ok_or_else(|| Error::ElementNotFound(function_id.clone()))?; - - let callees = self.graph - .neighbors_directed(*source_index, petgraph::Direction::Outgoing) - .filter_map(|idx| self.graph.node_weight(idx)) - .map(|node| &node.element) - .collect(); - - Ok(callees) - } - - /// Generate AI-friendly context for a function - pub fn generate_context(&self, function_name: &str) -> Result { - let functions = self.find_function(function_name); - if functions.is_empty() { - return Err(Error::FunctionNotFound(function_name.to_string())); - } - - // For now, take the first match (TODO: handle multiple matches) - let function = functions[0]; - let callers = self.get_callers(&function.id)?; - let callees = self.get_callees(&function.id)?; - - Ok(FunctionContext { - function: function.clone(), - callers: callers.into_iter().cloned().collect(), - callees: callees.into_iter().cloned().collect(), - }) - } -} - -/// Context information for a function (AI-friendly format) -#[derive(Debug, Clone)] -pub struct FunctionContext { - pub function: CodeElement, - pub callers: Vec, - pub callees: Vec, -} - -impl FunctionContext { - /// Format as markdown for AI consumption - pub fn to_markdown(&self) -> String { - let mut md = String::new(); - - md.push_str(&format!("## Function: {} (line {})\n\n", - self.function.name, - self.function.location.line)); - - md.push_str("```rust\n"); - md.push_str(&self.function.signature); - md.push_str("\n```\n\n"); - - if !self.callers.is_empty() { - md.push_str("## Called by\n\n"); - for caller in &self.callers { - md.push_str(&format!("- {} (line {})\n", caller.name, caller.location.line)); - } - md.push('\n'); - } - - if !self.callees.is_empty() { - md.push_str("## Calls\n\n"); - for callee in &self.callees { - md.push_str(&format!("- {} (line {})\n", callee.name, callee.location.line)); - } - md.push('\n'); - } - - md - } -} \ No newline at end of file diff --git a/crates/thread-fs/Cargo.toml b/crates/thread-fs/Cargo.toml deleted file mode 100644 index b1d3ab7..0000000 --- a/crates/thread-fs/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "thread-fs" -version = "0.1.0" -edition = "2024" -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -homepage.workspace = true -authors.workspace = true - -[dependencies] - -[lints] -workspace = true diff --git a/crates/thread-fs/README.md b/crates/thread-fs/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-fs/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-fs/src/lib.rs b/crates/thread-fs/src/lib.rs deleted file mode 100644 index b93cf3f..0000000 --- a/crates/thread-fs/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/crates/thread-parse/Cargo.toml b/crates/thread-parse/Cargo.toml deleted file mode 100644 index b838bca..0000000 --- a/crates/thread-parse/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "thread-parser" -version = "0.0.1" -edition.workspace = true -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -authors.workspace = true - -[dependencies] -#rayon.workspace = true -#dashmap.workspace = true -#string-interner.workspace = true -#petgraph.workspace = true - -ast-grep-core = { version = "0.38.6" } # core library for AST manipulation -ast-grep-dynamic = { version = "0.38.6" } # dynamic language loading at runtime -ast-grep-language = { version = "0.38.6" } # for language-specific AST manipulation - -[lints] -workspace = true diff --git a/crates/thread-parse/README.md b/crates/thread-parse/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-parse/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-parse/src/detection.rs b/crates/thread-parse/src/detection.rs deleted file mode 100644 index a54b800..0000000 --- a/crates/thread-parse/src/detection.rs +++ /dev/null @@ -1,105 +0,0 @@ -// crates/thread-lang/src/detection.rs -use ast_grep_core::{AstGrep, Language}; -use std::path::Path; -use std::collections::HashMap; -use anyhow::Result; - -pub struct LanguageDetector { - extensions: HashMap, - custom_languages: HashMap, -} - -#[derive(Debug, Clone)] -pub enum SupportedLanguage { - Rust, - JavaScript, - TypeScript, - Python, - Custom(String), -} - -#[derive(Debug, Clone)] -pub struct CustomLanguage { - pub name: String, - pub library_path: String, - pub extensions: Vec, - pub expando_char: char, -} - -impl LanguageDetector { - pub fn new() -> Self { - let mut extensions = HashMap::new(); - extensions.insert("rs".to_string(), SupportedLanguage::Rust); - extensions.insert("js".to_string(), SupportedLanguage::JavaScript); - extensions.insert("mjs".to_string(), SupportedLanguage::JavaScript); - extensions.insert("ts".to_string(), SupportedLanguage::TypeScript); - extensions.insert("tsx".to_string(), SupportedLanguage::TypeScript); - extensions.insert("py".to_string(), SupportedLanguage::Python); - - Self { - extensions, - custom_languages: HashMap::new(), - } - } - - pub fn detect_language>(&self, file_path: P) -> Option { - let path = file_path.as_ref(); - let extension = path.extension()?.to_str()?; - - self.extensions.get(extension).cloned() - } - - pub fn register_custom_language(&mut self, lang: CustomLanguage) -> Result<()> { - // Register with ast-grep - ast_grep_core::register_dynamic_language( - &lang.name, - &lang.library_path, - &lang.extensions.iter().map(|s| s.as_str()).collect::>(), - lang.expando_char, - )?; - - // Update extension mapping - for ext in &lang.extensions { - self.extensions.insert(ext.clone(), SupportedLanguage::Custom(lang.name.clone())); - } - - self.custom_languages.insert(lang.name.clone(), lang); - Ok(()) - } - - pub fn analyze_with_ast_grep(&self, content: &str, language: &SupportedLanguage) -> Result> { - let ast_grep_lang = match language { - SupportedLanguage::Rust => ast_grep_core::Language::Rust, - SupportedLanguage::JavaScript => ast_grep_core::Language::JavaScript, - SupportedLanguage::TypeScript => ast_grep_core::Language::TypeScript, - SupportedLanguage::Python => ast_grep_core::Language::Python, - SupportedLanguage::Custom(name) => { - // Use custom language - return Ok(vec![]); // Placeholder - } - }; - - let ast = AstGrep::new(content, ast_grep_lang); - let root = ast.root(); - - // Example: Find all function definitions - let matches = root.find_all("function $NAME($PARAMS) { $BODY }"); - - Ok(matches.into_iter().map(|node| AstGrepMatch { - text: node.text().to_string(), - start_line: node.start_position().row, - start_column: node.start_position().column, - end_line: node.end_position().row, - end_column: node.end_position().column, - }).collect()) - } -} - -#[derive(Debug, Clone)] -pub struct AstGrepMatch { - pub text: String, - pub start_line: usize, - pub start_column: usize, - pub end_line: usize, - pub end_column: usize, -} diff --git a/crates/thread-parse/src/graph.rs b/crates/thread-parse/src/graph.rs deleted file mode 100644 index b413dd2..0000000 --- a/crates/thread-parse/src/graph.rs +++ /dev/null @@ -1,125 +0,0 @@ -// crates/thread-ast/src/graph.rs -use petgraph::{Graph, NodeIndex, EdgeIndex}; -use serde::{Serialize, Deserialize}; -use std::collections::HashMap; - -pub type AstGraph = Graph; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AstNode { - pub id: NodeId, - pub kind: String, - pub text: String, - pub start_line: usize, - pub start_column: usize, - pub end_line: usize, - pub end_column: usize, - pub metadata: NodeMetadata, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NodeMetadata { - pub is_named: bool, - pub is_error: bool, - pub byte_range: (usize, usize), - pub content_hash: blake3::Hash, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum AstEdge { - Child { field_name: Option }, - Reference { ref_type: ReferenceType }, - DataFlow { flow_type: DataFlowType }, - ControlFlow { flow_type: ControlFlowType }, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ReferenceType { - Definition, - Usage, - Import, - Export, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum DataFlowType { - Assignment, - Parameter, - Return, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum ControlFlowType { - Conditional, - Loop, - Call, - Exception, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct NodeId(pub u32); - -pub struct AstGraphBuilder { - graph: AstGraph, - node_map: HashMap, - next_id: u32, -} - -impl AstGraphBuilder { - pub fn new() -> Self { - Self { - graph: Graph::new(), - node_map: HashMap::new(), - next_id: 0, - } - } - - pub fn add_node(&mut self, node: AstNode) -> NodeIndex { - let node_id = node.id; - let index = self.graph.add_node(node); - self.node_map.insert(node_id, index); - index - } - - pub fn add_edge(&mut self, parent: NodeId, child: NodeId, edge: AstEdge) -> Option { - let parent_idx = self.node_map.get(&parent)?; - let child_idx = self.node_map.get(&child)?; - Some(self.graph.add_edge(*parent_idx, *child_idx, edge)) - } - - pub fn build(self) -> AstGraph { - self.graph - } - - fn next_id(&mut self) -> NodeId { - let id = NodeId(self.next_id); - self.next_id += 1; - id - } -} - -// Convert from type-sitter nodes to petgraph -impl From<&thread_parser::TypedNode<'_>> for AstNode { - fn from(node: &thread_parser::TypedNode<'_>) -> Self { - let text = node.text(); - let content_hash = blake3::hash(text.as_bytes()); - let (start_line, start_column) = node.start_position(); - let (end_line, end_column) = node.end_position(); - - AstNode { - id: NodeId(0), // Will be set by builder - kind: node.kind().to_string(), - text: text.to_string(), - start_line, - start_column, - end_line, - end_column, - metadata: NodeMetadata { - is_named: node.node.is_named(), - is_error: node.node.is_error(), - byte_range: (node.node.start_byte(), node.node.end_byte()), - content_hash, - }, - } - } -} diff --git a/crates/thread-parse/src/lib.rs b/crates/thread-parse/src/lib.rs deleted file mode 100644 index b93cf3f..0000000 --- a/crates/thread-parse/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/crates/thread-parse/src/typed.rs b/crates/thread-parse/src/typed.rs deleted file mode 100644 index 0efe39b..0000000 --- a/crates/thread-parse/src/typed.rs +++ /dev/null @@ -1,110 +0,0 @@ -// crates/thread-parser/src/typed.rs -use tree_sitter::{Parser, Tree, Node}; -use anyhow::Result; - -// Include generated type-sitter nodes -#[cfg(feature = "rust")] -include!(env!("RUST_NODES_PATH")); - -pub struct TypedParser { - parser: Parser, - language: SupportedLanguage, -} - -#[derive(Debug, Clone)] -pub enum SupportedLanguage { - #[cfg(feature = "rust")] - Rust, - #[cfg(feature = "javascript")] - JavaScript, - #[cfg(feature = "typescript")] - TypeScript, -} - -impl TypedParser { - pub fn new(language: SupportedLanguage) -> Result { - let mut parser = Parser::new(); - - let ts_language = match language { - #[cfg(feature = "rust")] - SupportedLanguage::Rust => tree_sitter_rust::language(), - #[cfg(feature = "javascript")] - SupportedLanguage::JavaScript => tree_sitter_javascript::language(), - #[cfg(feature = "typescript")] - SupportedLanguage::TypeScript => tree_sitter_typescript::language_typescript(), - }; - - parser.set_language(ts_language)?; - Ok(Self { parser, language }) - } - - pub fn parse(&mut self, content: &str) -> Result { - let tree = self.parser.parse(content, None) - .ok_or_else(|| anyhow::anyhow!("Failed to parse content"))?; - - Ok(TypedTree { - tree, - content: content.to_string(), - language: self.language.clone(), - }) - } -} - -pub struct TypedTree { - tree: Tree, - content: String, - language: SupportedLanguage, -} - -impl TypedTree { - pub fn root_node(&self) -> TypedNode { - TypedNode { - node: self.tree.root_node(), - content: &self.content, - language: &self.language, - } - } -} - -pub struct TypedNode<'a> { - node: Node<'a>, - content: &'a str, - language: &'a SupportedLanguage, -} - -impl<'a> TypedNode<'a> { - pub fn text(&self) -> &'a str { - self.node.utf8_text(self.content.as_bytes()).unwrap_or("") - } - - pub fn kind(&self) -> &'a str { - self.node.kind() - } - - pub fn children(&self) -> impl Iterator> { - let mut cursor = self.node.walk(); - cursor.goto_first_child(); - - std::iter::from_fn(move || { - if cursor.goto_next_sibling() { - Some(TypedNode { - node: cursor.node(), - content: self.content, - language: self.language, - }) - } else { - None - } - }) - } - - pub fn start_position(&self) -> (usize, usize) { - let start = self.node.start_position(); - (start.row, start.column) - } - - pub fn end_position(&self) -> (usize, usize) { - let end = self.node.end_position(); - (end.row, end.column) - } -} diff --git a/crates/thread-store/Cargo.toml b/crates/thread-store/Cargo.toml deleted file mode 100644 index c9959f6..0000000 --- a/crates/thread-store/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "thread-store" -version = "0.1.0" -edition.workspace = true -rust-version.workspace = true -license.workspace = true -repository.workspace = true -documentation.workspace = true -authors.workspace = true - -[dependencies] - -[lints] -workspace = true diff --git a/crates/thread-store/README.md b/crates/thread-store/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-store/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-store/src/lib.rs b/crates/thread-store/src/lib.rs deleted file mode 100644 index b93cf3f..0000000 --- a/crates/thread-store/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/crates/thread-wasm/.gitignore b/crates/thread-wasm/.gitignore deleted file mode 100644 index 9e8611b..0000000 --- a/crates/thread-wasm/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -/target -**/*.rs.bk -Cargo.lock -bin/ -pkg/ -wasm-pack.log diff --git a/crates/thread-wasm/.travis.yml b/crates/thread-wasm/.travis.yml deleted file mode 100644 index 570b95b..0000000 --- a/crates/thread-wasm/.travis.yml +++ /dev/null @@ -1,69 +0,0 @@ -language: rust -sudo: "enabled" - -cache: cargo - -matrix: - include: - - # Builds with wasm-pack. - - rust: beta - env: RUST_BACKTRACE=1 - addons: - firefox: latest - chrome: stable - before_script: - - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) - - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) - - cargo install-update -a - - curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh -s -- -f - script: - - cargo generate --git . --name testing - # Having a broken Cargo.toml (in that it has curlies in fields) anywhere - # in any of our parent dirs is problematic. - - mv Cargo.toml Cargo.toml.tmpl - - cd testing - - wasm-pack build - - wasm-pack test --chrome --firefox --headless - - # Builds on nightly. - - rust: nightly - env: RUST_BACKTRACE=1 - before_script: - - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) - - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) - - cargo install-update -a - - rustup target add wasm32-unknown-unknown - script: - - cargo generate --git . --name testing - - mv Cargo.toml Cargo.toml.tmpl - - cd testing - - cargo check - - cargo check --target wasm32-unknown-unknown - - cargo check --no-default-features - - cargo check --target wasm32-unknown-unknown --no-default-features - - cargo check --no-default-features --features console_error_panic_hook - - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook - - cargo check --no-default-features --features "console_error_panic_hook wee_alloc" - - cargo check --target wasm32-unknown-unknown --no-default-features --features "console_error_panic_hook wee_alloc" - - # Builds on beta. - - rust: beta - env: RUST_BACKTRACE=1 - before_script: - - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) - - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) - - cargo install-update -a - - rustup target add wasm32-unknown-unknown - script: - - cargo generate --git . --name testing - - mv Cargo.toml Cargo.toml.tmpl - - cd testing - - cargo check - - cargo check --target wasm32-unknown-unknown - - cargo check --no-default-features - - cargo check --target wasm32-unknown-unknown --no-default-features - - cargo check --no-default-features --features console_error_panic_hook - - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook - # Note: no enabling the `wee_alloc` feature here because it requires - # nightly for now. diff --git a/crates/thread-wasm/README.md b/crates/thread-wasm/README.md deleted file mode 100644 index f468bd6..0000000 --- a/crates/thread-wasm/README.md +++ /dev/null @@ -1 +0,0 @@ -such empty diff --git a/crates/thread-wasm/src/lib.rs b/crates/thread-wasm/src/lib.rs deleted file mode 100644 index 1be1d47..0000000 --- a/crates/thread-wasm/src/lib.rs +++ /dev/null @@ -1,70 +0,0 @@ -// crates/thread-wasm/src/lib.rs -use wasm_bindgen::prelude::*; -use thread_core::ThreadAnalysisEngine; -use js_sys::Array; -use web_sys::console; - -mod utils; - -// Use dlmalloc for memory management in WASM -#[global_allocator] -static ALLOC: std::sync::GlobalDlmalloc = std::sync::GlobalDlmalloc; - -#[wasm_bindgen] -pub struct ThreadWasm { - engine: ThreadAnalysisEngine, -} - -#[wasm_bindgen] -impl ThreadWasm { - #[wasm_bindgen(constructor)] - pub fn new() -> Result { - let engine = ThreadAnalysisEngine::new() - .map_err(|e| JsValue::from_str(&e.to_string()))?; - - Ok(ThreadWasm { engine }) - } - - #[wasm_bindgen] - pub fn analyze_code(&mut self, content: &str, language: &str) -> Result { - let lang = match language { - "rust" => thread_lang::SupportedLanguage::Rust, - "javascript" => thread_lang::SupportedLanguage::JavaScript, - "typescript" => thread_lang::SupportedLanguage::TypeScript, - "python" => thread_lang::SupportedLanguage::Python, - _ => return Err(JsValue::from_str("Unsupported language")), - }; - - // Create temporary file path for analysis - let temp_path = std::path::Path::new("temp.rs"); // Would be dynamic in real implementation - - // This would need to be adapted for WASM - can't directly use file system - // Instead, we'd parse the content directly - let result = self.parse_content_directly(content, &lang) - .map_err(|e| JsValue::from_str(&e.to_string()))?; - - // Convert to JS-compatible format - let js_result = serde_wasm_bindgen::to_value(&result)?; - Ok(js_result) - } - - fn parse_content_directly(&mut self, content: &str, language: &thread_lang::SupportedLanguage) -> Result> { - // Direct parsing without file system access - let ast_graph = self.engine.parse_to_graph(content, language)?; - - Ok(WasmAnalysisResult { - node_count: ast_graph.node_count(), - edge_count: ast_graph.edge_count(), - language: format!("{:?}", language), - line_count: content.lines().count(), - }) - } -} - -#[derive(serde::Serialize)] -struct WasmAnalysisResult { - node_count: usize, - edge_count: usize, - language: String, - line_count: usize, -} diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml new file mode 100644 index 0000000..be89132 --- /dev/null +++ b/crates/utils/Cargo.toml @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# SPDX-License-Identifier: MIT OR Apache-2.0 + +[package] +name = "thread-utils" +version = "0.0.1" +description = "A collection of utilities for working with Thread. Includes fast hashers, SIMD operations, and more." +keywords = ["thread", "utils", "hashing", "simd", "fast", "performance"] +categories = ["utils", "performance", "data-structures"] +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +documentation.workspace = true +homepage.workspace = true +authors.workspace = true +include.workspace = true + +[dependencies] +rapidhash = { workspace = true, features = ["std"], optional = true } +simdeez = { workspace = true, optional = true } +memchr = { workspace = true, optional = true } + +[features] +default = ["hashers", "random", "simd"] +# enable SIMD support for fast vector operations +# simdeez gives SIMD support on Rust stable with wasm32 (SIMD128), ARM (NEON), and X86 processors (SSE2, 3, 4.1, avx2). +simd = ["dep:simdeez", "dep:memchr"] + +hashers = ["dep:rapidhash"] +# use `rand` library for seeding hash generation +random = ["rapidhash?/rand"] + +# can't use RapidHash's implementation in a CloudFlare worker; it relies on `os_rand`, which isn't available; rapidhash falls back to a different implementation that's good enough for most cases. If you need a more robust solution, consider seeding with TCP/IP calls to Cloudflare's [`drand`](https://docs.drand.love/docs/overview/). +worker = ["dep:simdeez"] + +serialization = [] + +[lints] +workspace = true diff --git a/crates/utils/README.md b/crates/utils/README.md new file mode 100644 index 0000000..9f1f170 --- /dev/null +++ b/crates/utils/README.md @@ -0,0 +1,8 @@ + + +placeholder diff --git a/crates/utils/src/hash_help.rs b/crates/utils/src/hash_help.rs new file mode 100644 index 0000000..c51e745 --- /dev/null +++ b/crates/utils/src/hash_help.rs @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: AGPL-3.0-or-later +//! Hash map, set, and related hashing utilities. +//! +//! Thread uses [`rapidhash::RapidInlineHashMap`] and [`rapidhash::RapidInlineHashSet`] as stand-ins for +//! `std::collections::HashMap` and `std::collections::HashSet` (they ARE `std::collections::HashMap` and +//! `std::collections::HashSet`, but using the [`rapidhash::``RapidInlineHashBuilder`] hash builder.) +//! +//! For Thread's expected workloads, it's *very fast* and sufficiently secure for our needs. +//! // Important to note that `rapidhash` is not a cryptographic hash, and while it's a high quality hash that's optimal in most ways, it hasn't been thoroughly tested for `HashDoD` resistance. +//! For how we use it, this isn't a concern. We also use random seeds for the hash builder, so it should be resistant to hash collision attacks. + +use rapidhash::RapidInlineBuildHasher; + +// export RapidInlineHasher for use as a type +pub use rapidhash::RapidInlineHasher; + +// These are effectively aliases for `rapidhash::RapidInlineHashMap` and `rapidhash::RapidInlineHashSet` +// They're less of a mouthful, and we avoid type aliasing a type alias +/// A type alias for `[rapidhash::RapidInlineHashMap]` with a custom build hasher. +pub type RapidMap = std::collections::HashMap; +/// A type alias for `[rapidhash::RapidInlineHashSet]` with a custom build hasher. +pub type RapidSet = std::collections::HashSet; + +/// Creates a new `RapidMap` with the specified capacity; returning the initialized map for use. +#[inline(always)] +#[must_use] pub fn map_with_capacity(capacity: usize) -> RapidMap +where + K: std::hash::Hash + Eq, + V: Default, +{ + RapidMap::with_capacity_and_hasher(capacity, RapidInlineBuildHasher::default()) +} + +/// Creates a new `RapidInlineHashSet` with the specified capacity; returning the initialized set for use. +#[inline(always)] +#[must_use] pub fn set_with_capacity(capacity: usize) -> RapidSet +where + T: std::hash::Hash + Eq, +{ + RapidSet::with_capacity_and_hasher(capacity, RapidInlineBuildHasher::default()) +} + +/// Returns a new `RapidMap` with default values. +#[inline(always)] +#[must_use] pub fn get_map() -> RapidMap { + RapidMap::default() +} + +/// Returns a new `RapidSet` with default values (a [`rapidhash::RapidInlineSet`]). +#[inline(always)] +#[must_use] pub fn get_set() -> RapidSet { + RapidSet::default() +} + +/// Computes a hash for a [`std::fs::File`] object using `rapidhash`. +#[inline(always)] +pub fn hash_file(file: &mut std::fs::File) -> Result { + rapidhash::rapidhash_file(file).map_err(std::io::Error::other) +} + +/// Computes a hash for a [`std::fs::File`] object using `rapidhash` with a specified seed. +pub fn hash_file_with_seed(file: &mut std::fs::File, seed: u64) -> Result { + rapidhash::rapidhash_file_inline(file, seed) + .map_err(std::io::Error::other) +} + +/// Computes a hash for a byte slice using `rapidhash`. +#[inline(always)] +#[must_use] pub const fn hash_bytes(bytes: &[u8]) -> u64 { + rapidhash::rapidhash(bytes) +} + +/// Computes a hash for a byte slice using `rapidhash` with a specified seed. +#[inline(always)] +#[must_use] pub const fn hash_bytes_with_seed(bytes: &[u8], seed: u64) -> u64 { + rapidhash::rapidhash_inline(bytes, seed) +} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs new file mode 100644 index 0000000..ea853c7 --- /dev/null +++ b/crates/utils/src/lib.rs @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. + +// SPDX-FileContributor: Adam Poulemanos + +// SPDX-License-Identifier: AGPL-3.0-or-later +#![allow(unused_imports)] + +#[cfg(feature = "hashers")] +mod hash_help; +#[cfg(feature = "hashers")] +pub use hash_help::{ + RapidMap, RapidSet, RapidInlineHasher,get_map, get_set, hash_bytes, hash_bytes_with_seed, hash_file, + hash_file_with_seed, map_with_capacity, set_with_capacity, +}; + +#[cfg(feature = "simd")] +mod simd; +#[cfg(feature = "simd")] +pub use simd::{get_char_column_simd, is_ascii_simd}; diff --git a/crates/utils/src/simd.rs b/crates/utils/src/simd.rs new file mode 100644 index 0000000..c0becc0 --- /dev/null +++ b/crates/utils/src/simd.rs @@ -0,0 +1,531 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: AGPL-3.0-or-later +//! SIMD optimized utilities for string processing. +//! +//! This module provides a series of SIMD optimized functions for string processing. +//! Its operations use the `simdeez` crate, along with `memchr` for strong SIMD support. +//! Both libraries provide SIMD support for wasm32, `x86_64/x86`, and aarch64 and can find +//! optimal instruction sets at runtime. +//! If no SIMD support is available, they will fall back to scalar operations. + +use memchr::memmem::FinderRev; +use simdeez::{prelude::*, simd_runtime_generate}; +use std::sync::OnceLock; + +static REV_LINE_FINDER: OnceLock = OnceLock::new(); + +// Checks if a string is all ascii. +simd_runtime_generate!( + pub fn is_ascii_simd(text: &str) -> bool { + let bytes = text.as_bytes(); + let len = bytes.len(); + + // reinterpret u8 as i8 slice (safe because underlying bits match) + let bytes_i8 = unsafe { std::slice::from_raw_parts(bytes.as_ptr().cast::(), len) }; + + let mut remainder = bytes_i8; + + // Process in vector-width chunks + while remainder.len() >= S::Vi8::WIDTH { + let chunk = &remainder[..S::Vi8::WIDTH]; + let v = S::Vi8::load_from_slice(chunk); + + // For ASCII, all values must be >= 0 (since ASCII is 0..127) + let mask = v.cmp_lt(S::Vi8::set1(0)); + // Check if any lane is negative (non-ASCII) + // get_mask() returns a bitmask, if any bit is set, it means non-ASCII was found + if mask.get_mask() != 0 { + return false; + } + + remainder = &remainder[S::Vi8::WIDTH..]; + } + + // Handle remaining bytes + remainder.iter().all(|&b| b >= 0) + } +); + +// Find the last occurrence of a byte value in a slice, searching backwards +// Returns the index of the last occurrence, or None if not found +simd_runtime_generate!( + fn find_last_byte_simd(haystack: &[u8], needle: u8, is_eol: bool) -> Option { + if haystack.is_empty() { + return None; + } + if is_eol { + // Special case for newline, use cached finder + // Use into_owned() to ensure the FinderRev outlives the reference to its needle (it doesn't need after it's constructed) + let line_finder = + REV_LINE_FINDER.get_or_init(|| FinderRev::new(&[needle]).into_owned()); + return line_finder.rfind(haystack); + } + let bound_needle = &[needle]; + let finder = FinderRev::new(bound_needle); + + finder.rfind(haystack) + } +); + +// Count UTF-8 characters in a byte slice using SIMD +// This counts by identifying non-continuation bytes +simd_runtime_generate!( + fn count_utf8_chars_simd(bytes: &[u8]) -> usize { + let len = bytes.len(); + if len == 0 { + return 0; + } + + // Convert to i8 for SIMD operations + let bytes_i8 = unsafe { std::slice::from_raw_parts(bytes.as_ptr().cast::(), len) }; + + let mut remainder = bytes_i8; + let mut char_count = 0; + + // UTF-8 continuation bytes have pattern 10xxxxxx (0x80-0xBF) + // We want to count bytes that are NOT continuation bytes + let continuation_pattern = S::Vi8::set1(0b1000_0000_u8 as i8); + let mask_pattern = S::Vi8::set1(0b1100_0000_u8 as i8); + + // Process in SIMD chunks + while remainder.len() >= S::Vi8::WIDTH { + let chunk = &remainder[..S::Vi8::WIDTH]; + let v = S::Vi8::load_from_slice(chunk); + + // Check which bytes are NOT continuation bytes + // Continuation bytes: (byte & 0b11000000) == 0b10000000 + let masked = v & mask_pattern; + let is_continuation = masked.cmp_eq(continuation_pattern); + + // Count non-continuation bytes + let mask = is_continuation.get_mask(); + // Count zeros in the mask (non-continuation bytes) + char_count += S::Vi8::WIDTH - mask.count_ones() as usize; + + remainder = &remainder[S::Vi8::WIDTH..]; + } + + // Handle remaining bytes + for &byte in remainder { + if (byte as u8) & 0b1100_0000 != 0b1000_0000 { + char_count += 1; + } + } + + char_count + } +); + +/// Optimized character column calculation with SIMD, finding the last newline character's index +/// +/// It first checks if the line is entirely `ascii` with [`is_ascii_simd`], +/// and if so, uses a faster search strategy with [`find_last_byte_simd`]. +/// If there are utf-8 characters present, it still uses the same approach but then +/// must use [`count_utf8_chars_simd`] to count non-continuation bytes. +/// All operations are highly optimized with full SIMD support. +#[inline] +#[must_use] pub fn get_char_column_simd(text: &str, offset: usize) -> usize { + if offset == 0 { + return 0; + } + + let bytes = text.as_bytes(); + if offset > bytes.len() { + return 0; + } + + let search_slice = &bytes[..offset]; + + // Check if the text is ASCII for fast path + if is_ascii_simd(text) { + // ASCII fast path: find last newline and count bytes + match find_last_byte_simd(search_slice, b'\n', true) { + Some(newline_pos) => offset - newline_pos - 1, + None => offset, // No newline found, entire offset is the column + } + } else { + // UTF-8 path: find last newline then count UTF-8 characters + match find_last_byte_simd(search_slice, b'\n', true) { + Some(newline_pos) => { + let line_start = newline_pos + 1; + let line_bytes = &search_slice[line_start..]; + count_utf8_chars_simd(line_bytes) + } + None => { + // No newline found, count characters from start + count_utf8_chars_simd(search_slice) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_string() { + assert!(is_ascii_simd("")); + } + + #[test] + fn test_pure_ascii() { + assert!(is_ascii_simd("Hello, World!")); + assert!(is_ascii_simd("123456789")); + assert!(is_ascii_simd("ABCDEFGHIJKLMNOPQRSTUVWXYZ")); + assert!(is_ascii_simd("abcdefghijklmnopqrstuvwxyz")); + assert!(is_ascii_simd("!@#$%^&*()_+-=[]{}|;':\",./<>?")); + } + + #[test] + fn test_ascii_with_newlines_and_tabs() { + assert!(is_ascii_simd("Hello\nWorld\t!")); + assert!(is_ascii_simd("\t\n\r")); + } + + #[test] + fn test_ascii_control_characters() { + // Test ASCII control characters (0-31, 127) + assert!(is_ascii_simd("\x00\x01\x02\x03\x04\x05\x06\x07")); + assert!(is_ascii_simd("\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F")); + assert!(is_ascii_simd("\x10\x11\x12\x13\x14\x15\x16\x17")); + assert!(is_ascii_simd("\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F")); + assert!(is_ascii_simd("\x7F")); // DEL character + } + + #[test] + fn test_non_ascii_characters() { + // UTF-8 encoded non-ASCII characters + assert!(!is_ascii_simd("cafΓ©")); // contains Γ© + assert!(!is_ascii_simd("naΓ―ve")); // contains Γ― + assert!(!is_ascii_simd("rΓ©sumΓ©")); // contains Γ© + assert!(!is_ascii_simd("πŸš€")); // emoji + assert!(!is_ascii_simd("こんにけは")); // Japanese + assert!(!is_ascii_simd("ΠŸΡ€ΠΈΠ²Π΅Ρ‚")); // Russian + assert!(!is_ascii_simd("Ω…Ψ±Ψ­Ψ¨Ψ§")); // Arabic + // all together for fun + assert!(!is_ascii_simd( + "cafΓ© Ω…Ψ±Ψ­Ψ¨Ψ§ こんにけは πŸš€ ΠŸΡ€ΠΈΠ²Π΅Ρ‚ rΓ©sumΓ© naΓ―ve" + )); + } + + #[test] + fn test_mixed_ascii_non_ascii() { + assert!(!is_ascii_simd("Hello cafΓ©")); + assert!(!is_ascii_simd("ASCII and πŸš€")); + assert!(!is_ascii_simd("test\u{200B}")); // zero-width space + } + + #[test] + fn test_long_ascii_strings() { + // Test strings longer than typical SIMD vector width + let long_ascii = "a".repeat(1000); + assert!(is_ascii_simd(&long_ascii)); + + let long_ascii_mixed = "ABC123!@#".repeat(100); + assert!(is_ascii_simd(&long_ascii_mixed)); + } + + #[test] + fn test_long_non_ascii_strings() { + let long_non_ascii = "cafΓ©".repeat(100); + assert!(!is_ascii_simd(&long_non_ascii)); + } + + #[test] + fn test_ascii_boundary_values() { + // Test characters at ASCII boundaries + assert!(is_ascii_simd("\x00")); // NULL (0) + assert!(is_ascii_simd("\x7F")); // DEL (127) + + // Test non-ASCII characters (properly encoded UTF-8) + assert!(!is_ascii_simd("ΓΌ")); // UTF-8 encoded ΓΌ (first byte is 0xC3) + assert!(!is_ascii_simd("€")); // UTF-8 encoded € (first byte is 0xE2) + } + + #[test] + fn test_various_lengths() { + // Test strings of various lengths to exercise both SIMD and scalar paths + for i in 1..=100 { + let ascii_string = "a".repeat(i); + assert!(is_ascii_simd(&ascii_string), "Failed for length {}", i); + } + } + + #[test] + fn test_non_ascii_at_different_positions() { + // Non-ASCII at the beginning + assert!(!is_ascii_simd("Γ©abc")); + + // Non-ASCII in the middle + assert!(!is_ascii_simd("abΓ©c")); + + // Non-ASCII at the end + assert!(!is_ascii_simd("abcΓ©")); + + // Multiple non-ASCII characters + assert!(!is_ascii_simd("Γ©abcΓ©")); + } + + #[test] + fn test_consistency_with_str_is_ascii() { + let test_strings = vec![ + "", + "Hello", + "cafΓ©", + "πŸš€", + "ASCII123!@#", + "test\u{200B}", + "\x00\x7F", + ]; + + // Test regular strings + for test_str in &test_strings { + assert_eq!( + is_ascii_simd(test_str), + test_str.is_ascii(), + "Mismatch for string: {:?}", + test_str + ); + } + + // Test long string separately + let long_string = "a".repeat(1000); + assert_eq!( + is_ascii_simd(&long_string), + long_string.is_ascii(), + "Mismatch for long string" + ); + + // Test additional non-ASCII characters + let non_ascii_chars = ["ΓΌ", "€", "ζΌ’", "πŸŽ‰"]; + for ch in &non_ascii_chars { + assert_eq!( + is_ascii_simd(ch), + ch.is_ascii(), + "Mismatch for non-ASCII character: {:?}", + ch + ); + } + } + + #[test] + fn test_simd_vector_width_boundaries() { + // Test strings that are exactly SIMD vector width and around those boundaries + // Common SIMD widths are 16, 32, 64 bytes + for width in [16, 32, 64] { + // Exactly vector width + let exact = "a".repeat(width); + assert!(is_ascii_simd(&exact)); + + // One less than vector width + let one_less = "a".repeat(width - 1); + assert!(is_ascii_simd(&one_less)); + + // One more than vector width + let one_more = "a".repeat(width + 1); + assert!(is_ascii_simd(&one_more)); + + // Non-ASCII at exact boundary + let mut boundary_test = "a".repeat(width - 1); + boundary_test.push('Γ©'); + assert!(!is_ascii_simd(&boundary_test)); + } + } + + #[test] + fn test_all_ascii_characters() { + // Test all valid ASCII characters (0-127) + let mut all_ascii = String::new(); + for i in 0u8..=127 { + all_ascii.push(i as char); + } + assert!(is_ascii_simd(&all_ascii)); + } + + #[test] + fn debug_simple_case() { + // Test with simple ASCII first + assert!(is_ascii_simd("a")); + assert!(is_ascii_simd("aa")); + assert!(is_ascii_simd("aaa")); + + // Test with simple non-ASCII + assert!(!is_ascii_simd("Γ©")); + + println!("Simple cases work"); + } + + // Tests for find_last_byte_simd + #[test] + fn test_find_last_byte_empty() { + assert_eq!(find_last_byte_simd(&[], b'a', false), None); + } + + #[test] + fn test_find_last_byte_single() { + assert_eq!(find_last_byte_simd(&[b'a'], b'a', false), Some(0)); + assert_eq!(find_last_byte_simd(&[b'a'], b'b', false), None); + } + + #[test] + fn test_find_last_byte_multiple() { + let haystack = b"hello world hello"; + assert_eq!(find_last_byte_simd(haystack, b'l', false), Some(15)); // Last 'l' + assert_eq!(find_last_byte_simd(haystack, b'h', false), Some(12)); // Last 'h' + assert_eq!(find_last_byte_simd(haystack, b'o', false), Some(16)); // Last 'o' + assert_eq!(find_last_byte_simd(haystack, b'x', false), None); // Not found + } + + #[test] + fn test_find_last_byte_newlines() { + let text = b"line1\nline2\nline3"; + assert_eq!(find_last_byte_simd(text, b'\n', true), Some(11)); // Last newline + + let single_line = b"no newlines here"; + assert_eq!(find_last_byte_simd(single_line, b'\n', true), None); + } + + #[test] + fn test_find_last_byte_long() { + // Test with strings longer than SIMD width + let long_text = "a".repeat(100) + "b" + &"a".repeat(100); + let bytes = long_text.as_bytes(); + assert_eq!(find_last_byte_simd(bytes, b'b', false), Some(100)); + } + + // Tests for count_utf8_chars_simd + #[test] + fn test_count_utf8_chars_empty() { + assert_eq!(count_utf8_chars_simd(&[]), 0); + } + + #[test] + fn test_count_utf8_chars_ascii() { + assert_eq!(count_utf8_chars_simd(b"hello"), 5); + assert_eq!(count_utf8_chars_simd(b"Hello, World!"), 13); + assert_eq!(count_utf8_chars_simd(b"123"), 3); + } + + #[test] + fn test_count_utf8_chars_utf8() { + // "cafΓ©" in UTF-8: c(1) a(1) f(1) Γ©(2 bytes: 0xC3 0xA9) + assert_eq!(count_utf8_chars_simd("cafΓ©".as_bytes()), 4); + + // "πŸš€" in UTF-8: 4 bytes (0xF0 0x9F 0x9A 0x80) + assert_eq!(count_utf8_chars_simd("πŸš€".as_bytes()), 1); + + // Mixed: "HelloπŸš€" = 5 ASCII + 1 emoji = 6 chars + assert_eq!(count_utf8_chars_simd("HelloπŸš€".as_bytes()), 6); + } + + #[test] + fn test_count_utf8_chars_consistency() { + let test_strings = vec!["Hello", "cafΓ©", "πŸš€", "Hello, δΈ–η•Œ!", "rΓ©sumΓ©", "ζ΅‹θ―•", ""]; + + for test_str in test_strings { + let simd_count = count_utf8_chars_simd(test_str.as_bytes()); + let std_count = test_str.chars().count(); + assert_eq!(simd_count, std_count, "Mismatch for string: {:?}", test_str); + } + } + + // Tests for get_char_column_simd + #[test] + fn test_get_char_column_simple() { + // Simple case: no newlines + assert_eq!(get_char_column_simd("hello", 5), 5); + assert_eq!(get_char_column_simd("hello", 3), 3); + assert_eq!(get_char_column_simd("hello", 0), 0); + } + + #[test] + fn test_get_char_column_with_newlines() { + let text = "line1\nline2\nline3"; + + // Position at start of each line + assert_eq!(get_char_column_simd(text, 0), 0); // Start of "line1" + assert_eq!(get_char_column_simd(text, 6), 0); // Start of "line2" + assert_eq!(get_char_column_simd(text, 12), 0); // Start of "line3" + + // Positions within lines + assert_eq!(get_char_column_simd(text, 3), 3); // "lin|e1" + assert_eq!(get_char_column_simd(text, 9), 3); // "lin|e2" + assert_eq!(get_char_column_simd(text, 15), 3); // "lin|e3" + } + + #[test] + fn test_get_char_column_utf8() { + // Test with UTF-8 characters + let text = "cafΓ©\nnaΓ―ve"; + + // Position within first line: "ca|fΓ©" = position 2 + assert_eq!(get_char_column_simd(text, 2), 2); + + // Position at start of second line after newline + assert_eq!(get_char_column_simd(text, 6), 0); // Start of "naΓ―ve" + + // Position within second line: "na|Γ―ve" = position 2 (after 'n', 'a') + assert_eq!(get_char_column_simd(text, 8), 2); + } + + #[test] + fn test_get_char_column_consistency_with_original() { + fn original_get_char_column(text: &str, offset: usize) -> usize { + let src = text.as_bytes(); + let mut col = 0; + for &b in src[..offset].iter().rev() { + if b == b'\n' { + break; + } + if b & 0b1100_0000 != 0b1000_0000 { + col += 1; + } + } + col + } + + let test_cases = vec![ + ("hello", vec![0, 1, 3, 5]), + ("line1\nline2", vec![0, 3, 5, 6, 9]), + ("cafΓ©\nworld", vec![0, 2, 5, 6, 8]), + ("πŸš€test\nnew", vec![0, 1, 3, 6, 7]), + ("", vec![0]), + ("a", vec![0, 1]), + ]; + + for (text, offsets) in test_cases { + for offset in offsets { + if offset <= text.len() { + let original = original_get_char_column(text, offset); + let simd = get_char_column_simd(text, offset); + assert_eq!( + original, simd, + "Mismatch for text: {:?}, offset: {}", + text, offset + ); + } + } + } + } + + #[test] + fn test_get_char_column_edge_cases() { + // Test edge cases + assert_eq!(get_char_column_simd("", 0), 0); + assert_eq!(get_char_column_simd("test", 0), 0); + assert_eq!(get_char_column_simd("test", 100), 0); // Offset beyond length + + // Test with only newlines + assert_eq!(get_char_column_simd("\n\n\n", 1), 0); + assert_eq!(get_char_column_simd("\n\n\n", 2), 0); + + // Test long lines + let long_line = "a".repeat(1000); + assert_eq!(get_char_column_simd(&long_line, 500), 500); + + let long_with_newline = "a".repeat(500) + "\n" + &"b".repeat(300); + assert_eq!(get_char_column_simd(&long_with_newline, 800), 299); + } +} diff --git a/crates/thread-wasm/.appveyor.yml b/crates/wasm/.appveyor.yml similarity index 67% rename from crates/thread-wasm/.appveyor.yml rename to crates/wasm/.appveyor.yml index 6736e61..04804d0 100644 --- a/crates/thread-wasm/.appveyor.yml +++ b/crates/wasm/.appveyor.yml @@ -1,11 +1,13 @@ -install: - - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - - if not defined RUSTFLAGS rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly - - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - - rustc -V - - cargo -V - -build: false - -test_script: - - cargo test --locked +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +install: + - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - if not defined RUSTFLAGS rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin + - rustc -V + - cargo -V +build: false +test_script: + - cargo test --locked diff --git a/crates/wasm/.gitignore b/crates/wasm/.gitignore new file mode 100644 index 0000000..dc20797 --- /dev/null +++ b/crates/wasm/.gitignore @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + +/target +**/*.rs.bk +Cargo.lock +bin/ +pkg/ +wasm-pack.log diff --git a/crates/wasm/.travis.yml b/crates/wasm/.travis.yml new file mode 100644 index 0000000..ec65fca --- /dev/null +++ b/crates/wasm/.travis.yml @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +language: rust +sudo: "enabled" +cache: cargo +matrix: + include: + # Builds with wasm-pack. + - rust: beta + env: RUST_BACKTRACE=1 + addons: + firefox: latest + chrome: stable + before_script: + - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) + - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) + - cargo install-update -a + - curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh -s -- -f + script: + - cargo generate --git . --name testing + # Having a broken Cargo.toml (in that it has curlies in fields) anywhere + # in any of our parent dirs is problematic. + - mv Cargo.toml Cargo.toml.tmpl + - cd testing + - wasm-pack build + - wasm-pack test --chrome --firefox --headless + # Builds on nightly. + - rust: nightly + env: RUST_BACKTRACE=1 + before_script: + - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) + - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) + - cargo install-update -a + - rustup target add wasm32-unknown-unknown + script: + - cargo generate --git . --name testing + - mv Cargo.toml Cargo.toml.tmpl + - cd testing + - cargo check + - cargo check --target wasm32-unknown-unknown + - cargo check --no-default-features + - cargo check --target wasm32-unknown-unknown --no-default-features + - cargo check --no-default-features --features console_error_panic_hook + - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook + - cargo check --no-default-features --features "console_error_panic_hook wee_alloc" + - cargo check --target wasm32-unknown-unknown --no-default-features --features "console_error_panic_hook wee_alloc" + # Builds on beta. + - rust: beta + env: RUST_BACKTRACE=1 + before_script: + - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) + - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) + - cargo install-update -a + - rustup target add wasm32-unknown-unknown + script: + - cargo generate --git . --name testing + - mv Cargo.toml Cargo.toml.tmpl + - cd testing + - cargo check + - cargo check --target wasm32-unknown-unknown + - cargo check --no-default-features + - cargo check --target wasm32-unknown-unknown --no-default-features + - cargo check --no-default-features --features console_error_panic_hook + - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook + # Note: no enabling the `wee_alloc` feature here because it requires + # nightly for now. diff --git a/crates/thread-wasm/Cargo.toml b/crates/wasm/Cargo.toml similarity index 59% rename from crates/thread-wasm/Cargo.toml rename to crates/wasm/Cargo.toml index 96b149b..9b44bf3 100644 --- a/crates/thread-wasm/Cargo.toml +++ b/crates/wasm/Cargo.toml @@ -1,6 +1,15 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + [package] name = "thread-wasm" description = "WASM bindings for Thread. Deploy Thread to the web!" +readme = "README.md" +keywords = ["wasm", "webassembly", "thread", "bindings"] +categories = ["webassembly"] +publish = true version = "0.0.1" edition.workspace = true rust-version.workspace = true @@ -8,15 +17,15 @@ license.workspace = true repository.workspace = true documentation.workspace = true authors.workspace = true +include.workspace = true [dependencies] -# TODO: Add compile-for-os support as a universal binary (i.e. for moonrepo) -dlmalloc = { version = "0.2.9", features = ["global"] } +# TODO: Add WASI support as a universal binary (i.e. for moonrepo) js-sys = { version = "0.3.77" } wasm-bindgen = { version = "0.2.100" } -web-sys = { version = "0.3.77" } -thread-core = { version = "0.0.1", path = "../thread-core" } -serde.workspace = true +web-sys = { version = "0.3.77", features = ["console"], optional = true } +thread-language = { workspace = true } +serde = { workspace = true, optional = true } # The `console_error_panic_hook` crate provides better debugging of panics by # logging them with `console.error`. This is great for development, but requires @@ -24,26 +33,33 @@ serde.workspace = true # code size when deploying. console_error_panic_hook = { version = "0.1.7", optional = true } -rayon = { version = "1.10.0", features = ["web_spin_lock"], optional = true } +rayon = { workspace = true, features = ["web_spin_lock"], optional = true } + +thread-utils = { workspace = true, default-features = false, features = [ + "hashers", +] } [dev-dependencies] -wasm-bindgen-test = "0.3.34" +wasm-bindgen-test = "0.3.50" [lints] workspace = true [lib] -name="thread_wasm" +name = "thread_wasm" crate-type = ["cdylib", "rlib"] -path="src/lib.rs" - +path = "src/lib.rs" [features] -worker=["panic_hook"] -browser=["multi-threading"] -default=[] -panic_hook = ['console_error_panic_hook'] -multi-threading = ["dep:rayon"] +default = ["worker"] +worker = ["web-sys/console"] # single-threaded worker deployment +browser = ["multi-threading", "serialization"] +# TODO: wasi = [ ] + + +serialization = ["serde/derive"] +panic_hook = ['console_error_panic_hook', "web-sys/console"] +multi-threading = ["rayon"] [package.metadata.wasm-pack.profile.profiling.wasm-bindgen] debug-js-glue = false diff --git a/crates/wasm/README.md b/crates/wasm/README.md new file mode 100644 index 0000000..1913628 --- /dev/null +++ b/crates/wasm/README.md @@ -0,0 +1,8 @@ + + +such empty diff --git a/crates/wasm/src/lib.rs b/crates/wasm/src/lib.rs new file mode 100644 index 0000000..03a443e --- /dev/null +++ b/crates/wasm/src/lib.rs @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +mod utils; + +#[cfg_attr(feature = "serialization", derive(serde::Serialize))] +struct WasmAnalysisResult { + node_count: usize, + edge_count: usize, + language: String, + line_count: usize, +} diff --git a/crates/thread-wasm/src/utils.rs b/crates/wasm/src/utils.rs similarity index 60% rename from crates/thread-wasm/src/utils.rs rename to crates/wasm/src/utils.rs index 88b6f4f..0d15ef3 100644 --- a/crates/thread-wasm/src/utils.rs +++ b/crates/wasm/src/utils.rs @@ -1,12 +1,19 @@ -#[cfg(feature = "console_error_panic_hook")] -#[wasm_bindgen(start)] -pub fn set_panic_hook() { - // When the `console_error_panic_hook` feature is enabled, we can call the - // `set_panic_hook` function at least once during initialization, and then - // we will get better error messages if our code ever panics. - // - // For more details see - // https://github.com/rustwasm/console_error_panic_hook#readme - - console_error_panic_hook::set_once(); -} +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +#[cfg(feature = "console_error_panic_hook")] +use wasm_bindgen::*; +#[cfg(feature = "console_error_panic_hook")] +#[wasm_bindgen(start)] +pub fn set_panic_hook() { + // When the `console_error_panic_hook` feature is enabled, we can call the + // `set_panic_hook` function at least once during initialization, and then + // we will get better error messages if our code ever panics. + // + // For more details see + // https://github.com/rustwasm/console_error_panic_hook#readme + + console_error_panic_hook::set_once(); +} diff --git a/crates/thread-wasm/tests/web.rs b/crates/wasm/tests/web.rs similarity index 60% rename from crates/thread-wasm/tests/web.rs rename to crates/wasm/tests/web.rs index c1cd526..9b350d1 100644 --- a/crates/thread-wasm/tests/web.rs +++ b/crates/wasm/tests/web.rs @@ -1,13 +1,18 @@ -//! Test suite for the Web and headless browsers. - -#![cfg(target_arch = "wasm32")] - -extern crate wasm_bindgen_test; -use wasm_bindgen_test::*; - -wasm_bindgen_test_configure!(run_in_browser); - -#[wasm_bindgen_test] -fn pass() { - assert_eq!(1 + 1, 2); -} +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +//! Test suite for the Web and headless browsers. + +#![cfg(target_arch = "wasm32")] + +extern crate wasm_bindgen_test; +use wasm_bindgen_test::*; + +wasm_bindgen_test_configure!(run_in_browser); + +#[wasm_bindgen_test] +fn pass() { + assert_eq!(1 + 1, 2); +} diff --git a/deny.toml b/deny.toml index e54c058..2ac4451 100644 --- a/deny.toml +++ b/deny.toml @@ -1,6 +1,9 @@ +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 # SPDX-FileCopyrightText: 2025 Knitli Inc. # -# SPDX-License-Identifier: AGPL-3.0-or-later + # This template contains all of the possible sections and their default values @@ -27,20 +30,20 @@ # dependencies not shared by any other crates, would be ignored, as the target # list here is effectively saying which targets you are building for. targets = [ - "x86_64-unknown-linux-gnu", - "x86_64-unknown-linux-musl", - "aarch64-apple-darwin", - "aarch64-unknown-linux-gnu", - "x86_64-apple-darwin", - "x86_64-pc-windows-gnu", - "x86_64-pc-windows-msvc", - # The triple can be any string, but only the target triples built in to - # rustc (as of 1.40) can be checked against actual config expressions - #"x86_64-unknown-linux-musl", - # You can also specify which target_features you promise are enabled for a - # particular target. target_features are currently not validated against - # the actual valid features supported by the target architecture. - #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, + "x86_64-unknown-linux-gnu", + "x86_64-unknown-linux-musl", + "aarch64-apple-darwin", + "aarch64-unknown-linux-gnu", + "x86_64-apple-darwin", + "x86_64-pc-windows-gnu", + "x86_64-pc-windows-msvc", + # The triple can be any string, but only the target triples built in to + # rustc (as of 1.40) can be checked against actual config expressions + #"x86_64-unknown-linux-musl", + # You can also specify which target_features you promise are enabled for a + # particular target. target_features are currently not validated against + # the actual valid features supported by the target architecture. + #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, ] # When creating the dependency graph used as the source of truth when checks are # executed, this field can be used to prune crates from the graph, removing them @@ -53,7 +56,7 @@ targets = [ # If true, metadata will be collected with `--all-features`. Note that this can't # be toggled off if true, if you want to conditionally enable `--all-features` it # is recommended to pass `--all-features` on the cmd line instead -all-features = false +all-features = true # If true, metadata will be collected with `--no-default-features`. The same # caveat with `all-features` applies no-default-features = false @@ -81,10 +84,11 @@ feature-depth = 1 # A list of advisory IDs to ignore. Note that ignored advisories will still # output a note when they are encountered. ignore = [ - #"RUSTSEC-0000-0000", - #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, - #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish - #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, + { id = "RUSTSEC-2024-0436", reason = "Paste is used as a dependency for macros, and is not used at runtime" }, + #"RUSTSEC-0000-0000", + #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, + #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish + #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, ] # If this is true, then cargo deny will use the git executable to fetch advisory database. # If this is false, then it uses a built-in git library. @@ -100,19 +104,14 @@ ignore = [ # See https://spdx.org/licenses/ for list of possible licenses # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. allow = [ - "Apache-2.0 WITH LLVM-exception", - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "CC0-1.0", - "CC-BY-4.0", - "ISC", - "MIT", - "MIT-0", - "MPL-2.0", - "Unicode-3.0", - "Unlicense", - "Zlib", + "Apache-2.0", + "BSD-2-Clause", + + "BSL-1.0", + "MIT", + "Unicode-3.0", + "Unlicense", + ] # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the @@ -122,9 +121,18 @@ confidence-threshold = 0.8 # Allow 1 or more licenses on a per-crate basis, so that particular licenses # aren't accepted for every possible crate as with the normal allow list exceptions = [ - # Each entry is the crate and version constraint, and its specific allow - # list - #{ allow = ["Zlib"], crate = "adler32" }, + { allow = ["AGPL-3.0"], crate = "xtask" }, + { allow = ["AGPL-3.0"], crate = "thread-ast-engine" }, + { allow = ["AGPL-3.0"], crate = "thread-rule-engine" }, + { allow = ["AGPL-3.0"], crate = "thread-utils" }, + { allow = ["AGPL-3.0"], crate = "thread-language" }, + { allow = ["AGPL-3.0"], crate = "thread-services" }, + { allow = ["AGPL-3.0"], crate = "thread-wasm" }, + + + # Each entry is the crate and version constraint, and its specific allow + # list + #{ allow = ["Zlib"], crate = "adler32" }, ] # Some crates don't have (easily) machine readable licensing information, @@ -155,7 +163,7 @@ ignore = false # is only published to private registries, and ignore is true, the crate will # not have its license(s) checked registries = [ - #"https://sekretz.com/registry + #"https://sekretz.com/registry ] # This section is considered when running `cargo deny check bans`. @@ -182,16 +190,16 @@ workspace-default-features = "allow" external-default-features = "allow" # List of crates that are allowed. Use with care! allow = [ - #"ansi_term@0.11.0", - #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, + #"ansiterm@0.11.0", + #{ crate = "ansiterm@0.11.0", reason = "you can specify a reason it is allowed" }, ] # List of crates to deny deny = [ - #"ansi_term@0.11.0", - #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, - # Wrapper crates can optionally be specified to allow the crate when it - # is a direct dependency of the otherwise banned crate - #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, + #"ansiterm@0.11.0", + #{ crate = "ansiterm@0.11.0", reason = "you can specify a reason it is banned" }, + # Wrapper crates can optionally be specified to allow the crate when it + # is a direct dependency of the otherwise banned crate + #{ crate = "ansiterm@0.11.0", wrappers = ["this-crate-directly-depends-on-ansiterm"] }, ] # List of features to allow/deny @@ -219,16 +227,16 @@ deny = [ # Certain crates/versions that will be skipped when doing duplicate detection. skip = [ - #"ansi_term@0.11.0", - #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, + #"ansiterm@0.11.0", + #{ crate = "ansiterm@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, ] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. Unlike skip, it also includes the entire tree of transitive # dependencies starting at the specified crate, up to a certain depth, which is # by default infinite. skip-tree = [ - #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies - #{ crate = "ansi_term@0.11.0", depth = 20 }, + #"ansiterm@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies + #{ crate = "ansiterm@0.11.0", depth = 20 }, ] # This section is considered when running `cargo deny check sources`. diff --git a/hk.pkl b/hk.pkl index 058ff98..ca6fbd8 100644 --- a/hk.pkl +++ b/hk.pkl @@ -1,9 +1,3 @@ -/* - * SPDX-FileCopyrightText: 2025 Knitli Inc. - * - * * Licensed under the [Plain MIT License](LICENSE.md) -*/ - amends "package://github.com/jdx/hk/releases/download/v1.2.0/hk@1.2.0#/Config.pkl" import "package://github.com/jdx/hk/releases/download/v1.2.0/hk@1.2.0#/Builtins.pkl" @@ -33,11 +27,18 @@ local linters = new Mapping { // Spellchecker ["typos"] = new Step { workspace_indicator = "_typos.toml" - glob = List( "*README", "*.{login,astro,bash,bash_logout,bashrc,browserlistrc,conf,config,csh,css,cts,fish,gitattributes,gitmodules,html,htmx,ini,j2,jinja,jinja2,json,json5,jsonc,jsonl,ksh,md,mdown,mdtext,mdtxt,mdwn,mdx,mk,mkd,mts,nix,nu,pkl,profile,quokka,sass,scss,sh,sh,shellcheckrc,sql,sqlite,stylelintrc,svelte,tcsh,toml,ts,tsx,txt,yaml,yml,zlogin,zlogout,zprofile,zsh,zshenv,zshrc}", "*Dockerfile*", "*Makefile*", "*makefile*", "CHANGELOG*", "CODE_OF_CONDUCT*", "CONTRIBUTING*", "HACKING*", "LICENSE", "README*", "SECURITY*", "UNLICENSE") + glob = List( "*README", "*.{login,astro,bash,bash_logout,bashrc,browserlistrc,conf,config,csh,css,cts,fish,gitattributes,gitmodules,html,htmx,ini,j2,jinja,jinja2,json,json5,jsonc,jsonl,ksh,md,mdown,mdtext,mdtxt,mdwn,mdx,mk,mkd,mts,nix,nu,pkl,profile,py,quokka,rs,sass,scss,sh,sh,shellcheckrc,sql,sqlite,stylelintrc,svelte,tcsh,toml,ts,tsx,txt,yaml,yml,zlogin,zlogout,zprofile,zsh,zshenv,zshrc}", "*Dockerfile*", "*Makefile*", "*makefile*", "CHANGELOG*", "CODE_OF_CONDUCT*", "CONTRIBUTING*", "HACKING*", "LICENSE", "README*", "SECURITY*", "UNLICENSE") check = "typos -j 8 --config {{ workspace_indicator }} {{ files }}" fix = "typos --write-changes --config {{ workspace_indicator }} {{ files }}" } + ["reuse"] = new Step { + glob = List("*README", "*.{login,astro,bash,bash_logout,bashrc,browserlistrc,conf,config,csh,css,cts,fish,gitattributes,gitmodules,html,htmx,ini,j2,jinja,jinja2,json,json5,jsonc,jsonl,ksh,md,mdown,mdtext,mdtxt,mdwn,mdx,mk,mkd,mts,nix,nu,pkl,profile,py,quokka,rs,sass,scss,sh,sh,shellcheckrc,sql,sqlite,stylelintrc,svelte,tcsh,toml,ts,tsx,txt,yaml,yml,zlogin,zlogout,zprofile,zsh,zshenv,zshrc}", "*Dockerfile*", "*Makefile*", "*makefile*", "CHANGELOG*", "CODE_OF_CONDUCT*", "CONTRIBUTING*", "HACKING*", "README*", "SECURITY*", "SHARING") + batch = true + check = "reuse lint-file {{ files }}" + fix = "./scripts/update-licenses.py add {{ files }}" + } + // check hk.pkl (and any others) ["pkl"] = new Step { @@ -51,6 +52,7 @@ local linters = new Mapping { glob = List( "*.{.yaml,yml}") batch = true check = "yamlfmt -conf {{ workspace_indicator }} -lint {{ files }}" + fix = "yamlfmt -conf {{ workspace_indicator }} {{ files }}" } } diff --git a/hk.pkl.license b/hk.pkl.license new file mode 100644 index 0000000..e7b6acf --- /dev/null +++ b/hk.pkl.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/info/Pattern.md b/info/Pattern.md new file mode 100644 index 0000000..0967348 --- /dev/null +++ b/info/Pattern.md @@ -0,0 +1,150 @@ + + +# A Deep Dive Into ast-grep’s Pattern + +Herrington Darkholme + +Follow +7 min read +Β· +May 31, 2023 +83 + +Photo by niko photos on Unsplash +If you are interested in code refactoring tools, you may have heard of ast-grep, a Tree-sitter-based tool for structural search and replacement. ast-grep allows you to write code patterns for finding and modifying code based on its structure, not just text. But how does it work under the hood? + +In this article, I will deep dive into ast-grep’s pattern. It also helps you to understand the core concepts of Tree-sitter. + +Pattern is a convenient way to write and read expressions that describe syntax trees. It resembles code but with some special syntax and semantics that allow you to match parts of a syntax tree based on their structure, type, or content. + +While ast-grep’s pattern is easy to learn, it is hard to master. It requires you to know the Tree-sitter grammar and meaning of the target language and the rules and conventions of ast-grep. + +In this guide, we will help you grasp the core concepts that are common to all Tree-sitter based tools. We will also show you how to leverage the full power of ast-grep pattern for your own usage. + +What is Tree-sitter? +ast-grep uses Tree-sitter as its underlying parsing framework due to its popularity, performance, and robustness. + +Tree-sitter is a tool that generates parsers and provides an incremental parsing library. + +A parser is a program that takes a source code file as input and produces a tree structure that describes the organization of the code. (The tree structure is not an abstract syntax tree, as we will see later). + +Writing good parsers for various programming languages is a laborious task, if possible, for one project like ast-grep. Fortunately, Tree-sitter is a popular tool with wide community support. Many mainstream languages such as C, Java, JavaScript, Python, Rust, and more are supported by Tree-sitter. Using Tree-sitter as ast-grep’s underlying parsing library allows it to work with any language with a well-maintained grammar. + +Another perk of Tree-sitter is its incremental nature. An incremental parser is a parser that can update the syntax tree efficiently when the source code file is edited without having to reparse the entire file. It can run very fast on every code change in ast-greps’ interactive editing. + +Finally, Tree-sitter handles syntax errors gracefully and can parse multiple languages within the same file. This makes pattern code more robust to parse and easier to write. In the future, we can also support multi-language source code like Vue. + +Textual vs Structural +When you use ast-grep to search for patterns in source code, you need to understand the difference between textual and structural matching. + +Source code input is text, a sequence of characters that follows certain syntax rules. You can use common search tools like silver-searcher or ripgrep to search for text patterns in source code. + +However, ast-grep does not match patterns against the text directly. Instead, it parses the text into a tree structure that represents the syntax of the code. This allows ast-grep to match patterns based on the semantic meaning of the code, not just its surface appearance. This is known as structural search, which searches for code with a specific structure, not just a specific text. + +Therefore, the patterns you write must also be of a valid syntax that can be compared with the code tree. + +Textual Search in ast-grep + +Though pattern structurally matches code, you can use the atomic rule regex to matches the text of a node by specifying a regular expression. This way, it is possible to combine textual and structural matching in ast-grep. + +AST vs CST +To represent the syntax and semantics of code, we have two types of tree structures: AST and CST. + +AST stands for Abstract Syntax Tree, which is a simplified representation of the code that omits some details like punctuation and whitespaces. CST stands for Concrete Syntax Tree, a more faithful representation of the code that includes all the details. + +Tree-sitter is a library that can parse code into CSTs for many programming languages. Thusly, ast-grep, contrary to its name, searches and rewrites code based on CST patterns instead of AST. + +Let’s walk through an example to see why CST makes more sense. Consider the JavaScript snippet 1 + 1. Its AST representation looks like this: + +binary_expression + number + number +An astute reader should notice the important operator + is not encoded in AST. Meanwhile, its CST faithfully represents all critical information. + +binary_expression + number + + # note this + operator! + number +You might wonder if using CST will make trivial whitespaces affect your search results. Fortunately, ast-grep uses a smart matching algorithm that can skip trivial nodes in CST when appropriate, which saves you a lot of trouble. + +Named vs Unnamed +It is possible to convert CST to AST if we don’t care about punctuation and whitespaces. Tree-sitter has two types of nodes: named nodes and unnamed nodes(anonymous nodes). + +The more important named nodes are defined with a regular name in the grammar rules, such as binary_expression or identifier. The less important unnamed nodes are defined with literal strings such as "," or "+". + +Named nodes are more important for understanding the code’s structure and meaning, while unnamed nodes are less important and can sometimes be skipped by ast-grep’s matching algorithms. + +The following example, adapted from Tree-sitter’s official guide, shows the difference in grammar definition. + +rules: { + // named nodes are defined with the format `kind: parseRule` + identifier: $ => /[a-z]+/, + // binary_expression is also a named node, + // the `+` operator is defined with a string literal, so it is an unnamed node + binary_expression: $ => seq($.identifier, '+', $.identifier), + // ↑ unnamed node +} +Practically, named nodes have a property called kind that indicates their names. You can use ast-grep's atomic rule kind to find the specific AST node. Playground link for the example below: + +rule: + kind: binary_expression +# matches `1 + 1` +Furthermore, ast-grep’s meta variable matches only named nodes by default. return $A matches only the first statement below. Here’s the Playground link. + +return 123 // `123` is named `number` and matched. +return; // `;` is unnamed and not matched. +We can use double dollar $$VAR to include unnamed nodes in the pattern result. return $$A will match both statements above. Playground link. + +Kind vs Field +Sometimes, using kind alone is insufficient to find the nodes we want. A node may have several children with the same kind but different roles in the code. For example, in JavaScript, an object may have multiple keys and values, all with the string kind. + +To distinguish them, we can use field to specify the relation between a node and its parent. In ast-grep, field can be specified in two relational rules: has and inside. + +has and inside accept a special configuration item called field. The value of field is the field name of the parent-child relation. For example, the key-value pair in JavaScript object has two children: one with field key and the other with field value. We can use this rule to match the key node of kind string. + +rule: + kind: string + inside: + field: key + kind: pair +field can help us narrow the search scope and make the pattern more precise. + +We can also use has to rewrite the rule above, searching for the key-value pair with string key. Playground link. + +rule: + kind: pair + has: + field: key + kind: string +Key difference between kind and field: + +kind is the property of the node itself. Only named nodes have kinds. + +field is the property of the relation between parent and child. Unnamed nodes can also have fields. + +It might be confusing to new users that a node has both kind and field. kind belongs to the node itself, represented by the blue text in ast-grep's playground. The child node has a field only relative to its parent, and vice-versa. field is represented by dark yellow text in the playground. Since field is a property of a node relation, unnamed nodes can also have field. For example, the + in the binary expression 1 + 1 has the field operator. + +Significant vs Trivial +ast-grep goes further beyond tree-sitter. It has a concept about the β€œsignificance” of a node. + +If a node is a named node or has a field relative to its parent, it is a significant node. +Otherwise, the node is trivial. +Even significance is not enough. + +Most tree-sitter languages do not encode all critical semantics in AST, the tree with named nodes only. Even significant nodes are not sufficient to represent the meaning of code. We have to preserve some trivial nodes for precise matching. + +Tree-sitter parsers do not encode all semantics with named nodes. For example, class A { get method() {} } and class A { method() {} } are equivalent to Tree-sitter's AST. The critical token get is not named nor has a field name. It is a trivial node! + +If you do not care about whether the method is a getter method, a static method, or an instance method, you can use class $A { method() {} } to match all three methods at once. Alternatively, you can fully spell out the method modifier if you need to tell a getter method from a normal method. + +Summary +Thank you for reading this far! There are many concepts in this article. Let’s summarize them in a few bullet points: + +ast-grep uses tree-sitter to parse textual source code into a detailed tree structure called CST. +We can get AST from CST by only keeping named nodes with kinds. +To search nodes in a syntax tree, you can use both node kind and node field, which is a special role of a child node relative to its parent node. +A node with either a kind or a field is a significant node. diff --git a/info/ag-instruct.md b/info/ag-instruct.md new file mode 100644 index 0000000..d62ed3b --- /dev/null +++ b/info/ag-instruct.md @@ -0,0 +1,301 @@ + + +--- +description: +globs: +alwaysApply: false +--- + +# use ast-grep to search code + +Your task is to help users to write ast-grep rules to search code. +User will query you by natural language, and you will write ast-grep rules to search code. + +You need to translate user's query into ast-grep rules. +And use ast-grep-mcp to develop a rule, test the rule and then search the codebase. + +## General Process + +1. Clearly understand the user's query. Clarify any ambiguities and if needed, ask user for more details. +2. Write a simple example code snippet that matches the user's query. +3. Write an ast-grep rule that matches the example code snippet. +4. Test the rule against the example code snippet to ensure it matches. Use ast-grep mcp tool `test_match_code_rule` to verify the rule. + a. if the rule does not match, revise the rule by removing some sub rules and debugging unmatching parts. + b. if you are using `inside` or `has` relational rules, ensure to use `stopBy: end` to ensure the search goes to the end of the direction. +5. Use the ast-grep mcp tool to search code using the rule. + +## Tips for Writing Rules + +0. always use `stopBy: end` for relational rules to ensure the search goes to the end of the direction. + +```yaml +has: + pattern: await $EXPR + stopBy: end +``` + +1. if relational rules are used but no match is found, try adding `stopBy: end` to the relational rule to ensure it searches to the end of the direction. +2. use pattern only if the code structure is simple and does not require complex matching (e.g. matching function calls, variable names, etc.). +3. use rule if the code structure is complex and can be broken down into smaller parts (e.g. find call inside certain function). +4. if pattern is not working, try using `kind` to match the node type first, then use `has` or `inside` to match the code structure. + +## Rule Development Process + +1. Break down the user's query into smaller parts. +2. Identify sub rules that can be used to match the code. +3. Combine the sub rules into a single rule using relational rules or composite rules. +4. if rule does not match example code, revise the rule by removing some sub rules and debugging unmatching parts. +5. Use ast-grep mcp tool to dump AST or dump pattern query +6. Use ast-grep mcp tool to test the rule against the example code snippet. + +## ast-grep mcp tool usage + +ast-grep mcp has several tools: + +- dump_syntax_tree will dump the AST of the code, this is useful for debugging and understanding the code structure and patterns +- test_match_code_rule will test a rule agains a code snippet, this is useful to ensure the rule matches the code + +## Rule Format + +# ast-grep Rule Documentation for Claude Code + +## 1. Introduction to ast-grep Rules + +ast-grep rules are declarative specifications for matching and filtering Abstract Syntax Tree (AST) nodes. They enable structural code search and analysis by defining conditions an AST node must meet to be matched. + +### 1.1 Overview of Rule Categories + +ast-grep rules are categorized into three types for modularity and comprehensive definition : + +- **Atomic Rules**: Match individual AST nodes based on intrinsic properties like code patterns (`pattern`), node type (`kind`), or text content (`regex`). +- **Relational Rules**: Define conditions based on a target node's position or relationship to other nodes (e.g., `inside`, `has`, `precedes`, `follows`). +- **Composite Rules**: Combine other rules using logical operations (AND, OR, NOT) to form complex matching criteria (e.g., `all`, `any`, `not`, `matches`). + +## 2. Anatomy of an ast-grep Rule Object + +The ast-grep rule object is the core configuration unit defining how ast-grep identifies and filters AST nodes. It's typically a YAML. + +### 2.1 General Structure and Optionality + +Every field within an ast-grep Rule Object is optional, but at least one "positive" key (e.g., `kind`, `pattern`) must be present. + +A node matches a rule if it satisfies all fields defined within that rule object, implying an implicit logical AND operation. + +For rules using metavariables that depend on prior matching, explicit `all` composite rules are recommended to guarantee execution order. + +**Table 1: ast-grep Rule Object Properties Overview** + +| Property | Type | Category | Purpose | Example | +| :--------- | :--------------------- | :--------- | :----------------------------------------------------- | :----------------------------------------------------------------------- | +| `pattern` | String or Object | Atomic | Matches AST node by code pattern. | `pattern: console.log($ARG)` | +| `kind` | String | Atomic | Matches AST node by its kind name. | `kind: call_expression` | +| `regex` | String | Atomic | Matches node's text by Rust regex. | `regex: ^[a-z]+$` | +| `nthChild` | number, string, Object | Atomic | Matches nodes by their index within parent's children. | `nthChild: 1` | +| `range` | RangeObject | Atomic | Matches node by character-based start/end positions. | `range: { start: { line: 0, column: 0 }, end: { line: 0, column: 10 } }` | +| `inside` | Object | Relational | Target node must be inside node matching sub-rule. | `inside: { pattern: class $C { $$$ }, stopBy: end }` | +| `has` | Object | Relational | Target node must have descendant matching sub-rule. | `has: { pattern: await $EXPR, stopBy: end }` | +| `precedes` | Object | Relational | Target node must appear before node matching sub-rule. | `precedes: { pattern: return $VAL }` | +| `follows` | Object | Relational | Target node must appear after node matching sub-rule. | `follows: { pattern: import $M from '$P' }` | +| `all` | Array | Composite | Matches if all sub-rules match. | `all: [ { kind: call_expression }, { pattern: foo($A) } ]` | +| `any` | Array | Composite | Matches if any sub-rules match. | `any: [ { pattern: foo() }, { pattern: bar() } ]` | +| `not` | Object | Composite | Matches if sub-rule does not match. | `not: { pattern: console.log($ARG) }` | +| `matches` | String | Composite | Matches if predefined utility rule matches. | `matches: my-utility-rule-id` | + +## 3. Atomic Rules: Fundamental Matching Building Blocks + +Atomic rules match individual AST nodes based on their intrinsic properties. + +### 3.1 `pattern`: String and Object Forms + +The `pattern` rule matches a single AST node based on a code pattern. + +- **String Pattern**: Directly matches using ast-grep's pattern syntax with metavariables. + - Example: `pattern: console.log($ARG)` +- **Object Pattern**: Offers granular control for ambiguous patterns or specific contexts. + + - `selector`: Pinpoints a specific part of the parsed pattern to match. + + ```yaml + pattern: + selector: field_definition + context: class { $F } + ``` + + - `context`: Provides surrounding code context for correct parsing. + - `strictness`: Modifies the pattern's matching algorithm (`cst`, `smart`, `ast`, `relaxed`, `signature`). + + ```yaml + pattern: + context: foo($BAR) + strictness: relaxed + ``` + +### 3.2 `kind`: Matching by Node Type + +The `kind` rule matches an AST node by its `tree_sitter_node_kind` name, derived from the language's Tree-sitter grammar. Useful for targeting constructs like `call_expression` or `function_declaration`. + +- Example: `kind: call_expression` + +### 3.3 `regex`: Text-Based Node Matching + +The `regex` rule matches the entire text content of an AST node using a Rust regular expression. It's not a "positive" rule, meaning it matches any node whose text satisfies the regex, regardless of its structural kind. + +### 3.4 `nthChild`: Positional Node Matching + +The `nthChild` rule finds nodes by their 1-based index within their parent's children list, counting only named nodes by default. + +- `number`: Matches the exact nth child. Example: `nthChild: 1` +- `string`: Matches positions using An+B formula. Example: `2n+1` +- `Object`: Provides granular control: + - `position`: `number` or An+B string. + - `reverse`: `true` to count from the end. + - `ofRule`: An ast-grep rule to filter the sibling list before counting. + +### 3.5 `range`: Position-Based Node Matching + +The `range` rule matches an AST node based on its character-based start and end positions. A `RangeObject` defines `start` and `end` fields, each with 0-based `line` and `column`. `start` is inclusive, `end` is exclusive. + +## 4. Relational Rules: Contextual and Hierarchical Matching + +Relational rules filter targets based on their position relative to other AST nodes. They can include `stopBy` and `field` options. + +--- + +### 4.1 `inside`: Matching Within a Parent Node + +Requires the target node to be inside another node matching the `inside` sub-rule. + +- Example: + + ```yaml + inside: + pattern: class $C { $$$ } + stopBy: end + ``` + +### 4.2 `has`: Matching with a Descendant Node + +Requires the target node to have a descendant node matching the `has` sub-rule. + +- Example: + + ```yaml + has: + pattern: await $EXPR + stopBy: end + ``` + +### 4.3 `precedes` and `follows`: Sequential Node Matching + +- `precedes`: Target node must appear before a node matching the `precedes` sub-rule. +- `follows`: Target node must appear after a node matching the `follows` sub-rule. + +Both include `stopBy` but not `field`. + +### 4.4 `stopBy` and `field`: Refining Relational Searches + +- `stopBy`: Controls search termination for relational rules. + - `"neighbor"` (default): Stops when immediate surrounding node doesn't match. + - `"end"`: Searches to the end of the direction (root for `inside`, leaf for `has`). + - `Rule object`: Stops when a surrounding node matches the provided rule (inclusive). +- `field`: Specifies a sub-node within the target node that should match the relational rule. Only for `inside` and `has`. + +When you are not sure, always use `stopBy: end` to ensure the search goes to the end of the direction. + +## 5. Composite Rules: Logical Combination of Conditions + +Composite rules combine atomic and relational rules using logical operations. + +### 5.1 `all`: Conjunction (AND) of Rules + +Matches a node only if all sub-rules in the list match. Guarantees order of rule matching, important for metavariables. + +- Example: + + ```yaml + all: + - kind: call_expression + - pattern: console.log($ARG) + ``` + +### 5.2 `any`: Disjunction (OR) of Rules + +Matches a node if any sub-rules in the list match. + +- Example: + + ```yaml + any: + - pattern: console.log($ARG) + - pattern: console.warn($ARG) + - pattern: console.error($ARG) + ``` + +### 5.3 `not`: Negation (NOT) of a Rule + +Matches a node if the single sub-rule does not match. + +- Example: + + ```yaml + not: + pattern: console.log($ARG) + ``` + +### 5.4 `matches`: Rule Reuse and Utility Rules + +Takes a rule-id string, matching if the referenced utility rule matches. Enables rule reuse and recursive rules. + +## 6. Metavariables: Dynamic Content Matching + +Metavariables are placeholders in patterns to match dynamic content in the AST. + +### 6.1 `$VAR`: Single Named Node Capture + +Captures a single named node in the AST. + +- **Valid**: `$META`, `$META_VAR`, `$_` +- **Invalid**: `$invalid`, `$123`, `$KEBAB-CASE` +- **Example**: `console.log($GREETING)` matches `console.log('Hello World')`. +- **Reuse**: `$A == $A` matches `a == a` but not `a == b`. + +### 6.2 `$$VAR`: Single Unnamed Node Capture + +Captures a single unnamed node (e.g., operators, punctuation). + +- **Example**: To match the operator in `a + b`, use `$$OP`. + + ```yaml + rule: + kind: binary_expression + has: + field: operator + pattern: $$OP + ``` + +### 6.3 `$$$MULTI_META_VARIABLE`: Multi-Node Capture + +Matches zero or more AST nodes (non-greedy). Useful for variable numbers of arguments or statements. + +- **Example**: `console.log($$$)` matches `console.log()`, `console.log('hello')`, and `console.log('debug:', key, value)`. +- **Example**: `function $FUNC($$$ARGS) { $$$ }` matches functions with varying parameters/statements. + +### 6.4 Non-Capturing Metavariables (`_VAR`) + +Metavariables starting with an underscore (`_`) are not captured. They can match different content even if named identically, optimizing performance. + +- **Example**: `$_FUNC($_FUNC)` matches `test(a)` and `testFunc(1 + 1)`. + +### 6.5 Important Considerations for Metavariable Detection + +- **Syntax Matching**: Only exact metavariable syntax (e.g., `$A`, `$$B`, `$$$C`) is recognized. +- **Exclusive Content**: Metavariable text must be the only text within an AST node. +- **Non-working**: `obj.on$EVENT`, `"Hello $WORLD"`, `a $OP b`, `$jq`. + +The ast-grep playground is useful for debugging patterns and visualizing metavariables. diff --git a/mise.toml b/mise.toml index 24421d4..33a0fa4 100644 --- a/mise.toml +++ b/mise.toml @@ -1,5 +1,12 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + [tools] act = "latest" +ast-grep = "latest" +bun = "latest" cargo-binstall = "latest" "cargo:cargo-audit" = "latest" "cargo:cargo-deny" = "latest" @@ -7,7 +14,6 @@ cargo-binstall = "latest" "cargo:cargo-generate" = "latest" "cargo:cargo-nextest" = "latest" "cargo:cargo-watch" = "latest" -"cargo:tree-sitter-cli" = "latest" gh = "latest" gitsign = "latest" hk = "latest" @@ -17,7 +23,7 @@ node = "24" "pipx:reuse" = "latest" pkl = "latest" ripgrep = "latest" -rust = "1.88" +rust = "nightly" taplo = "latest" typos = "latest" uv = "latest" @@ -32,78 +38,108 @@ HK_MISE = 1 idiomatic_version_file_enable_tools = [] [hooks] -# runs when you enter the repo -enter = "eval \"$(mise activate)\" && mise run install -y --silent && mise run update --silent -y && mise env -qy" +enter = """ +#!/usr/bin/bash +chmod +x scripts/* &>/dev/null && +mise run activate && +mise run install-tools && +mise run update-tools +""" # deactivate/unhook when you leave -leave = "eval \"$(mise deactivate)\"" +leave = """eval "$(mise deactivate)" &/dev/null""" # Tasks are run by using `mise run taskname`, like `mise run build` # run tasks run as simple shell commands #** -------------------- Tool and Setup Tasks -------------------- -[tasks.update] +[tasks.update-tools] description = "update all dev tools and mise" -run = "mise upgrade -yq && mise self-update -yq && mise reshim -yq && mise prune -yq" - -[tasks.install] +run = """ +#!/usr/bin/bash +mise upgrade -yq && +mise self-update -yq && +mise reshim -yq && +mise prune -yq +""" + +[tasks.activate] +description = "activate mise" +run = """eval "$(mise activate)" &>/dev/null""" +hide = true + +[tasks.install-tools] description = "setup dev tooling and development hooks" -run = "eval \"$(mise activate)\" && mise trust -yq && mise install && mise reshim && hk install --mise" +run = """ +#!/usr/bin/bash +mise trust -yq && +mise install -yq && +mise reshim -yq && +hk run installhooks &>/dev/null +""" [tasks.installhooks] description = "only install development hooks" run = "hk install --mise" +[tasks.update] +description = "update dependencies" +run = "cargo update && cargo update --workspace" + #** -------------------- Cleaning Tasks -------------------- [tasks.cleancache] description = "delete the cache" run = "rm -rf .cache && mise prune -yq" -hide = true # hide this task from the list +hide = true # hide this task from the list [tasks.clean] depends = ['cleancache'] description = "Removes caches and build artifacts." -run = "cargo clean && rm -r crates/thread-wasm/pkg &>/dev/null" +run = "cargo clean && rm -rf crates/thread-wasm/pkg &>/dev/null" #** -------------------- Build Tasks -------------------- [tasks.build] description = 'Build everything (except wasm)' -run = "cargo build" -alias = 'b' # `mise run b` = build +run = "cargo build --workspace" +alias = 'b' # `mise run b` = build [tasks.build-release] description = 'Build everything in release mode (except wasm)' -run = "cargo build --release" -alias = 'br' # `mise run br` = build release +run = "cargo build --workspace --release --features inline" +alias = 'br' # `mise run br` = build release [tasks.build-wasm] description = 'Build WASM target for development' run = "cargo run -p xtask build-wasm" -alias = 'bw' # `mise run bw` = build wasm +alias = 'bw' # `mise run bw` = build wasm [tasks.build-wasm-browser-dev] description = 'Build WASM target for browser development' # we don't use the browser target, so currently this is just for testing purposes run = "cargo run -p xtask build-wasm --multi-threading" -alias = 'bwd' # `mise run bwd` = build wasm browser dev +alias = 'bwd' # `mise run bwd` = build wasm browser dev [tasks.build-wasm-profile] description = 'Build WASM target with profiling' run = "cargo run -p xtask build-wasm --profiling" -alias = 'bwp' # `mise run bwp` = build wasm profiling +alias = 'bwp' # `mise run bwp` = build wasm profiling [tasks.build-wasm-browser-profile] description = 'Build WASM target for browser to profile' run = "cargo run -p xtask build-wasm --profiling --multi-threading" -alias = 'bwpd' # `mise run bwpd` = build wasm browser prod +alias = 'bwpd' # `mise run bwpd` = build wasm browser prod [tasks.build-wasm-release] description = 'Build WASM target in release mode.' run = "cargo run -p xtask build-wasm --release" -alias = 'bwr' # `mise run bwr` = build wasm release +alias = 'bwr' # `mise run bwr` = build wasm release #** -------------------- Testing/Linting/Formatting Tasks -------------------- +[tasks.update-licenses] +description = "Update license headers for all files" +run = "./scripts/update-licenses.py" + [tasks.test] description = 'Run automated tests' # multiple commands are run in series diff --git a/sbom.spdx b/sbom.spdx new file mode 100644 index 0000000..1c08b8e --- /dev/null +++ b/sbom.spdx @@ -0,0 +1,1046 @@ +SPDXVersion: SPDX-2.1 +DataLicense: CC0-1.0 +SPDXID: SPDXRef-DOCUMENT +DocumentName: thread +DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-bdaf1bc4-27a6-497c-91b3-84bb446f3f79 +Creator: Person: Adam Poulemanos () +Creator: Organization: Knitli Inc. () +Creator: Tool: reuse-5.0.2 +Created: 2025-07-14T01:39:20Z +CreatorComment: This document was created automatically using available reuse information consistent with REUSE. +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c09350ad7681b31e00d4cbe9347d9a28 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9772cb77ce5f8e5ab40c5d0d883c6b6a +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cc7a2a5256f01f6b53ffbf2c91d8816d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-21b9569e1356654b65ce429b761504cc +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-717f431b53324ab5982503ba0942204c +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f242c4117b8e90f50a4d999c8f6c41d3 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-30a1d7c15b8accb80dd84e03cfbac958 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2da833ce75133e5a7f1fe03c4277501f +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3d234cbb2af38a2552a260838c2f2780 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b52bef1803120595f4b2a7829dcbb9ff +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-78c5a17a25c0fbe9a485604f75a75a88 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-254efab25cd56c3d35894c02bf061106 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f49a1411a64e66a5b47a9f313f58203d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3b4809ddb478fee87ed9acc56cd80263 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c788f66f6d7bce45f2995c46adb99977 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7383628c997a024f664b4622800b3266 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-0fcae69dc9c3a42d7d0f5ac5ec0d4e73 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2a39efeba1882ae4dea0a92c6ca89dc7 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-39099658cc1b787334fbcc7ed50ef03b +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-da4f36c220bf6db4007063fdd168f1f7 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4ec674bfa203a164ea31b2464eafb449 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8a6591f856789d9609a1dbf8700d8f27 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-440a0faf4d201fe3c28a5b2a3eff13e3 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-aed67bd6c3c2f6a3ffea77891c719508 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3dcad423b391e453fbcd509041b97d5c +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-09fa6f8a26e96de897874878768a974a +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5e236e0e972d31b24852404c91041914 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e5a2d0f29ecfa6b574bbacdb4fe20afe +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-678e829f8b6e1ea82c4598ec9fe3fa77 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3e193753a71b49d2d17a6013aca71a00 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a7144ef2367f48f40b80a010550cbfa4 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9f11ae50ad1f27f036c90be0bdeef272 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-079ca752921b2b370f642dd3673a3dad +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f73073cd1916b86a614c6401c1c6be54 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e4755e8a20b532662c4530fbc225e898 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8d9a2705429af553ea97908a15b1a250 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-eca11699fdb87d6c1440c09cb46cd048 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a6bf022467040f75b7cf2db6a7f24fad +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c96a8cba67b92fac68d6f5e206270b4c +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-0f021921fd74ba2de2a831053b5a9646 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-69ac961743696227e7411fc5ca2e6a94 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-99407358b674bbb340970ee47b97c40d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6f08941627643ff7977c7a9a5310735c +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8d363400b7e6b0d2caf830a079503a3f +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-38f6c247a07e71a3e1a4c768ad7f9309 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-02449f9481b452608b5c7e583570efd0 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-449bbe9e330b8492ee9368aac216056e +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-14ccdccbf1f49876d41aebffa5d6f489 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-100a39ce86f3adc287535c4d31e2ed57 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b9cf4bbbc385f28b19a397c567b22ca2 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-01ebf8805fb04c95006afb70a074f4bf +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e0a8d8b7c0cef31c000331c66a98db64 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5149c394d8e1c6866a99fdb403e2b3e1 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7d06bafb476dca352c5f6373fe6bdc07 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2b16ccc2ab42f014960c65bbc80e7b85 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-ff05fc4fdc5336e80d443fd4d8c02b92 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3a0d04868bd59307158fa676595721be +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4fe6c42670dd239542cdc3dac7bd82dd +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e301ecf3829e863cb4f05049e91152aa +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d360182a7086a20b3c0705c58463e3b6 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7d48e8f79f50a633a04cbf4594fcb5b1 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3cf739e8034d59665604a60997ca06c9 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1453f5fb98775d83908543158e864718 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-0cd6be9db8cfe7338445e9d77500e13b +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c63352adae2e4c79a3b366b724c62636 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3de79ae5595ae3fff8fb9a3943888bc7 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-37778e089c4f835cc37a80d4e0033e33 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8a13b12f73326f068db26c486b0b53b9 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6f1c4f174deef93c39f8e3513e31a2ab +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-fd89fb00e1677ed98e349ce1ee67cd47 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7184ddfe7e64d5a27c5ce6e6e2481cad +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-865a5c4b0712c0fab675e38cae3522fa +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d042521c12b5e973e7d11f9257d66bc0 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-992b79db975e1d3dd355d6fb729e3127 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4438ac8225780467f1f6af36cac3c607 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e9240ed0fd5d93b5e591c23bfe7828ee +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d41de76e0684cf1b8272f4524d01d712 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7d89758e1293be0feef9f86cc906aaf4 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c62ba560541c1b3e24aaf44e75124458 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c584db239ad3a674d3d80e685a810051 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b59c4db430766b146a4d16788cee7744 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8d9d11f1a601311bb2d47b9789518d10 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1018ceacc606f9bbb70eff0af5af576d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-da48ef7cd1a99ab090dcbff5fbadedbb +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f9e22dfe0b5443081b3325e5d29ab5a4 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-92c37420e3cb62fb03ea8771867d17d8 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5809c17d3c8ce2fdfe390918384ff879 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7cd1d69437809fba1e7da6f80d225dff +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-84cf09fe3ed0320d1e4514ff60bd281a +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f46cf71b99032f5ccf7acc5458518a42 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4eb0e1712f8060bad99f9cf87751bae9 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9b0f570ff5d7001399b0082cfd36655b +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7b2801debffe14ddad59a2b912c1b3d1 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cce5ffe10aba663335680e4e43bd9288 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5aa13c18af5ef3dec3ecab0f8c63bb7a +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a52bee01e6a3136dfb1aa4c5801ca671 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-db4b8d6674824e5018c48e38aff38022 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3748ffc1bb58ea2ea7cf599ef81e64a7 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-979fb0d254aeba65211578ff8b35684d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-640c584f4da01eb49b738f7c45c188af +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-49d2aa98c1d7212438eee6fd73d05d7f +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9f2ead9ce46a115b43f8e59e3f8daf88 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7d9220e1bfa8d6cd26e5486c4d0116d1 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2b3c6dc79aaa8ab187f18575424cec72 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b0c7afe8a516b792025a21fac26f330d +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-00ab9cf365c27b94b2da4a73ab9274ba +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-acdd7bc65db8153d946983a5cd0906b5 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-0fa26d060b21f2196307d9d54cc6b9d4 +Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-45d1bf4a69990c4f8b0526410fd5fc08 + +FileName: ./.github/chatmodes/analyze.chatmode.md +SPDXID: SPDXRef-c09350ad7681b31e00d4cbe9347d9a28 +FileChecksum: SHA1: 861098d2e2826f0c3fa82f390f5191b68657bd24 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.github/dependabot.yml +SPDXID: SPDXRef-9772cb77ce5f8e5ab40c5d0d883c6b6a +FileChecksum: SHA1: c48fceac5d24c9f1cf826c6e9bba141ad70e3735 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.github/dontusefornow.md +SPDXID: SPDXRef-cc7a2a5256f01f6b53ffbf2c91d8816d +FileChecksum: SHA1: 913bfb2bf1a0d1b58b67df69b68c70e915a1a9ca +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.github/workflows/ci.yml +SPDXID: SPDXRef-21b9569e1356654b65ce429b761504cc +FileChecksum: SHA1: abfddbf281da1261272cffb71e0b5389facca5a9 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.github/workflows/cla.yml +SPDXID: SPDXRef-717f431b53324ab5982503ba0942204c +FileChecksum: SHA1: 456e15b358b7b603e3ef4c8e044badff5f18241d +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.gitignore +SPDXID: SPDXRef-f242c4117b8e90f50a4d999c8f6c41d3 +FileChecksum: SHA1: 7462ac10548581cc76e63cf5c585755fbce3149b +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.mcp.json +SPDXID: SPDXRef-30a1d7c15b8accb80dd84e03cfbac958 +FileChecksum: SHA1: cf461167ff1250468324e5cd024c03d7cbcf1c97 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.vscode/settings.json +SPDXID: SPDXRef-2da833ce75133e5a7f1fe03c4277501f +FileChecksum: SHA1: c595166ccf40a441ad0371b9f32fae9923466b12 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./.yamlfmt.yml +SPDXID: SPDXRef-3d234cbb2af38a2552a260838c2f2780 +FileChecksum: SHA1: 85fb0f5c06af8d78d149d41445566a9bed4ba3fe +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./CLAUDE.md +SPDXID: SPDXRef-b52bef1803120595f4b2a7829dcbb9ff +FileChecksum: SHA1: 1f88065390da4f46a8361fa1ee17dec735b01d0e +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./CONTRIBUTORS_LICENSE_AGREEMENT.md +SPDXID: SPDXRef-78c5a17a25c0fbe9a485604f75a75a88 +FileChecksum: SHA1: 2dde379d01d323ebbf4fd94ace8f68f08f62471c +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./Cargo.lock +SPDXID: SPDXRef-254efab25cd56c3d35894c02bf061106 +FileChecksum: SHA1: eac1a8443fb125367ad3746046045a967694ea70 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./Cargo.toml +SPDXID: SPDXRef-f49a1411a64e66a5b47a9f313f58203d +FileChecksum: SHA1: 0ed26e100efc39f4d4520b37c06bd840ccf1ccd6 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./PLAN.md +SPDXID: SPDXRef-3b4809ddb478fee87ed9acc56cd80263 +FileChecksum: SHA1: 444991826f65bc3a719dc43e8d7b0c219fa5b4ed +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./README.md +SPDXID: SPDXRef-c788f66f6d7bce45f2995c46adb99977 +FileChecksum: SHA1: 32c0f8f702ff587817c9b143cb1dcbd9bd0196bc +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./VENDORED.md +SPDXID: SPDXRef-7383628c997a024f664b4622800b3266 +FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./_typos.toml +SPDXID: SPDXRef-0fcae69dc9c3a42d7d0f5ac5ec0d4e73 +FileChecksum: SHA1: 45f3331f8e49d6c4691bbe1ceffb5493e1390b17 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./_unused.toml +SPDXID: SPDXRef-2a39efeba1882ae4dea0a92c6ca89dc7 +FileChecksum: SHA1: 21ad928a9262fdb2b1dc3ba408aa702694c4833c +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/Cargo.toml +SPDXID: SPDXRef-39099658cc1b787334fbcc7ed50ef03b +FileChecksum: SHA1: f4052fe44bd26ee3b42fe7fca74c6d14ab2ca540 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/VENDORED.md +SPDXID: SPDXRef-da4f36c220bf6db4007063fdd168f1f7 +FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/language.rs +SPDXID: SPDXRef-4ec674bfa203a164ea31b2464eafb449 +FileChecksum: SHA1: 082758427677ecf4d213c96a37111cf471aa547f +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/lib.rs +SPDXID: SPDXRef-8a6591f856789d9609a1dbf8700d8f27 +FileChecksum: SHA1: 8dfafad95d23f9d63c457b1facb80461962be64a +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/match_tree/match_node.rs +SPDXID: SPDXRef-440a0faf4d201fe3c28a5b2a3eff13e3 +FileChecksum: SHA1: 470546cd5b13e2df533408f9d0efe96ced9cf837 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/match_tree/mod.rs +SPDXID: SPDXRef-aed67bd6c3c2f6a3ffea77891c719508 +FileChecksum: SHA1: 9ad496d56b4dbdf8c617a778e4800cb81459875f +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/match_tree/strictness.rs +SPDXID: SPDXRef-3dcad423b391e453fbcd509041b97d5c +FileChecksum: SHA1: 36ada28965ca4e4babefb2ceb733c7b77ea38102 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/matcher.rs +SPDXID: SPDXRef-09fa6f8a26e96de897874878768a974a +FileChecksum: SHA1: 044f31fbf693c33f08e143776eb27842cab36ab0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/matcher/kind.rs +SPDXID: SPDXRef-5e236e0e972d31b24852404c91041914 +FileChecksum: SHA1: cbf8fbabf090ddb5945adbd93f0db726738e07d3 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/matcher/node_match.rs +SPDXID: SPDXRef-e5a2d0f29ecfa6b574bbacdb4fe20afe +FileChecksum: SHA1: 0d88702211c643dc5758c184448bd2ffac74c0f1 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/matcher/pattern.rs +SPDXID: SPDXRef-678e829f8b6e1ea82c4598ec9fe3fa77 +FileChecksum: SHA1: 4e5a27f9051b73d4b9acf433f71d40ce16d0539d +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/matcher/text.rs +SPDXID: SPDXRef-3e193753a71b49d2d17a6013aca71a00 +FileChecksum: SHA1: 5f6ac26ed9667c8f40e668b498ce8e5616b6dfd8 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/meta_var.rs +SPDXID: SPDXRef-a7144ef2367f48f40b80a010550cbfa4 +FileChecksum: SHA1: 0b28bb5f587a960c757864401e3f8ec9c693e994 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/node.rs +SPDXID: SPDXRef-9f11ae50ad1f27f036c90be0bdeef272 +FileChecksum: SHA1: d7e3ca0e6cc67f20c424aaa03f1b18bf7434a9c0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/ops.rs +SPDXID: SPDXRef-079ca752921b2b370f642dd3673a3dad +FileChecksum: SHA1: c1559f4bc58a3613ace4f22229a42ed07fe2d98a +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/pinned.rs +SPDXID: SPDXRef-f73073cd1916b86a614c6401c1c6be54 +FileChecksum: SHA1: 6c3f40112ec87e6e8a8dd398801371bed77d7d45 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/replacer.rs +SPDXID: SPDXRef-e4755e8a20b532662c4530fbc225e898 +FileChecksum: SHA1: e67dcc6408c5d3c424311f9cc9d9fce38dc71b2c +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/replacer/indent.rs +SPDXID: SPDXRef-8d9a2705429af553ea97908a15b1a250 +FileChecksum: SHA1: 27ba9f2a32da151baa8f4d9de5eb07dc27371093 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/replacer/structural.rs +SPDXID: SPDXRef-eca11699fdb87d6c1440c09cb46cd048 +FileChecksum: SHA1: 18052197b6e0a771c8c421573b8108a7ed96e26b +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/replacer/template.rs +SPDXID: SPDXRef-a6bf022467040f75b7cf2db6a7f24fad +FileChecksum: SHA1: fd09fc7b00e8279746417c560a3e40fc688e03b8 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/source.rs +SPDXID: SPDXRef-c96a8cba67b92fac68d6f5e206270b4c +FileChecksum: SHA1: cf89065168462ce594c1a33be4aa900168937faa +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/tree_sitter/mod.rs +SPDXID: SPDXRef-0f021921fd74ba2de2a831053b5a9646 +FileChecksum: SHA1: e6f0e5e4492cbb9a1ea6ed45d52dc8ddf6184169 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/ast-engine/src/tree_sitter/traversal.rs +SPDXID: SPDXRef-69ac961743696227e7411fc5ca2e6a94 +FileChecksum: SHA1: 297876778fc6fcafe560759e37dfdcd055b07809 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/Cargo.toml +SPDXID: SPDXRef-99407358b674bbb340970ee47b97c40d +FileChecksum: SHA1: 2d16d75e9b73c842a089e0e50ddc59405f825042 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/VENDORED.md +SPDXID: SPDXRef-6f08941627643ff7977c7a9a5310735c +FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/bash.rs +SPDXID: SPDXRef-8d363400b7e6b0d2caf830a079503a3f +FileChecksum: SHA1: f6032441cdc06ad020c543da11ed928a17e3e30b +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/cpp.rs +SPDXID: SPDXRef-38f6c247a07e71a3e1a4c768ad7f9309 +FileChecksum: SHA1: 71c803a8e2a86eec2190692d18c3628cacc1141c +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/csharp.rs +SPDXID: SPDXRef-02449f9481b452608b5c7e583570efd0 +FileChecksum: SHA1: 7efeb46943df29ad1c143f75228da5bdf655d17a +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/css.rs +SPDXID: SPDXRef-449bbe9e330b8492ee9368aac216056e +FileChecksum: SHA1: 74a9a06ce769907841924042a39cf8885a36d6f0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/elixir.rs +SPDXID: SPDXRef-14ccdccbf1f49876d41aebffa5d6f489 +FileChecksum: SHA1: a5bd1b34e4431fb00f54c52d00caf67dd4ad1aba +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/go.rs +SPDXID: SPDXRef-100a39ce86f3adc287535c4d31e2ed57 +FileChecksum: SHA1: 1aa88793ae58baef1c1e825118ea8cda1d48678e +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/haskell.rs +SPDXID: SPDXRef-b9cf4bbbc385f28b19a397c567b22ca2 +FileChecksum: SHA1: c49d9ffcf483b2550c4216051c137daf2caae915 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/html.rs +SPDXID: SPDXRef-01ebf8805fb04c95006afb70a074f4bf +FileChecksum: SHA1: d65a2b124845c1cc2bebc64012efa788adbbb93b +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/json.rs +SPDXID: SPDXRef-e0a8d8b7c0cef31c000331c66a98db64 +FileChecksum: SHA1: b67a3b0df7087430397b79523fbd1aad26b0cdc7 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/kotlin.rs +SPDXID: SPDXRef-5149c394d8e1c6866a99fdb403e2b3e1 +FileChecksum: SHA1: d5d19506d172c8f683abc81514e61a4f2bad54ab +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/lib.rs +SPDXID: SPDXRef-7d06bafb476dca352c5f6373fe6bdc07 +FileChecksum: SHA1: 236802e1012e19faf3659de807a8ccfd04507e99 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/lua.rs +SPDXID: SPDXRef-2b16ccc2ab42f014960c65bbc80e7b85 +FileChecksum: SHA1: 0563649957fe370d9645c05b922a0693baac001e +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/parsers.rs +SPDXID: SPDXRef-ff05fc4fdc5336e80d443fd4d8c02b92 +FileChecksum: SHA1: 0724d08aa1d6eca5124d7a57ae94aa71a0a1b530 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/php.rs +SPDXID: SPDXRef-3a0d04868bd59307158fa676595721be +FileChecksum: SHA1: 766232ae563e1238d2ee7ab8bc31d286cd3cf236 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/python.rs +SPDXID: SPDXRef-4fe6c42670dd239542cdc3dac7bd82dd +FileChecksum: SHA1: 26b1bc6f9a6e27f59f0695988c25d514bb701da0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/ruby.rs +SPDXID: SPDXRef-e301ecf3829e863cb4f05049e91152aa +FileChecksum: SHA1: 8d973d35aa6a18c81bdc49e6bc58c9ce1db4cc97 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/rust.rs +SPDXID: SPDXRef-d360182a7086a20b3c0705c58463e3b6 +FileChecksum: SHA1: b83564f8fb64d2ba47f35af947f7b59962b28cf1 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/scala.rs +SPDXID: SPDXRef-7d48e8f79f50a633a04cbf4594fcb5b1 +FileChecksum: SHA1: edf5fc8676806bfc3bc0553a599fcd5f730c2496 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/swift.rs +SPDXID: SPDXRef-3cf739e8034d59665604a60997ca06c9 +FileChecksum: SHA1: f424dc137132086bc093fc57a4ea79a9588bdbcc +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/language/src/yaml.rs +SPDXID: SPDXRef-1453f5fb98775d83908543158e864718 +FileChecksum: SHA1: b443dd552fb8867179d7c18bb04cd89ee6215003 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/Cargo.toml +SPDXID: SPDXRef-0cd6be9db8cfe7338445e9d77500e13b +FileChecksum: SHA1: dd9e5959bc4bdb867cf51fd3051615f4c09ba03e +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/VENDORED.md +SPDXID: SPDXRef-c63352adae2e4c79a3b366b724c62636 +FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/check_var.rs +SPDXID: SPDXRef-3de79ae5595ae3fff8fb9a3943888bc7 +FileChecksum: SHA1: b4fe5353a4d440043ef69d91174a4c27bd7f1611 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/combined.rs +SPDXID: SPDXRef-37778e089c4f835cc37a80d4e0033e33 +FileChecksum: SHA1: 39634d3e682d89c08654fa8988bf938f74ee50b0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/fixer.rs +SPDXID: SPDXRef-8a13b12f73326f068db26c486b0b53b9 +FileChecksum: SHA1: 312a9d8fd7ac6c7d357c6f1105a4cd67326e6507 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/label.rs +SPDXID: SPDXRef-6f1c4f174deef93c39f8e3513e31a2ab +FileChecksum: SHA1: 5f812130fcb5ad071b9817f36f8cf2fd791a7742 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/lib.rs +SPDXID: SPDXRef-fd89fb00e1677ed98e349ce1ee67cd47 +FileChecksum: SHA1: 929c5540082e807f52371d1170e4617043383203 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/maybe.rs +SPDXID: SPDXRef-7184ddfe7e64d5a27c5ce6e6e2481cad +FileChecksum: SHA1: 2f6337afa60f87909bc166c52725e8d4b84324e3 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/deserialize_env.rs +SPDXID: SPDXRef-865a5c4b0712c0fab675e38cae3522fa +FileChecksum: SHA1: 085123a0a5bc43f264addab2338b4a6ae8761503 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/mod.rs +SPDXID: SPDXRef-d042521c12b5e973e7d11f9257d66bc0 +FileChecksum: SHA1: 9cd0e8f8a51c94936f0ef8a1d84d3bf0cb84f5e0 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/nth_child.rs +SPDXID: SPDXRef-992b79db975e1d3dd355d6fb729e3127 +FileChecksum: SHA1: e5ad145f16d75eb582f5952a7436fb416a69260a +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/range.rs +SPDXID: SPDXRef-4438ac8225780467f1f6af36cac3c607 +FileChecksum: SHA1: 5ff31e3bd3195b380d6bde980ef759ce4d14b3e1 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/referent_rule.rs +SPDXID: SPDXRef-e9240ed0fd5d93b5e591c23bfe7828ee +FileChecksum: SHA1: 461bd7135890f5c500ed3008adc2d1f3d460624d +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/relational_rule.rs +SPDXID: SPDXRef-d41de76e0684cf1b8272f4524d01d712 +FileChecksum: SHA1: c8a6c7c77765af4c5dc010296126f665f2f02a4b +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule/stop_by.rs +SPDXID: SPDXRef-7d89758e1293be0feef9f86cc906aaf4 +FileChecksum: SHA1: 606dbbbec37dcad9bbe77513e0bf045b71038326 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule_collection.rs +SPDXID: SPDXRef-c62ba560541c1b3e24aaf44e75124458 +FileChecksum: SHA1: 97d19958210ceb8ec8d7c1daf43e1a9140d38b0c +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule_config.rs +SPDXID: SPDXRef-c584db239ad3a674d3d80e685a810051 +FileChecksum: SHA1: 15514a63ab2cd47cd5782193d4513f8f1ed462b2 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/rule_core.rs +SPDXID: SPDXRef-b59c4db430766b146a4d16788cee7744 +FileChecksum: SHA1: 229fdeae7376f2e4d0ed5c24b3e8b6d02bf7a4f4 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/transform/mod.rs +SPDXID: SPDXRef-8d9d11f1a601311bb2d47b9789518d10 +FileChecksum: SHA1: 1172df792c087a0f35060ee833abdf8f8274b324 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/transform/parse.rs +SPDXID: SPDXRef-1018ceacc606f9bbb70eff0af5af576d +FileChecksum: SHA1: 82bab17cf05ded5eb40b2c39f8052e45bc9fe77f +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/transform/rewrite.rs +SPDXID: SPDXRef-da48ef7cd1a99ab090dcbff5fbadedbb +FileChecksum: SHA1: cf53f6b262b9cca03a0e994447b934e264103255 +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/transform/string_case.rs +SPDXID: SPDXRef-f9e22dfe0b5443081b3325e5d29ab5a4 +FileChecksum: SHA1: bd670c800289077f66be1c525d1f9bdc585f18be +LicenseConcluded: AGPL-3.0-or-later AND MIT +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/rule-engine/src/transform/trans.rs +SPDXID: SPDXRef-92c37420e3cb62fb03ea8771867d17d8 +FileChecksum: SHA1: a6023fd2159d94a1d3fc945b6646b60347da031e +LicenseConcluded: MIT +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/services/Cargo.toml +SPDXID: SPDXRef-5809c17d3c8ce2fdfe390918384ff879 +FileChecksum: SHA1: aba99c2c38dc7adf6f050377c9d71e353d392dd3 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/services/src/lib.rs +SPDXID: SPDXRef-7cd1d69437809fba1e7da6f80d225dff +FileChecksum: SHA1: dd903c10449b6fff6e59b9a6e8ab1963a8328014 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/utils/Cargo.toml +SPDXID: SPDXRef-84cf09fe3ed0320d1e4514ff60bd281a +FileChecksum: SHA1: 9de4654c60ab672b4f8cf80a65c806c0cb4e418b +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/utils/src/fastmap.rs +SPDXID: SPDXRef-f46cf71b99032f5ccf7acc5458518a42 +FileChecksum: SHA1: ff11312dd50a0c6c50042dbdea6ef1ce939b77f9 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/utils/src/lib.rs +SPDXID: SPDXRef-4eb0e1712f8060bad99f9cf87751bae9 +FileChecksum: SHA1: 4e236c852a8bfcb6f3a551745ea1d8b1dc6f0b91 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/.appveyor.yml +SPDXID: SPDXRef-9b0f570ff5d7001399b0082cfd36655b +FileChecksum: SHA1: 8c050d78b239c9199f30f0cce8654c654ad6a6bb +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/.gitignore +SPDXID: SPDXRef-7b2801debffe14ddad59a2b912c1b3d1 +FileChecksum: SHA1: 46315b956812e71a0f7525ee2e5e948647733f0d +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/.travis.yml +SPDXID: SPDXRef-cce5ffe10aba663335680e4e43bd9288 +FileChecksum: SHA1: 848f0cc8834d6f91c2890da82f16b1a6770c36b2 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/Cargo.toml +SPDXID: SPDXRef-5aa13c18af5ef3dec3ecab0f8c63bb7a +FileChecksum: SHA1: 840c119fc76390ff5e47b4b984b1ebe3c2cb9221 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/README.md +SPDXID: SPDXRef-a52bee01e6a3136dfb1aa4c5801ca671 +FileChecksum: SHA1: f83292f24277ad935144fca7e45deff9127fcc7f +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/src/lib.rs +SPDXID: SPDXRef-db4b8d6674824e5018c48e38aff38022 +FileChecksum: SHA1: d65ab95dc716267b64e1025dd96e790748cfd701 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/src/utils.rs +SPDXID: SPDXRef-3748ffc1bb58ea2ea7cf599ef81e64a7 +FileChecksum: SHA1: bd41aa3d055fa8a40cd35fa5c581521caee92772 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./crates/wasm/tests/web.rs +SPDXID: SPDXRef-979fb0d254aeba65211578ff8b35684d +FileChecksum: SHA1: 5b6592815f627b5026bb3c4e160a749811cf0765 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./deny.toml +SPDXID: SPDXRef-640c584f4da01eb49b738f7c45c188af +FileChecksum: SHA1: fdbfd3d32e793170213fe35830100cbcd7f16300 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./hk.pkl +SPDXID: SPDXRef-49d2aa98c1d7212438eee6fd73d05d7f +FileChecksum: SHA1: 3f1091c14bba2e45bcb0ea6e47676d168f96fc90 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./info/Pattern.md +SPDXID: SPDXRef-9f2ead9ce46a115b43f8e59e3f8daf88 +FileChecksum: SHA1: c2090d02a46224575e44348c39e40b5f34f24306 +LicenseConcluded: MIT +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2023 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> + +FileName: ./info/ag-instruct.md +SPDXID: SPDXRef-7d9220e1bfa8d6cd26e5486c4d0116d1 +FileChecksum: SHA1: ede6772a521abd329f6efa48f9c56fbc445e15cf +LicenseConcluded: MIT +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./mise.toml +SPDXID: SPDXRef-2b3c6dc79aaa8ab187f18575424cec72 +FileChecksum: SHA1: dbf5723e981b595202ce4b016878dd2080cc308f +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./scripts/get-langs.sh +SPDXID: SPDXRef-b0c7afe8a516b792025a21fac26f330d +FileChecksum: SHA1: ad10a77ef3d92a5b7458d768e4b56ca28a932079 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./scripts/install-mise.sh +SPDXID: SPDXRef-00ab9cf365c27b94b2da4a73ab9274ba +FileChecksum: SHA1: a85786553578e90814aaf33e8a8ce24815f7bcd6 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./scripts/update-licenses.py +SPDXID: SPDXRef-acdd7bc65db8153d946983a5cd0906b5 +FileChecksum: SHA1: d7dc344e802e29297dee75494fd470743913e2d8 +LicenseConcluded: AGPL-3.0-or-later +LicenseInfoInFile: AGPL-3.0-or-later +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./xtask/Cargo.toml +SPDXID: SPDXRef-0fa26d060b21f2196307d9d54cc6b9d4 +FileChecksum: SHA1: 3f709c1f4bc707432c9c9b7339f6602819f522c3 +LicenseConcluded: Apache-2.0 OR MIT +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + +FileName: ./xtask/src/main.rs +SPDXID: SPDXRef-45d1bf4a69990c4f8b0526410fd5fc08 +FileChecksum: SHA1: d4f88da0a6c7927efa956c0fc26293357c899477 +LicenseConcluded: AGPL-3.0-or-later AND (Apache-2.0 OR MIT) +LicenseInfoInFile: AGPL-3.0-or-later +LicenseInfoInFile: Apache-2.0 +LicenseInfoInFile: MIT +FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. + diff --git a/scripts/get-langs.sh b/scripts/get-langs.sh index d3ad228..b924358 100755 --- a/scripts/get-langs.sh +++ b/scripts/get-langs.sh @@ -1,17 +1,65 @@ #!/bin/bash + +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + +# Script to pull or add tree-sitter language parsers. Not yet implemented. We want to get things fairly stable as-is before we start adding new languages. + set -euo pipefail declare ARG -ARG="${1:-pull}" +declare -a ARGS LANGUAGES_FOR_ACTION +ARGS=("$@") + +if [[ "${ARGS[0]}" == "--help" || "${ARGS[0]}" == "-h" ]]; then + echo "Usage: $0 [pull|add] [options] [language...]" + echo "Actions:" + echo " pull - Pull updates for specified languages or all if none specified." + echo " add - Add specified languages or all if none specified." + echo "Options:" + echo " --all - Apply action to all supported languages." + exit 0 +fi + +# Check arguments +if [[ ${#ARGS[@]} -eq 0 ]]; then + echo "Usage: $0 [pull|add] [options] [language...]" + exit 1 +fi +if [[ "${ARGS[0]}" != "pull" && "${ARGS[0]}" != "add" ]]; then + echo "Invalid action: ${ARGS[0]}. Use 'pull' or 'add'." + exit 1 +fi +ARG="${ARGS[0]}" +# Remove the first argument (action) from the array +unset 'ARGS[0]' +# Re-index the array to remove gaps +ARGS=("${ARGS[@]}") +# Declare global variables +if [[ ${#ARGS[@]} -gt 0 ]]; then + if [[ "${ARGS[*]}" == *--all* ]]; then + LANGUAGES_FOR_ACTION=("ALL") + else + LANGUAGES_FOR_ACTION=("${ARGS[@]}") + fi +else + LANGUAGES_FOR_ACTION=("ALL") +fi declare PREFIX TREE_MAIN_URL TREE_GRAMS_URL -declare -a LANGS GRAMMAR_REPOS REPO_LANGS +declare -a LANGS GRAMMAR_REPOS REPO_LANGS IN_CRATESIO declare -A REPO BRANCH PREFIX="--prefix=parsers" TREE_MAIN_URL="https://github.com/tree-sitter/tree-sitter" TREE_GRAMS_URL="https://github.com/tree-sitter-grammars/tree-sitter" +export IN_CRATESIO=("bash" "c" "cpp" "c-sharp" "css" "comment" "cuda" "dockerfile" "elixir" "go" "haskell" "hcl" "hlsl" "html" "java" "javascript" "json" "just" "julia" "kotlin" "lua" "make" "markdown" "nix" "ocaml" "pkl" "php" "python" "r" "regex" "ruby" "rst" "scala" "scss" "solidity" "sql" "swift" "svelte" "toml" "typescript" "tsx" "yaml" "xml" "zig") + +export THREAD_SUPPORT=("bash" "cpp" "c-sharp" "css" "elixir" "go" "haskell" "html" "javascript" "json" "kotlin" "lua" "php" "python" "ruby" "rust" "scala" "swift" "typescript" "tsx" "yaml") + # These are all master branches in the main tree-sitter repository LANGS=("bash" "c" "cpp" "c-sharp" "css" "go" "haskell" "html" "java" "javascript" "json" "julia" "jsdoc" "lua" "python" "php" "ocaml" "regex" "ruby" "rust" "scala" "typescript") @@ -101,16 +149,33 @@ get_cmd() { else error_exit "Invalid action: $action. Use 'pull' or 'add'." fi + echo "[$word] $lang from $url branch: $branch" echo "git subtree --squash $PREFIX/$lang $action $url $branch" 2>/dev/null || { error_exit "Failed to construct command for language: $lang" } } +is_match() { + local lang="$1" + if [[ "${LANGUAGES_FOR_ACTION[0]}" == "ALL" ]]; then + return 0 + else + if [[ "${LANGUAGES_FOR_ACTION[*]}" == *"$lang"* ]]; then + return 0 + fi + fi + return 1 +} + main() { echo "Running command: $ARG" for lang in "${LANGS[@]}"; do local repo_url cmd + if ! is_match "$lang"; then + echo "Skipping language: $lang" + continue + fi repo_url=$(get_main_repo "$lang") cmd=$(get_cmd "$lang" "$repo_url" "$ARG" "master") echo "executing command: $cmd" @@ -120,6 +185,10 @@ main() { done for lang in "${REPO_LANGS[@]}"; do local repo_url branch cmd + if ! is_match "$lang"; then + echo "Skipping language: $lang" + continue + fi repo_url=$(get_repo "$lang") branch=${BRANCH[$lang]:-main} cmd=$(get_cmd "$lang" "$repo_url" "$ARG" "$branch") @@ -130,6 +199,10 @@ main() { done for grammar in "${GRAMMAR_REPOS[@]}"; do IFS=',' read -r lang branch <<<"$grammar" + if ! is_match "$lang"; then + echo "Skipping grammar: $lang" + continue + fi repo_url=$(get_grammar_repo "$lang") cmd="$(get_cmd "$lang" "$repo_url" "$ARG" "$branch")" echo "executing command: $cmd" diff --git a/scripts/install-mise.sh b/scripts/install-mise.sh index a613956..9ffd83f 100755 --- a/scripts/install-mise.sh +++ b/scripts/install-mise.sh @@ -1,9 +1,17 @@ #!/usr/bin/env bash + +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + set -eu __mise_bootstrap() { - local script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - local project_dir=$( cd -- "$( dirname -- "$script_dir" )" &> /dev/null && pwd ) + local script_dir + script_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + local project_dir + project_dir=$( cd -- "$( dirname -- "$script_dir" )" &> /dev/null && pwd ) export MISE_BOOTSTRAP_PROJECT_DIR="$project_dir" local cache_home="${XDG_CACHE_HOME:-$HOME/.cache}/mise" export MISE_INSTALL_PATH="$cache_home/mise-2025.7.0" @@ -307,4 +315,4 @@ __mise_bootstrap() { test -f "$MISE_INSTALL_PATH" || install } __mise_bootstrap -exec "$MISE_INSTALL_PATH" "$@" \ No newline at end of file +exec "$MISE_INSTALL_PATH" "$@" diff --git a/scripts/update-licenses.py b/scripts/update-licenses.py new file mode 100755 index 0000000..95ce21a --- /dev/null +++ b/scripts/update-licenses.py @@ -0,0 +1,278 @@ +#!/usr/bin/env -S uv run --all-extras -s +# /// script +# requires-python = ">=3.11" +# dependencies = ["rignore", "cyclopts"] +# /// +# sourcery skip: avoid-global-variables +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +"""Update licenses for files in the repository.""" + +import subprocess +import sys +from pathlib import Path +from functools import cache, partial +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor +from typing import Annotated, NamedTuple + +import rignore +from cyclopts import App, Parameter, validators + +BASE_PATH = Path(__file__).parent.parent + +__version__ = "0.1.0" + +CONTRIBUTORS = Parameter( + "-c", + "--contributor", + consume_multiple=True, + help="Name and email of the contributor(s) to add. May be provided multiple times, or as a json list.", + json_list=True, +) + +app = App( + name="Thread License Updater", + version=__version__, + default_command="add", + help = "Update licenses for files in the repository using Reuse. Respects .gitignore.", + help_on_error=True, +) + +def run_command(cmd: list[str], paths: list[Path]) -> None: + """Run a command with the given paths.""" + if not paths: + return + cmds = [cmd + [str(path)] for path in paths] + with ThreadPoolExecutor() as executor: + executor.map(subprocess.run, cmds) + +def years() -> str: + """ + Get the range of years for the copyright notice. + """ + if (year := str(datetime.now().year)) and year != "2025": + return f"2025-{year}" + else: + return "2025" + +BASE_CMD = [ + "reuse", + "annotate", + "--year", + years(), + "--copyright", + "Knitli Inc. ", + "--fallback-dot-license", + "--merge-copyrights", + "--skip-existing" +] + +# Collect non-code paths that are not in the AST-Grep or code paths +# Some of these are shell scripts, so technically code, but we treat them as non-code for license purposes. +NON_CODE_EXTS = { + "login", + "astro", + "bash", + "bash_logout", + "bashrc", + "browserlistrc", + "conf", + "config", + "csh", + "css", + "cts", + "fish", + "gitattributes", + "gitmodules", + "html", + "htmx", + "ini", + "j2", + "jinja", + "jinja2", + "json", + "json5", + "jsonc", + "jsonl", + "ksh", + "md", + "mdown", + "mdtext", + "mdtxt", + "mdwn", + "mdx", + "mk", + "mkd", + "mts", + "nix", + "nu", + "pkl", + "profile", + "quokka", + "rs", + "sass", + "scss", + "sh", + "shellcheckrc", + "sql", + "sqlite", + "stylelintrc", + "tcsh", + "toml", + "txt", + "yaml", + "yml", + "zlogin", + "zlogout", + "zprofile", + "zsh", + "zshenv", + "zshrc", +} + +DEFAULT_CONTRIBUTORS = ["Adam Poulemanos "] + +AST_GREP_COPYRIGHT = ( + "Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com>" +) + + +class PathsForProcessing(NamedTuple): + """Paths for processing.""" + ast_grep_paths: list[Path] + code_paths: list[Path] + non_code_paths: list[Path] + + @classmethod + def from_paths(cls, paths: tuple[list[Path], list[Path], list[Path]]) -> "PathsForProcessing": + """Create an instance from a tuple of paths.""" + if len(paths) != 3: + raise ValueError("Expected a tuple of three lists: (ast_grep_paths, code_paths, non_code_paths)") + return cls( + ast_grep_paths=paths[0], + code_paths=paths[1], + non_code_paths=paths[2], + ) + + def process_with_cmd(self, cmd: list[str]) -> None: + """Run a command with the paths.""" + if not self.ast_grep_paths and not self.code_paths and not self.non_code_paths: + return + cmds = [] + if self.ast_grep_paths: + ast_grep_cmd = cmd + ["-c", AST_GREP_COPYRIGHT, "-l", "AGPL-3.0-or-later AND MIT"] + cmds.append((ast_grep_cmd, self.ast_grep_paths)) + if self.code_paths: + code_cmd = cmd + ["-l", "AGPL-3.0-or-later"] + cmds.append((code_cmd, self.code_paths)) + if self.non_code_paths: + non_code_cmd = cmd + ["-l", "MIT OR Apache-2.0"] + cmds.append((non_code_cmd, self.non_code_paths)) + for cmd, paths in cmds: + run_command(cmd, paths) + +AST_GREP_CRATES = ["crates/ast-engine", "crates/language", "crates/rule-engine"] + +def get_staged_files() -> list[Path]: + """Get the list of staged files in the git repository.""" + try: + result = subprocess.run( + ["git", "diff", "--cached", "--name-only"], + capture_output=True, + text=True, + check=True + ) + print(result.stdout.strip()) + staged_files = result.stdout.strip().splitlines() + + return [(BASE_PATH / file) for file in staged_files] + except subprocess.CalledProcessError as e: + print(f"Error getting staged files: {e}") + return [] + +@cache +def filter_path(paths: tuple[Path] | None = None, path: Path | None = None) -> bool: + """Check if a path is in the provided list of paths.""" + if not path: + return False + if paths is None: + return path.is_file() and not path.is_symlink() + return path in paths and path.is_file() and not path.is_symlink() + +def get_empty_lists() -> tuple[list, list, list]: + """Get empty lists for AST-Grep paths, code paths, and non-code paths.""" + return [], [], [] + +def sort_paths(paths: list[Path] | None = None, base_dir: Path = BASE_PATH) -> PathsForProcessing: + """Sort paths by their string representation.""" + base_dir = base_dir or Path.cwd() + ast_grep_paths, code_paths, non_code_paths = get_empty_lists() + entry_filter = partial(filter_path, tuple(paths) if paths else None) + for p in rignore.walk(base_dir, ignore_hidden = False, read_git_ignore=True, read_ignore_files=True, same_file_system=True): + path = Path(p) + if not entry_filter(path): + continue + if any( + p + for p in AST_GREP_CRATES + if p in str(path) and p.suffix not in NON_CODE_EXTS + ): + ast_grep_paths.append(path) + elif path.suffix in NON_CODE_EXTS: + non_code_paths.append(path) + else: + code_paths.append(path) + return PathsForProcessing.from_paths((ast_grep_paths, code_paths, non_code_paths)) + +def process_contributors(contributors: list[str]) -> list[str]: + """Process contributors to ensure they are in the correct format.""" + processed = (item for contributor in contributors for item in ["--contributor", contributor]) + return list(processed) + +@app.command(help="Update all licenses in the repository. Will check every file in the repository and add license information if it's missing.") +def update_all(*, contributors: Annotated[list[str], CONTRIBUTORS] = DEFAULT_CONTRIBUTORS) -> None: + """Update all licenses in the repository.""" + path_obj = sort_paths() + BASE_CMD.extend(process_contributors(contributors)) + try: + path_obj.process_with_cmd(BASE_CMD) + except Exception as e: + print(f"Error updating licenses: {e}") + +@app.command(help="Update licenses for staged files in the repository. Will only check files that are staged for commit.") +def staged(*, contributors: Annotated[list[str], CONTRIBUTORS] = DEFAULT_CONTRIBUTORS) -> None: + """Update licenses for staged files in the repository.""" + staged_files = get_staged_files() + if not staged_files: + print("No staged files found.") + sys.exit(0) + path_obj = sort_paths(staged_files) + BASE_CMD.extend(process_contributors(contributors)) + try: + path_obj.process_with_cmd(BASE_CMD) + except Exception as e: + print(f"Error updating licenses: {e}") + +@app.command(help="Add licenses for specific files in the repository. Will only check the files provided. May be provided as a space separated list, or as a json list. If a file already has a license, it will be skipped.") +def add(files: Annotated[list[Path], Parameter(validator=validators.Path(exists=True), required=True, consume_multiple=True, json_list=True)], *, contributors: Annotated[list[str], CONTRIBUTORS] = DEFAULT_CONTRIBUTORS) -> None: + """Update licenses for specific files in the repository.""" + if not files: + print("No files provided.") + sys.exit(0) + path_obj = sort_paths(files) + BASE_CMD.extend(process_contributors(contributors)) + try: + path_obj.process_with_cmd(BASE_CMD) + except Exception as e: + print(f"Error updating licenses: {e}") + +def main() -> None: + """Main function to update licenses.""" + app() + +if __name__ == "__main__": + main() diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 56ef7b0..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,7 +0,0 @@ -// This is the main function. -fn main() { - // Statements here are executed when the compiled binary is called. - - // Print text to the console. - println!("Hello World!"); -} diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 89adac6..5f897cc 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -1,6 +1,16 @@ +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + [package] name = "xtask" version = "0.1.0" +publish = false +description = "Xtask for thread. Primarily used for Wasm builds." +keywords = ["xtask", "build", "wasm"] +categories = ["build-scripts"] +readme = "README.md" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/xtask/README.md b/xtask/README.md new file mode 100644 index 0000000..c4aa706 --- /dev/null +++ b/xtask/README.md @@ -0,0 +1,8 @@ + + +todo diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 3c244f9..31eead8 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -1,3 +1,8 @@ +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: MIT OR Apache-2.0 + use pico_args::Arguments; use std::path::Path; use std::process::{Command, exit}; @@ -6,7 +11,7 @@ const CRATE_PATH: &str = "crates/thread-wasm"; const PKG_PATH: &str = "crates/thread-wasm/pkg"; const DIST_PATH: &str = "dist/thread-wasm.optimized.wasm"; -const HELP: &str = r#" +const HELP: &str = r" xtask - Build thread-wasm WASM binary Usage: @@ -21,7 +26,7 @@ Options: --release Build in release mode with optimizations --profiling Build with profiling enabled (no optimizations) --help, -h Show this help message -"#; +"; #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] /// Represents the build mode for the WASM binary. @@ -33,22 +38,22 @@ enum BuildMode { } impl BuildMode { - fn as_wasm_pack_flag(&self) -> Option<&'static str> { + const fn as_wasm_pack_flag(self) -> Option<&'static str> { match self { - BuildMode::Release => Some("--release"), + Self::Release => Some("--release"), _ => None, } } - fn opt_options(&self) -> Vec<&'static str> { + fn opt_options(self) -> Vec<&'static str> { match self { - BuildMode::Release => vec![ + Self::Release => vec![ "-O4", "--enable-bulk-memory", "--enable-sign-ext", "--strip-debug", ], - BuildMode::Dev => vec!["-O", "--symbolmap", "--safe-heap"], - BuildMode::Profiling => vec!["-O", "--enable-bulk-memory", "--enable-sign-ext"], + Self::Dev => vec!["-O", "--symbolmap", "--safe-heap"], + Self::Profiling => vec!["-O", "--enable-bulk-memory", "--enable-sign-ext"], } } } @@ -56,7 +61,7 @@ impl BuildMode { fn main() { let mut args = Arguments::from_env(); if args.contains("--help") || args.contains("-h") { - println!("{}", HELP); + println!("{HELP}"); exit(0); } let subcmd = args.subcommand().unwrap_or(Some("build-wasm".to_string())); @@ -74,11 +79,11 @@ fn main() { build_wasm(mode, multi); } Some(cmd) => { - eprintln!("Unknown subcommand: {}", cmd); + eprintln!("Unknown subcommand: {cmd}"); exit(1); } None => { - println!("{}", HELP); + println!("{HELP}"); exit(1); } } @@ -87,12 +92,18 @@ fn main() { fn build_wasm(mode: BuildMode, multi: bool) { // wasm-pack build [crate-path] --target web [--release] [--features multi-threading] let mut wasm_pack = Command::new("wasm-pack"); - wasm_pack.args(&["build", CRATE_PATH, "--target", "web"]); + wasm_pack.args(["build", CRATE_PATH, "--target", "web"]); if let Some(flag) = mode.as_wasm_pack_flag() { wasm_pack.arg(flag); + wasm_pack.args(["--features", "inline"]); } if multi { - wasm_pack.args(&["--features", "multi-threading"]); + // we already have a --features flag if we're releasing + if mode == BuildMode::Release { + wasm_pack.arg("multi-threading"); + } else { + wasm_pack.args(["--features", "multi-threading"]); + } } run_or_die(wasm_pack, "wasm-pack build"); @@ -114,8 +125,8 @@ fn build_wasm(mode: BuildMode, multi: bool) { let mut wasm_opt = Command::new("wasm-opt"); wasm_opt.arg(&bg_wasm); - wasm_opt.args(&mode.opt_options()); - wasm_opt.args(&[ + wasm_opt.args(mode.opt_options()); + wasm_opt.args([ "--enable-multivalue", "--vacuum", "--enable-tail-call", @@ -124,15 +135,15 @@ fn build_wasm(mode: BuildMode, multi: bool) { "--enable-relaxed-simd", ]); if multi { - wasm_opt.args(&["--enable-threads", "--disable-multi-memories"]); + wasm_opt.args(["--enable-threads", "--disable-multi-memories"]); } else { - wasm_opt.args(&["--disable-threads", "--enable-multi-memories"]); + wasm_opt.args(["--disable-threads", "--enable-multi-memories"]); } - wasm_opt.args(&["-o", DIST_PATH]); + wasm_opt.args(["-o", DIST_PATH]); run_or_die(wasm_opt, "wasm-opt"); - println!("Built optimized wasm to {:?}", DIST_PATH); + println!("Built optimized wasm to {DIST_PATH:?}"); } fn run_or_die(mut cmd: Command, label: &str) {