Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions mise.toml
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,132 @@ git-cliff --tag "${usage_tag:?}" --prepend --output CHANGELOG.md &&
echo "✅ CHANGELOG.md generated for ${usage_tag:?}!"
'''

# Release helper: runs the build preparation script, updates CHANGELOG.md via
# git-cliff and, unless disabled by flags, commits the generated artifacts,
# creates an annotated tag, and optionally pushes commits and tags.
[tasks.prepare-version]
description = "Prepare build artifacts for a new version release"
usage = '''
arg "[version]" {
  help = "Version to prepare (e.g., 1.2.0). If not provided, will prompt."
}
flag "--no-commit" {
  help = "Don't commit the changes, just generate artifacts"
}
flag "--no-tag" {
  help = "Don't create a git tag"
}
flag "--push" {
  help = "Push changes and tags to remote after committing"
}
'''
run = '''
#!/usr/bin/env bash
set -e

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${BLUE}[codeweaver]${NC} Preparing version release..."

# Get version (positional argument, or prompt interactively)
VERSION="${usage_version:-}"
if [ -z "$VERSION" ]; then
  # -r keeps backslashes literal instead of treating them as escapes
  read -r -p "Enter version number (e.g., 1.2.0): " VERSION
fi

# Validate version format (strict MAJOR.MINOR.PATCH, no pre-release suffix)
if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
  echo -e "${RED}Error: Invalid version format. Use semantic versioning (e.g., 1.2.0)${NC}"
  exit 1
fi

# Fail fast when the tag already exists. Without this check the problem is
# only detected at step 5, after the release commit has already been created.
if [ -z "${usage_no_tag:-}" ] && [ -z "${usage_no_commit:-}" ]; then
  if git rev-parse -q --verify "refs/tags/v${VERSION}" >/dev/null; then
    echo -e "${RED}Error: Tag v${VERSION} already exists${NC}"
    exit 1
  fi
fi

echo -e "${BLUE}[codeweaver]${NC} Preparing version ${GREEN}v${VERSION}${NC}..."

# Step 1: Run build preparation
echo -e "${BLUE}[codeweaver]${NC} Step 1/5: Running build preparation..."
uv run python scripts/build/prepare-build.py || {
  echo -e "${RED}Build preparation failed!${NC}"
  exit 1
}

# Step 2: Check for generated artifacts (a missing cache is only a warning)
echo -e "${BLUE}[codeweaver]${NC} Step 2/5: Verifying generated artifacts..."
if [ ! -f "src/codeweaver/data/node_types_cache.pkl" ]; then
  echo -e "${YELLOW}Warning: node_types_cache.pkl not found${NC}"
fi

# Step 3: Update CHANGELOG (best effort: a failure does not abort the release)
echo -e "${BLUE}[codeweaver]${NC} Step 3/5: Updating CHANGELOG.md..."
git-cliff --tag "v${VERSION}" --prepend --output CHANGELOG.md || {
  echo -e "${YELLOW}Warning: Failed to update CHANGELOG${NC}"
}

# Step 4: Commit if not disabled
if [ -z "${usage_no_commit:-}" ]; then
  echo -e "${BLUE}[codeweaver]${NC} Step 4/5: Committing build artifacts..."
  # Each path is optional; ignore "pathspec did not match" style failures.
  git add src/codeweaver/data/node_types_cache.pkl 2>/dev/null || true
  git add CHANGELOG.md 2>/dev/null || true
  git add schema/ 2>/dev/null || true

  if ! git diff --cached --quiet; then
    # The message body stays at column 0 so no indentation leaks into the commit.
    git commit -m "chore: prepare build artifacts for v${VERSION}

- Update node_types cache
- Generate schema for v${VERSION}
- Update CHANGELOG.md" || {
      echo -e "${RED}Commit failed!${NC}"
      exit 1
    }
    echo -e "${GREEN}✓ Build artifacts committed${NC}"
  else
    echo -e "${YELLOW}No changes to commit${NC}"
  fi
else
  echo -e "${YELLOW}Skipping commit (--no-commit flag)${NC}"
fi

# Step 5: Tag if not disabled (tagging is also skipped when nothing was committed by request)
if [ -z "${usage_no_tag:-}" ] && [ -z "${usage_no_commit:-}" ]; then
  echo -e "${BLUE}[codeweaver]${NC} Step 5/5: Creating git tag v${VERSION}..."
  git tag -a "v${VERSION}" -m "Release v${VERSION}" || {
    echo -e "${RED}Tagging failed!${NC}"
    exit 1
  }
  echo -e "${GREEN}✓ Created tag v${VERSION}${NC}"
else
  echo -e "${YELLOW}Skipping tag creation${NC}"
fi

# Push if requested
if [ -n "${usage_push:-}" ]; then
  echo -e "${BLUE}[codeweaver]${NC} Pushing to remote..."
  git push && git push --tags || {
    echo -e "${RED}Push failed!${NC}"
    exit 1
  }
  echo -e "${GREEN}✓ Pushed to remote${NC}"
fi

echo ""
echo -e "${GREEN}════════════════════════════════════════════════${NC}"
echo -e "${GREEN}✓ Version ${VERSION} prepared successfully!${NC}"
echo -e "${GREEN}════════════════════════════════════════════════${NC}"
echo ""
echo -e "${BLUE}Next steps:${NC}"
# Number the steps dynamically so the list does not start at "3." after --push.
STEP=1
if [ -z "${usage_push:-}" ]; then
  echo -e " ${STEP}. Review the changes: ${YELLOW}git show${NC}"
  STEP=$((STEP + 1))
  echo -e " ${STEP}. Push to remote: ${YELLOW}git push && git push --tags${NC}"
  STEP=$((STEP + 1))
fi
echo -e " ${STEP}. Create GitHub release from tag v${VERSION}"
echo ""
'''
run_windows = '''
echo [codeweaver] Version preparation on Windows not yet supported. Use WSL or Linux.
exit 1
'''

[tasks.pre-commit]
shell = "zsh -c"
run = [
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ __all__ = ("__version__",)
[tool.hatch.build.targets.wheel]
artifacts = ["*.so", "src/**"]
packages = ["src/codeweaver"]
include = ["node_types/**", "typings/**"]
include = ["typings/**"]

[tool.hatch.build.targets.sdist]
include = [
Expand All @@ -348,9 +348,10 @@ include = [
"README.md",
"context7.json",
"context7.json.license",
"node_types/**",
"pyproject.toml",
"sbom.spdx",
"schema/**",
"scripts/**",
"src/**",
"typings/**",
"uv.lock",
Expand Down
54 changes: 54 additions & 0 deletions scripts/build/generate-schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2025 Knitli Inc.
# SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
#
# SPDX-License-Identifier: MIT OR Apache-2.0
"""Generate JSON schema for CodeWeaver settings.

This script generates the JSON schema file for CodeWeaver settings validation.
It should be run during the build process when the schema version changes or
when the schema file doesn't exist.
"""

from __future__ import annotations

import sys
from pathlib import Path


def main() -> int:
    """Generate the versioned JSON schema file for CodeWeaver settings.

    The schema is written to ``schema/v<version>/codeweaver.schema.json``
    under the repository root, where ``<version>`` is the default value of
    the ``__version__`` field on ``CodeWeaverSettings``. An existing schema
    file is never overwritten; delete it to force regeneration.

    Returns:
        Process exit code. ``0`` both when the schema was generated and when
        generation was skipped because the file already exists.
    """
    # Add src to path so we can import codeweaver from a repository checkout.
    repo_root = Path(__file__).parent.parent.parent
    src_path = str(repo_root / "src")
    # sys.path holds strings, so the membership test must use the string
    # form; a Path object never compares equal to a str entry.
    if src_path not in sys.path:
        sys.path.insert(0, src_path)

    from codeweaver.config.settings import CodeWeaverSettings

    # Get the schema version from CodeWeaverSettings
    version = CodeWeaverSettings.model_fields["__version__"].default
    schema_dir = repo_root / "schema" / f"v{version}"
    schema_file = schema_dir / "codeweaver.schema.json"

    # Check if schema file already exists
    if schema_file.exists():
        print(f"Schema file already exists: {schema_file}")
        print("Skipping schema generation. Delete the file to regenerate.")
        return 0

    # Generate schema
    print(f"Generating schema for version {version}...")
    schema_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): json_schema() is assumed to return bytes, since the
    # result is passed straight to write_bytes() -- confirm against the class.
    schema_bytes = CodeWeaverSettings.json_schema()
    bytes_written = schema_file.write_bytes(schema_bytes)

    print(f"✓ Generated schema file: {schema_file}")
    print(f" Size: {bytes_written:,} bytes")

    return 0


if __name__ == "__main__":
sys.exit(main())
89 changes: 89 additions & 0 deletions scripts/build/prepare-build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2025 Knitli Inc.
# SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
#
# SPDX-License-Identifier: MIT OR Apache-2.0
"""Master build preparation script for CodeWeaver.

This script orchestrates all the build preparation steps in the correct order:
1. Generate supported languages list
2. Generate provider lists
3. Update node-types from tree-sitter grammars
4. Preprocess node-types into cached format
5. Generate JSON schema (if needed)
"""

from __future__ import annotations

import subprocess
import sys
from pathlib import Path


def run_script(script_path: Path, *args: str) -> int:
    """Run a Python script with the current interpreter and report the outcome.

    The child process is started with ``sys.executable`` and its working
    directory set three levels above the script
    (``script_path.parent.parent.parent``).

    Args:
        script_path: Path of the script to execute.
        *args: Extra command-line arguments passed to the script verbatim.

    Returns:
        The script's exit code; ``0`` on success.
    """
    script_name = script_path.name
    rule = "=" * 70
    print(f"\n{rule}")
    print(f"Running: {script_name}")
    # Flush before spawning the child: when stdout is a pipe (e.g. CI logs)
    # it is block-buffered, and the child's output would otherwise be written
    # ahead of its own banner.
    print(rule, flush=True)

    completed = subprocess.run(
        [sys.executable, str(script_path), *args],
        cwd=script_path.parent.parent.parent,
        check=False,  # failures are reported through the return value
    )
    exit_code = completed.returncode

    if exit_code != 0:
        print(f"✗ {script_name} failed with exit code {exit_code}")
        return exit_code

    print(f"✓ {script_name} completed successfully")
    return 0


def main() -> int:
    """Execute every build preparation step in order.

    The pipeline stops at the first step that exits with a non-zero status.

    Returns:
        ``0`` when all steps succeed, otherwise the exit code of the first
        failing step.
    """
    repo_root = Path(__file__).parent.parent.parent
    build_dir = repo_root / "scripts" / "build"
    lang_dir = repo_root / "scripts" / "language-support"

    banner = "=" * 70
    print(banner)
    print("CodeWeaver Build Preparation")
    print(banner)

    # Ordered pipeline of (script, extra command-line arguments).
    steps = (
        # Step 1: Generate supported languages
        (build_dir / "generate-supported-languages.py", ()),
        # Step 2: Generate provider lists
        (build_dir / "generate-provider-lists.py", ()),
        # Step 3: Update node-types from tree-sitter grammars
        (
            lang_dir / "download-ts-grammars.py",
            ("fetch", "--only-update", "--only-node-types"),
        ),
        # Step 4: Preprocess node-types into cache
        (build_dir / "preprocess-node-types.py", ()),
        # Step 5: Generate schema (if needed)
        (build_dir / "generate-schema.py", ()),
    )

    for script, script_args in steps:
        status = run_script(script, *script_args)
        if status != 0:
            return status

    print("\n" + banner)
    print("✓ Build preparation completed successfully!")
    print(banner)

    return 0


if __name__ == "__main__":
sys.exit(main())
60 changes: 60 additions & 0 deletions scripts/build/preprocess-node-types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2025 Knitli Inc.
# SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
#
# SPDX-License-Identifier: MIT OR Apache-2.0
"""Preprocess node-types JSON files and cache the parsed grammar data.

This script loads all tree-sitter node-types.json files, parses them into
CodeWeaver's internal Thing/Category representation, and serializes the
result to a pickle cache. This cache is loaded at runtime for fast startup.
"""

from __future__ import annotations

import pickle
import sys
from pathlib import Path


def main() -> int:
    """Parse all node-types files and write the pickle cache used at runtime.

    A ``NodeTypeParser`` is created with its cache disabled (the cache is
    what is being built), all nodes are parsed, and the parser's
    ``registration_cache`` is pickled to
    ``src/codeweaver/data/node_types_cache.pkl`` under the repository root.

    Returns:
        Process exit code; ``0`` once the cache file has been written.
    """
    # Add src to path so we can import codeweaver from a repository checkout.
    repo_root = Path(__file__).parent.parent.parent
    src_path = str(repo_root / "src")
    # sys.path holds strings, so the membership test must use the string
    # form; a Path object never compares equal to a str entry.
    if src_path not in sys.path:
        sys.path.insert(0, src_path)

    from codeweaver.semantic.node_type_parser import NodeTypeParser

    print("Preprocessing node-types JSON files...")

    # Create parser and process all languages (disable cache since we're building it)
    parser = NodeTypeParser(use_cache=False)
    all_things = parser.parse_all_nodes()

    print(f" Parsed {len(all_things)} Things/Categories across all languages")

    # Get the cache from the parser's registration cache
    # Note: We only cache the registration_cache, not all_things,
    # since all_things can be reconstructed from the cache at runtime
    cache_data = {
        "registration_cache": parser.registration_cache,
    }

    # Write cache file
    cache_file = repo_root / "src" / "codeweaver" / "data" / "node_types_cache.pkl"
    print(f"Writing cache to {cache_file}...")

    # Create the data directory if needed, so a fresh checkout without it does
    # not fail with FileNotFoundError after all the parsing work is done.
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    with cache_file.open("wb") as f:
        pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)

    cache_size = cache_file.stat().st_size
    print(f"✓ Generated node_types cache: {cache_file}")
    print(f" Size: {cache_size:,} bytes ({cache_size / 1024:.1f} KB)")

    return 0


if __name__ == "__main__":
sys.exit(main())
19 changes: 11 additions & 8 deletions src/codeweaver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,21 @@
import shutil
import subprocess

from pathlib import Path

if git := (shutil.which("git") is not None):
# Try to get version from git if available
# Git commands work from any directory within a repo, so no need to specify cwd
if git := shutil.which("git"):
git_describe = subprocess.run(

Check failure on line 54 in src/codeweaver/__init__.py

View workflow job for this annotation

GitHub Actions / Lint / Lint and Format

Ruff (S603)

src/codeweaver/__init__.py:54:36: S603 `subprocess` call: check for execution of untrusted input
["describe", "--tags", "--always", "--dirty"], # noqa: S607
executable=git,
[git, "describe", "--tags", "--always", "--dirty"],
capture_output=True,
text=True,
check=True,
cwd=str(Path(__file__).parent.parent.parent),
check=False,
)
Comment on lines 54 to 59
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security (python.lang.security.audit.dangerous-subprocess-use-audit): Detected subprocess function 'run' without a static string. If this data can be controlled by a malicious actor, it may be an instance of command injection. Audit the use of this call to ensure it is not controllable by an external resource. You may consider using 'shlex.quote()'.

Source: opengrep

__version__ = git_describe.stdout.strip()
if git_describe.returncode == 0:
__version__ = git_describe.stdout.strip()
else:
__version__ = "0.0.0"
else:
__version__ = "0.0.0"
except Exception:
__version__ = "0.0.0"
return __version__
Expand Down
Loading
Loading