diff --git a/.aic/graph.db b/.aic/graph.db
new file mode 100644
index 0000000..03e0a24
Binary files /dev/null and b/.aic/graph.db differ
diff --git a/README.md b/README.md
index e6a57dc..fb4c9dd 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ The philosophy behind Conductor is simple: control your code. By treating contex
 - **Iterate safely**: Review plans before code is written, keeping you firmly in the loop.
 - **Work as a team**: Set project-level context for your product, tech stack, and workflow preferences that become a shared foundation for your team.
 - **Build on existing projects**: Intelligent initialization for both new (Greenfield) and existing (Brownfield) projects.
+- **Semantic awareness (AIC)**: Automatically indexes your codebase into "Rich Skeletons" using the AI Compiler (AIC), giving the agent deep context about your API contracts and dependencies with minimal token overhead.
 - **Smart revert**: A git-aware revert command that understands logical units of work (tracks, phases, tasks) rather than just commit hashes.

## Installation
@@ -114,6 +115,7 @@ During implementation, you can also:
 
 ## Resources
 
+- [AI Compiler defensive publication](https://www.tdcommons.org/dpubs_series/8241/): Semantic Dependency Graph for AI Agents
 - [Gemini CLI extensions](https://geminicli.com/docs/extensions/): Documentation about using extensions in Gemini CLI
 - [GitHub issues](https://github.com/gemini-cli-extensions/conductor/issues): Report bugs or request features
 
diff --git a/aic/cli.py b/aic/cli.py
new file mode 100644
index 0000000..06df88b
--- /dev/null
+++ b/aic/cli.py
@@ -0,0 +1,116 @@
+import argparse
+import os
+from aic.db import init_db, upsert_node, get_node, get_dependencies, update_edges, mark_dirty
+from aic.skeleton import RichSkeletonizer
+from aic.utils import calculate_hash
+
+def index_repo(root_dir="."):
+    init_db()
+    skeletonizer = RichSkeletonizer()
+    for root, dirs, files in os.walk(root_dir):
+        # Skip VCS metadata, the AIC database, caches, and vendored packages
+        dirs[:] = [d for d in dirs if d not in ('.git', '.aic', '__pycache__', 'node_modules')]
+
+        for file in files:
+            if not file.endswith('.py'):
+                continue
+
+            file_path = os.path.join(root, file)
+            rel_path = os.path.relpath(file_path, root_dir)
+
+            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read()
+
+            current_hash = calculate_hash(content)
+            existing = get_node(rel_path)
+
+            # Incremental indexing: skip files whose content is unchanged
+            if existing and existing['hash'] == current_hash:
+                continue
+
+            print(f"Indexing: {rel_path}")
+            skeleton, dependencies = skeletonizer.skeletonize(content, rel_path)
+            upsert_node(rel_path, current_hash, skeleton)
+            mark_dirty(rel_path)
+
+            # Resolve imported module names to file paths within the repo
+            resolved_deps = []
+            for dep in dependencies:
+                resolved = resolve_dep_to_path(dep, rel_path, root_dir)
+                if resolved:
+                    resolved_deps.append(resolved)
+
+            update_edges(rel_path, resolved_deps)
+
+def resolve_dep_to_path(dep_name, current_file, root_dir):
+    """Simple heuristic to resolve a module name to a file path."""
+    # Handle relative imports (e.g., '.module' or '..module')
+    if dep_name.startswith('.'):
+        levels = 0
+        while dep_name.startswith('.'):
+            levels += 1
+            dep_name = dep_name[1:]
+
+        curr_dir = os.path.dirname(current_file)
+        for _ in range(levels - 1):
+            curr_dir = os.path.dirname(curr_dir)
+
+        base_path = os.path.join(curr_dir, dep_name.replace('.', os.sep))
+    else:
+        base_path = os.path.join(root_dir, dep_name.replace('.', os.sep))
+
+    candidates = [
+        base_path + ".py",
+        os.path.join(base_path, "__init__.py")
+    ]
+
+    for cand in candidates:
+        if os.path.exists(cand):
+            return os.path.relpath(cand, root_dir)
+    return None
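+
+# Worked example for the heuristic above (illustrative paths): inside
+# "pkg/sub/mod.py", the import "..utils" yields levels=2 and dep_name="utils";
+# curr_dir walks from "pkg/sub" up to "pkg", so the candidates tried are
+# "pkg/utils.py" and then "pkg/utils/__init__.py".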
+
+def get_context(file_path):
+    node = get_node(file_path)
+    if not node:
+        return f"# Error: {file_path} not indexed."
+
+    output = [f"# Context for {file_path}", node['skeleton'], ""]
+
+    deps = get_dependencies(file_path)
+    if deps:
+        output.append("## Dependencies")
+        for dep in deps:
+            dep_node = get_node(dep)
+            if dep_node:
+                output.append(f"### {dep}")
+                output.append(dep_node['skeleton'])
+                output.append("")
+
+    return "\n".join(output)
+
+def main():
+    parser = argparse.ArgumentParser(description="AIC: AI Compiler")
+    subparsers = parser.add_subparsers(dest="command")
+
+    subparsers.add_parser("index", help="Index the repository into .aic/graph.db")
+
+    context_parser = subparsers.add_parser("context", help="Print the Rich Skeleton context for a file")
+    context_parser.add_argument("file")
+
+    args = parser.parse_args()
+
+    if args.command == "index":
+        index_repo()
+        print("Finished indexing.")
+    elif args.command == "context":
+        print(get_context(args.file))
+    else:
+        parser.print_help()
+
+if __name__ == "__main__":
+    main()
diff --git a/aic/db.py b/aic/db.py
new file mode 100644
index 0000000..ccd8058
--- /dev/null
+++ b/aic/db.py
@@ -0,0 +1,70 @@
+import sqlite3
+import os
+
+DB_PATH = ".aic/graph.db"
+
+def get_connection():
+    """Open the graph DB; callers use the connection as a context manager for transactions."""
+    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def init_db():
+    with get_connection() as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS nodes (
+                path TEXT PRIMARY KEY,
+                hash TEXT,
+                skeleton TEXT,
+                status TEXT DEFAULT 'CLEAN'
+            )
+        """)
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS edges (
+                source TEXT,
+                target TEXT,
+                PRIMARY KEY (source, target),
+                FOREIGN KEY(source) REFERENCES nodes(path)
+            )
+        """)
+
+def upsert_node(path, hash_val, skeleton):
+    with get_connection() as conn:
+        conn.execute("""
+            INSERT INTO nodes (path, hash, skeleton, status)
+            VALUES (?, ?, ?, 'CLEAN')
+            ON CONFLICT(path) DO UPDATE SET
+                hash = excluded.hash,
+                skeleton = excluded.skeleton,
+                status = 'CLEAN'
+        """, (path, hash_val, skeleton))
+
+def mark_dirty(path):
+    """Mark all nodes that depend on this path as DIRTY."""
+    with get_connection() as conn:
+        conn.execute("""
+            UPDATE nodes
+            SET status = 'DIRTY'
+            WHERE path IN (
+                SELECT source FROM edges WHERE target = ?
+            )
+        """, (path,))
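+
+# Example: if b.py imports a.py, an edge (source="b.py", target="a.py") exists,
+# so mark_dirty("a.py") flips b.py to DIRTY. Only direct dependents are marked;
+# dirtiness is not propagated transitively.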
+
+def update_edges(source_path, target_paths):
+    with get_connection() as conn:
+        conn.execute("DELETE FROM edges WHERE source = ?", (source_path,))
+        for target in target_paths:
+            conn.execute("INSERT OR IGNORE INTO edges (source, target) VALUES (?, ?)", (source_path, target))
+
+def get_node(path):
+    with get_connection() as conn:
+        return conn.execute("SELECT * FROM nodes WHERE path = ?", (path,)).fetchone()
+
+def get_dependencies(path):
+    with get_connection() as conn:
+        return [row['target'] for row in conn.execute("SELECT target FROM edges WHERE source = ?", (path,)).fetchall()]
diff --git a/aic/skeleton.py b/aic/skeleton.py
new file mode 100644
index 0000000..734ba6b
--- /dev/null
+++ b/aic/skeleton.py
@@ -0,0 +1,121 @@
+import ast
+import os
+
+class RichSkeletonizer(ast.NodeVisitor):
+    def __init__(self):
+        self.skeleton = []
+        self.dependencies = set()
+        self.imports = []
+
+    def skeletonize(self, source_code, path):
+        # Reset per-file state so a single instance can index many files.
+        self.skeleton = []
+        self.dependencies = set()
+        self.imports = []
+        try:
+            tree = ast.parse(source_code)
+        except Exception as e:
+            return f"# ERROR: Failed to parse {path}: {e}", set()
+
+        self.visit(tree)
+        return "\n".join(self.skeleton), self.dependencies
+
+    def visit_Import(self, node):
+        for alias in node.names:
+            self.dependencies.add(alias.name)
+            self.imports.append(f"import {alias.name}")
+
+    def visit_ImportFrom(self, node):
+        module = node.module or ""
+        level = node.level
+        # Reconstruct the dotted prefix of relative imports
+        prefix = "." * level if level > 0 else ""
+        full_module = prefix + module
+
+        for alias in node.names:
+            self.dependencies.add(full_module)
+            self.imports.append(f"from {full_module} import {alias.name}")
+
+    def visit_ClassDef(self, node):
+        # Extract class signature
+        self.skeleton.append(f"class {node.name}:")
+        docstring = ast.get_docstring(node)
+        if docstring:
+            self.skeleton.append(f'    """{docstring}"""')
+
+        # Visit the class body into a temporary buffer, then re-emit it
+        # indented one level under the class header.
+        old_skeleton = self.skeleton
+        self.skeleton = []
+        self.generic_visit(node)
+        inner = self.skeleton
+        self.skeleton = old_skeleton
+        for line in inner:
+            self.skeleton.append(f"    {line}")
+        self.skeleton.append("")  # Spacer
+
+    def visit_FunctionDef(self, node):
+        self._skeletonize_func(node)
+
+    def visit_AsyncFunctionDef(self, node):
+        self._skeletonize_func(node, is_async=True)
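+
+    # Example of what _skeletonize_func emits for one function (illustrative):
+    #   def save(path: str) -> bool:
+    #       """Persist data to disk."""
+    #       # RETURNS: True | CALLS: open | json.dump
+    #       ...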
+
+    def _skeletonize_func(self, node, is_async=False):
+        prefix = "async " if is_async else ""
+        args = ast.unparse(node.args) if hasattr(ast, 'unparse') else "..."
+        returns = f" -> {ast.unparse(node.returns)}" if hasattr(ast, 'unparse') and node.returns else ""
+
+        signature = f"{prefix}def {node.name}({args}){returns}:"
+        self.skeleton.append(signature)
+
+        docstring = ast.get_docstring(node)
+        if docstring:
+            self.skeleton.append(f'    """{docstring}"""')
+
+        # Effects analysis
+        effects = self._analyze_effects(node)
+        if effects:
+            self.skeleton.append(f"    # {effects}")
+
+        self.skeleton.append("    ...")
+        self.skeleton.append("")  # Spacer
+
+    def _analyze_effects(self, node):
+        returns = []
+        raises = []
+        calls = []
+
+        for child in ast.walk(node):
+            if isinstance(child, ast.Return):
+                if child.value:
+                    try:
+                        returns.append(ast.unparse(child.value))
+                    except Exception:
+                        returns.append("some_value")
+            elif isinstance(child, ast.Raise):
+                if child.exc:
+                    try:
+                        raises.append(ast.unparse(child.exc))
+                    except Exception:
+                        raises.append("Exception")
+            elif isinstance(child, ast.Call):
+                try:
+                    calls.append(ast.unparse(child.func))
+                except Exception:
+                    pass
+
+        res = []
+        if returns:
+            res.append(f"RETURNS: {' | '.join(list(set(returns))[:3])}")
+        if raises:
+            res.append(f"RAISES: {' | '.join(list(set(raises))[:3])}")
+        if calls:
+            res.append(f"CALLS: {' | '.join(list(set(calls))[:5])}")
+
+        return " | ".join(res)
diff --git a/aic/utils.py b/aic/utils.py
new file mode 100644
index 0000000..db7ff3c
--- /dev/null
+++ b/aic/utils.py
@@ -0,0 +1,6 @@
+import hashlib
+
+def calculate_hash(content):
+    if isinstance(content, str):
+        content = content.encode('utf-8')
+    return hashlib.sha256(content).hexdigest()
diff --git a/commands/conductor/implement.toml b/commands/conductor/implement.toml
index d1536bd..1959f17 100644
--- a/commands/conductor/implement.toml
+++ b/commands/conductor/implement.toml
@@ -67,6 +67,9 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai
         - `conductor/workflow.md`
     c. **Error Handling:** If you fail to read any of these files, you MUST stop and inform the user of the error.
 
+4. **Semantic Dependency Awareness (AIC):**
+    - **Protocol:** While implementing any task, if you encounter imported internal modules or functions whose names are not self-describing or whose implementation is not already in your context, you SHOULD run `python3 -m aic.cli context <file>` for the files that define them. This returns the dependency's "Rich Skeleton" (signatures, docstrings, and IO/side-effects) without consuming excessive tokens.
+
-4. **Execute Tasks and Update Track Plan:**
+5. **Execute Tasks and Update Track Plan:**
     a. **Announce:** State that you will now execute the tasks from the track's `plan.md` by following the procedures in `workflow.md`.
     b. **Iterate Through Tasks:** You MUST now loop through each task in the track's `plan.md` one by one.
diff --git a/commands/conductor/newTrack.toml b/commands/conductor/newTrack.toml
index c4325fc..0b99a75 100644
--- a/commands/conductor/newTrack.toml
+++ b/commands/conductor/newTrack.toml
@@ -70,6 +70,9 @@ CRITICAL: You must validate the success of every tool call. If any tool call fai
     * Examples include reproduction steps for bugs, specific scope for chores, or success criteria.
     * Tailor the questions to the specific request.
 
+3. **Semantic Context (Optional):**
+    - **Recommendation:** If the track involves complex interactions with existing code, offer to run `python3 -m aic.cli context <file>` on the relevant files to better understand their API contracts and dependencies. This will help you generate a more accurate specification and plan.
+
-3. **Draft `spec.md`:** Once sufficient information is gathered, draft the content for the track's `spec.md` file, including sections like Overview, Functional Requirements, Non-Functional Requirements (if any), Acceptance Criteria, and Out of Scope.
+4. **Draft `spec.md`:** Once sufficient information is gathered, draft the content for the track's `spec.md` file, including sections like Overview, Functional Requirements, Non-Functional Requirements (if any), Acceptance Criteria, and Out of Scope.
 
-4. **User Confirmation:** Present the drafted `spec.md` content to the user for review and approval.
+5. **User Confirmation:** Present the drafted `spec.md` content to the user for review and approval.
diff --git a/commands/conductor/setup.toml b/commands/conductor/setup.toml
index 97ad40c..cdb37f8 100644
--- a/commands/conductor/setup.toml
+++ b/commands/conductor/setup.toml
@@ -323,7 +323,10 @@ CRITICAL: When determining model complexity, ALWAYS select the "flash" model, re
 1. **Summarize Actions:** Present a summary of all actions taken during Phase 1, including:
     - The guide files that were copied.
     - The workflow file that was copied.
-2. **Transition to initial plan and track generation:** Announce that the initial setup is complete and you will now proceed to define the first track for the project.
+2. **Semantic Indexing (AIC):**
+    - **Announce Action:** Inform the user: "Finally, I will semantically index your project using AIC (AI Compiler). This creates a lightweight dependency graph to help me understand your code better."
+    - **Action:** Execute `python3 -m aic.cli index`.
+3. **Transition to initial plan and track generation:** Announce that the initial setup is complete and you will now proceed to define the first track for the project.
 
 ---
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ab6349e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "conductor-aic"
+version = "0.1.1"
+description = "Conductor Extension for Gemini CLI with AI Compiler (AIC)"
+readme = "README.md"
+requires-python = ">=3.8"
+license = "Apache-2.0"
+authors = [
+    { name = "Google DeepMind" },
+]
+dependencies = []
+
+[project.scripts]
+aic = "aic.cli:main"
+
+[tool.hatch.build.targets.wheel]
+packages = ["aic"]
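+
+# Once installed (e.g. `pip install -e .`), the `aic` console script defined
+# above makes `aic index` and `aic context <file>` equivalent to the
+# `python3 -m aic.cli ...` invocations used by the Conductor commands.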