diff --git a/astwgroq/ast.json b/astwgroq/ast.json new file mode 100644 index 0000000000000..f237f306f1003 --- /dev/null +++ b/astwgroq/ast.json @@ -0,0 +1,834 @@ +{ + "id": "0x5bb44efe4a98", + "kind": "TranslationUnitDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "inner": [ + { + "id": "0x5bb44efe5308", + "kind": "TypedefDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "isImplicit": true, + "name": "__int128_t", + "type": { + "qualType": "__int128" + }, + "inner": [ + { + "id": "0x5bb44efe5060", + "kind": "BuiltinType", + "type": { + "qualType": "__int128" + } + } + ] + }, + { + "id": "0x5bb44efe5378", + "kind": "TypedefDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "isImplicit": true, + "name": "__uint128_t", + "type": { + "qualType": "unsigned __int128" + }, + "inner": [ + { + "id": "0x5bb44efe5080", + "kind": "BuiltinType", + "type": { + "qualType": "unsigned __int128" + } + } + ] + }, + { + "id": "0x5bb44efe56f0", + "kind": "TypedefDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "isImplicit": true, + "name": "__NSConstantString", + "type": { + "qualType": "__NSConstantString_tag" + }, + "inner": [ + { + "id": "0x5bb44efe5460", + "kind": "RecordType", + "type": { + "qualType": "__NSConstantString_tag" + }, + "decl": { + "id": "0x5bb44efe53d0", + "kind": "CXXRecordDecl", + "name": "__NSConstantString_tag" + } + } + ] + }, + { + "id": "0x5bb44efe5798", + "kind": "TypedefDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "isImplicit": true, + "name": "__builtin_ms_va_list", + "type": { + "qualType": "char *" + }, + "inner": [ + { + "id": "0x5bb44efe5750", + "kind": "PointerType", + "type": { + "qualType": "char *" + }, + "inner": [ + { + "id": "0x5bb44efe4b40", + "kind": "BuiltinType", + "type": { + "qualType": "char" + } + } + ] + } + ] + }, + { + "id": "0x5bb44f02e400", + "kind": "TypedefDecl", + "loc": {}, + "range": { + "begin": {}, + "end": {} + }, + "isImplicit": 
true, + "name": "__builtin_va_list", + "type": { + "qualType": "__va_list_tag[1]" + }, + "inner": [ + { + "id": "0x5bb44f02e3a0", + "kind": "ConstantArrayType", + "type": { + "qualType": "__va_list_tag[1]" + }, + "size": 1, + "inner": [ + { + "id": "0x5bb44efe5880", + "kind": "RecordType", + "type": { + "qualType": "__va_list_tag" + }, + "decl": { + "id": "0x5bb44efe57f0", + "kind": "CXXRecordDecl", + "name": "__va_list_tag" + } + } + ] + } + ] + }, + { + "id": "0x5bb44f02e5d8", + "kind": "FunctionDecl", + "loc": { + "offset": 4, + "file": "source.cpp", + "line": 1, + "col": 5, + "tokLen": 3 + }, + "range": { + "begin": { + "offset": 0, + "col": 1, + "tokLen": 3 + }, + "end": { + "offset": 41, + "line": 3, + "col": 1, + "tokLen": 1 + } + }, + "isUsed": true, + "name": "sum", + "mangledName": "_Z3sumii", + "type": { + "qualType": "int (int, int)" + }, + "inner": [ + { + "id": "0x5bb44f02e470", + "kind": "ParmVarDecl", + "loc": { + "offset": 12, + "line": 1, + "col": 13, + "tokLen": 1 + }, + "range": { + "begin": { + "offset": 8, + "col": 9, + "tokLen": 3 + }, + "end": { + "offset": 12, + "col": 13, + "tokLen": 1 + } + }, + "isUsed": true, + "name": "a", + "type": { + "qualType": "int" + } + }, + { + "id": "0x5bb44f02e4f0", + "kind": "ParmVarDecl", + "loc": { + "offset": 19, + "col": 20, + "tokLen": 1 + }, + "range": { + "begin": { + "offset": 15, + "col": 16, + "tokLen": 3 + }, + "end": { + "offset": 19, + "col": 20, + "tokLen": 1 + } + }, + "isUsed": true, + "name": "b", + "type": { + "qualType": "int" + } + }, + { + "id": "0x5bb44f02e778", + "kind": "CompoundStmt", + "range": { + "begin": { + "offset": 22, + "col": 23, + "tokLen": 1 + }, + "end": { + "offset": 41, + "line": 3, + "col": 1, + "tokLen": 1 + } + }, + "inner": [ + { + "id": "0x5bb44f02e768", + "kind": "ReturnStmt", + "range": { + "begin": { + "offset": 26, + "line": 2, + "col": 2, + "tokLen": 6 + }, + "end": { + "offset": 37, + "col": 13, + "tokLen": 1 + } + }, + "inner": [ + { + "id": 
"0x5bb44f02e748", + "kind": "BinaryOperator", + "range": { + "begin": { + "offset": 33, + "col": 9, + "tokLen": 1 + }, + "end": { + "offset": 37, + "col": 13, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "opcode": "+", + "inner": [ + { + "id": "0x5bb44f02e718", + "kind": "ImplicitCastExpr", + "range": { + "begin": { + "offset": 33, + "col": 9, + "tokLen": 1 + }, + "end": { + "offset": 33, + "col": 9, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "castKind": "LValueToRValue", + "inner": [ + { + "id": "0x5bb44f02e6d8", + "kind": "DeclRefExpr", + "range": { + "begin": { + "offset": 33, + "col": 9, + "tokLen": 1 + }, + "end": { + "offset": 33, + "col": 9, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "lvalue", + "referencedDecl": { + "id": "0x5bb44f02e470", + "kind": "ParmVarDecl", + "name": "a", + "type": { + "qualType": "int" + } + } + } + ] + }, + { + "id": "0x5bb44f02e730", + "kind": "ImplicitCastExpr", + "range": { + "begin": { + "offset": 37, + "col": 13, + "tokLen": 1 + }, + "end": { + "offset": 37, + "col": 13, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "castKind": "LValueToRValue", + "inner": [ + { + "id": "0x5bb44f02e6f8", + "kind": "DeclRefExpr", + "range": { + "begin": { + "offset": 37, + "col": 13, + "tokLen": 1 + }, + "end": { + "offset": 37, + "col": 13, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "lvalue", + "referencedDecl": { + "id": "0x5bb44f02e4f0", + "kind": "ParmVarDecl", + "name": "b", + "type": { + "qualType": "int" + } + } + } + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "id": "0x5bb44f02e7e8", + "kind": "FunctionDecl", + "loc": { + "offset": 48, + "line": 4, + "col": 5, + "tokLen": 4 + }, + "range": { + "begin": { + "offset": 44, + "col": 1, + "tokLen": 3 + }, + "end": { + "offset": 112, + "line": 8, + "col": 1, + "tokLen": 1 + } + }, 
+ "name": "main", + "mangledName": "main", + "type": { + "qualType": "int ()" + }, + "inner": [ + { + "id": "0x5bb44f02ec50", + "kind": "CompoundStmt", + "range": { + "begin": { + "offset": 55, + "line": 4, + "col": 12, + "tokLen": 1 + }, + "end": { + "offset": 112, + "line": 8, + "col": 1, + "tokLen": 1 + } + }, + "inner": [ + { + "id": "0x5bb44f02ea18", + "kind": "DeclStmt", + "range": { + "begin": { + "offset": 59, + "line": 5, + "col": 2, + "tokLen": 3 + }, + "end": { + "offset": 77, + "col": 20, + "tokLen": 1 + } + }, + "inner": [ + { + "id": "0x5bb44f02e8d8", + "kind": "VarDecl", + "loc": { + "offset": 63, + "col": 6, + "tokLen": 1 + }, + "range": { + "begin": { + "offset": 59, + "col": 2, + "tokLen": 3 + }, + "end": { + "offset": 67, + "col": 10, + "tokLen": 2 + } + }, + "isUsed": true, + "name": "a", + "type": { + "qualType": "int" + }, + "init": "c", + "inner": [ + { + "id": "0x5bb44f02e940", + "kind": "IntegerLiteral", + "range": { + "begin": { + "offset": 67, + "col": 10, + "tokLen": 2 + }, + "end": { + "offset": 67, + "col": 10, + "tokLen": 2 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "value": "19" + } + ] + }, + { + "id": "0x5bb44f02e978", + "kind": "VarDecl", + "loc": { + "offset": 71, + "col": 14, + "tokLen": 1 + }, + "range": { + "begin": { + "offset": 59, + "col": 2, + "tokLen": 3 + }, + "end": { + "offset": 75, + "col": 18, + "tokLen": 2 + } + }, + "isUsed": true, + "name": "b", + "type": { + "qualType": "int" + }, + "init": "c", + "inner": [ + { + "id": "0x5bb44f02e9e0", + "kind": "IntegerLiteral", + "range": { + "begin": { + "offset": 75, + "col": 18, + "tokLen": 2 + }, + "end": { + "offset": 75, + "col": 18, + "tokLen": 2 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "value": "20" + } + ] + } + ] + }, + { + "id": "0x5bb44f02ec08", + "kind": "DeclStmt", + "range": { + "begin": { + "offset": 81, + "line": 6, + "col": 2, + "tokLen": 3 + }, + "end": { + "offset": 98, + "col": 19, + 
"tokLen": 1 + } + }, + "inner": [ + { + "id": "0x5bb44f02ea48", + "kind": "VarDecl", + "loc": { + "offset": 85, + "col": 6, + "tokLen": 1 + }, + "range": { + "begin": { + "offset": 81, + "col": 2, + "tokLen": 3 + }, + "end": { + "offset": 97, + "col": 18, + "tokLen": 1 + } + }, + "name": "c", + "type": { + "qualType": "int" + }, + "init": "c", + "inner": [ + { + "id": "0x5bb44f02eba8", + "kind": "CallExpr", + "range": { + "begin": { + "offset": 89, + "col": 10, + "tokLen": 3 + }, + "end": { + "offset": 97, + "col": 18, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "inner": [ + { + "id": "0x5bb44f02eb90", + "kind": "ImplicitCastExpr", + "range": { + "begin": { + "offset": 89, + "col": 10, + "tokLen": 3 + }, + "end": { + "offset": 89, + "col": 10, + "tokLen": 3 + } + }, + "type": { + "qualType": "int (*)(int, int)" + }, + "valueCategory": "prvalue", + "castKind": "FunctionToPointerDecay", + "inner": [ + { + "id": "0x5bb44f02eb38", + "kind": "DeclRefExpr", + "range": { + "begin": { + "offset": 89, + "col": 10, + "tokLen": 3 + }, + "end": { + "offset": 89, + "col": 10, + "tokLen": 3 + } + }, + "type": { + "qualType": "int (int, int)" + }, + "valueCategory": "lvalue", + "referencedDecl": { + "id": "0x5bb44f02e5d8", + "kind": "FunctionDecl", + "name": "sum", + "type": { + "qualType": "int (int, int)" + } + } + } + ] + }, + { + "id": "0x5bb44f02ebd8", + "kind": "ImplicitCastExpr", + "range": { + "begin": { + "offset": 93, + "col": 14, + "tokLen": 1 + }, + "end": { + "offset": 93, + "col": 14, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "prvalue", + "castKind": "LValueToRValue", + "inner": [ + { + "id": "0x5bb44f02eaf8", + "kind": "DeclRefExpr", + "range": { + "begin": { + "offset": 93, + "col": 14, + "tokLen": 1 + }, + "end": { + "offset": 93, + "col": 14, + "tokLen": 1 + } + }, + "type": { + "qualType": "int" + }, + "valueCategory": "lvalue", + "referencedDecl": { + "id": "0x5bb44f02e8d8", + 
namespace {

/// Quotes `arg` so that a POSIX shell treats it as one literal word.
///
/// The text is wrapped in single quotes; every embedded single quote is
/// written as '\'' (close the quote, emit an escaped quote, reopen).
/// NOTE(review): assumes std::system() hands the command to a POSIX sh —
/// the pipeline already relies on `>` redirection and `python3`.
std::string shellQuote(const std::string& arg) {
    std::string quoted = "'";
    for (const char c : arg) {
        if (c == '\'') {
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

}  // namespace

/// Dumps the clang AST of one source file to ast.json, then runs main.py on
/// the source file and that dump.
///
/// @param argc must be 2 (program name + source file).
/// @param argv argv[1] is the path of the C++ source file to annotate.
/// @return 0 when both steps succeed; 1 on a usage error or when either
///         child command reports a non-zero status.
int main(int argc, char* argv[]) {
    if (argc != 2) {
        std::cerr << "Usage: " << argv[0] << " <source-file>\n";
        return 1;
    }

    std::string sourceFile = argv[1];
    // A path starting with '-' would be parsed as a compiler option; anchoring
    // it to the current directory keeps it a file name.
    if (!sourceFile.empty() && sourceFile.front() == '-') {
        sourceFile.insert(0, "./");
    }
    const std::string astFile = "ast.json";

    // Run clang to dump the AST to JSON. Both paths are shell-quoted so that
    // spaces or metacharacters in the file name cannot alter the command.
    std::string cmd = "clang++ -Xclang -ast-dump=json -fsyntax-only ";
    cmd += shellQuote(sourceFile);
    cmd += " > ";
    cmd += shellQuote(astFile);
    if (std::system(cmd.c_str()) != 0) {
        // Without a complete dump the annotation step has nothing valid to read.
        std::cerr << "error: clang++ failed to dump the AST of " << sourceFile << '\n';
        return 1;
    }

    // Run the Python script that prints the annotated source.
    std::string pythonCmd = "python3 main.py ";
    pythonCmd += shellQuote(sourceFile);
    pythonCmd += " ";
    pythonCmd += shellQuote(astFile);
    if (std::system(pythonCmd.c_str()) != 0) {
        std::cerr << "error: main.py failed for " << sourceFile << '\n';
        return 1;
    }

    return 0;
}
def _resolve_line(loc, state, source_file):
    """Decode one clang JSON source location and return its line number.

    Clang's JSON dump is delta-encoded: "file" and "line" are written only
    when they differ from the previously written location, so a location
    without "line" lies on the same line as the one before it. `state`
    carries that running context across the whole traversal.

    Returns None for an invalid (empty) location, or when `source_file` is
    given and the location belongs to a different file.
    """
    if not isinstance(loc, dict) or not loc:
        return None

    # Macro locations are split into two sub-objects, written in this order.
    if "spellingLoc" in loc or "expansionLoc" in loc:
        spelling = _resolve_line(loc.get("spellingLoc"), state, source_file)
        expansion = _resolve_line(loc.get("expansionLoc"), state, source_file)
        # Prefer where the macro was expanded: that is the line the reader sees.
        return expansion if expansion is not None else spelling

    file_name = loc.get("file")
    if isinstance(file_name, str):
        state["in_source"] = (
            source_file is None
            or os.path.realpath(file_name) == os.path.realpath(source_file)
        )

    line = loc.get("line")
    if isinstance(line, int):
        state["line"] = line

    return state["line"] if state["in_source"] else None


def collect_ast_lines(node, line_map, source_file=None, _state=None):
    """Record, per source line, the kinds of the AST nodes that start there.

    Args:
        node: a clang JSON AST node (dict), a list of nodes, or any other
            JSON value (ignored).
        line_map: mapping of line number -> set of node kinds; updated in
            place. Works with a plain dict or a defaultdict(set).
        source_file: optional path of the file being annotated. When given,
            locations that clang attributes to another file (e.g. included
            headers) are skipped.
        _state: internal traversal state; callers should not pass it.

    A node is recorded on the line of its "loc" and on the line of its
    "range"."begin". The traversal follows document order because the
    omitted-"line" decoding in _resolve_line depends on it.
    """
    if _state is None:
        _state = {"line": None, "in_source": True}

    if isinstance(node, list):
        for item in node:
            collect_ast_lines(item, line_map, source_file, _state)
        return
    if not isinstance(node, dict):
        return

    kind = node.get("kind")
    for key, val in node.items():
        if key == "loc":
            # The "loc" object is itself the location (it has no "start" key).
            line = _resolve_line(val, _state, source_file)
        elif key == "range" and isinstance(val, dict):
            line = _resolve_line(val.get("begin"), _state, source_file)
            # The end location is not recorded, but it must still be decoded
            # so that later omitted "line" fields resolve correctly.
            _resolve_line(val.get("end"), _state, source_file)
        else:
            if isinstance(val, (dict, list)):
                collect_ast_lines(val, line_map, source_file, _state)
            continue

        if line is not None and isinstance(kind, str):
            line_map.setdefault(line, set()).add(kind)


def annotate_file(source_path, ast_path):
    """Write an annotated copy of the source file to OUTPUT_FILE.

    Each source line is prefixed with its line number and, when AST nodes
    start on it, suffixed with a `// kind, kind` comment followed by the
    explanation returned by explain_ast_node_kinds for those kinds.

    Args:
        source_path: path of the source file that was given to clang.
        ast_path: path of clang's JSON AST dump for that file.
    """
    with open(ast_path, "r", encoding="utf-8") as f:
        ast = json.load(f)

    line_map = defaultdict(set)
    collect_ast_lines(ast, line_map, source_path)

    # Explanations may contain non-ASCII text, so the output encoding is fixed
    # instead of depending on the locale.
    with open(source_path, "r", encoding="utf-8", errors="replace") as src, \
            open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        for i, line in enumerate(src, start=1):
            kinds = sorted(line_map[i]) if i in line_map else []
            annotation = ", ".join(kinds)
            explanation = explain_ast_node_kinds(kinds) if kinds else ""
            out.write(f"{i:>3}: {line.rstrip()} {'// ' + annotation if annotation else ''}\n")
            if explanation:
                out.write(f" >> {explanation}\n")


def main():
    """Command-line entry point: annotate a source file and print the result."""
    if len(sys.argv) != 3:
        print("Usage: main.py <source-file> <ast-json>", file=sys.stderr)
        sys.exit(1)

    source_file, ast_file = sys.argv[1], sys.argv[2]
    annotate_file(source_file, ast_file)

    with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
        print(f.read())


if __name__ == "__main__":
    main()
+ >> In C++, an `ReturnStmt` is an Abstract Syntax Tree (AST) node that represents a `return` statement. + +In simple terms, a `ReturnStmt` node says: "Hey, I'm done with this function! Give back control to the caller and optionally return a value." + +Here's a breakdown: + +* `Return` keyword: This is the `return` statement itself. +* `Stmt` suffix: This indicates that it's a statement, not an expression or a declaration. + +When a `ReturnStmt` node is executed, the program: + +1. Evaluates the expression (if any) associated with the `return` statement. +2. Returns the evaluated value (if any) or `void` if no value is specified. +3. Exits the current function or method. +4. Control is transferred back to the caller (the function or code that called the current function). + +Example C++ code: +```cpp +int add(int a, int b) { + return a + b; // ReturnStmt node +} +``` +In this example, the `ReturnStmt` node is created when the `return` statement is parsed. When executed, it evaluates the expression `a + b`, returns the result, and exits the `add` function. + 3: } + 4: int main() { // CompoundStmt + >> In C++, an Abstract Syntax Tree` (AST) is a tree-like data structure that represents the source code in a more structured and organized way. Each node in the tree represents a construct in the source code, such as a variable declaration, a function call, a loop, etc. + +Now, let's talk about the `CompoundStmt` node: + +**CompoundStmt (Compound Statement)** + +A `CompoundStmt` node represents a block of code that contains multiple statements enclosed in curly braces `{}`. In other words, it's a group of statements that are executed together. + +Here are some examples: +```cpp +if (x > 5) { + cout << "x is greater than 5"; + x = 10; +} +``` +In this example, the `if` statement` is a `CompoundStmt` node, which contains two statements: `cout << "x is greater than 5"` and `x = 10`. 
+ +Another example: +```cpp +void myFunction() { + int x = 5; + cout << "Hello, World!"; +} +``` +Here, the `myFunction()` function is a `CompoundStmt` node, which contains two statements: `int x = 5` and `cout << "Hello, World!"`. + +In summary, a `CompoundStmt` node represents a block of code that contains multiple statements, enclosed in curly braces `{}`. + 5: int a = 19, b = 20; // DeclStmt + >> In C++, an Abstract Syntax Tree (AST) node represents a part of the source code. Here's a simple explanation of the `DeclStmt` node: + +**DeclStmt (Declaration Statement)** + +A `DeclStmt` node represents a statement that declares a variable, function, or other entity. In other words, it's a statement that introduces a new name into the program. + +Think of it like this: when you write a variable, like `int x;`, you're declaring a new variable named `x`. The `DeclStmt` node represents this declaration statement. + +Here are some examples of code that would generate a `DeclStmt` node: + +* `int y;` (declaring a variable `y`) +* `void foo();` (declaring a function `foo`) +* `class MyClass { };` (declaring a class `MyClass`) + +In each of these cases, the `DeclStmt` node would contain information about the declaration, such as the name, type, and any other relevant details. + +So, in summary, a `DeclStmt` node in the AST represents a statement that declares a new name or entity in the program. + 6: int c = sum(a, b); // DeclStmt + >> In C++, an Abstract Syntax Tree (AST) node represents a part of the source code. Here's a simple explanation of the `DeclStmt` node: + +**DeclStmt (Declaration Statement)** + +A `DeclStmt` node represents a statement that declares a variable, function, or other entity. In other words, it's a statement that introduces a new name into the program. + +Think of it like this: when you write a variable, like `int x;`, you're declaring a new variable named `x`. The `DeclStmt` node represents this declaration statement. 
+ +Here are some examples of code that would generate a `DeclStmt` node: + +* `int y;` (declaring a variable `y`) +* `void foo();` (declaring a function `foo`) +* `class MyClass { };` (declaring a class `MyClass`) + +In each of these cases, the `DeclStmt` node would contain information about the declaration, such as the name, type, and any other relevant details. + +So, in summary, a `DeclStmt` node in the AST represents a statement that declares a new name or entity in the program. + 7: return 0; // ReturnStmt + >> In C++, an `ReturnStmt` is an Abstract Syntax Tree (AST) node that represents a `return` statement. + +In simple terms, a `ReturnStmt` node says: "Hey, I'm done with this function! Give back control to the caller and optionally return a value." + +Here's a breakdown: + +* `Return` keyword: This is the `return` statement itself. +* `Stmt` suffix: This indicates that it's a statement, not an expression or a declaration. + +When a `ReturnStmt` node is executed, the program: + +1. Evaluates the expression (if any) associated with the `return` statement. +2. Returns the evaluated value (if any) or `void` if no value is specified. +3. Exits the current function or method. +4. Control is transferred back to the caller (the function or code that called the current function). + +Example C++ code: +```cpp +int add(int a, int b) { + return a + b; // ReturnStmt node +} +``` +In this example, the `ReturnStmt` node is created when the `return` statement is parsed. When executed, it evaluates the expression `a + b`, returns the result, and exits the `add` function. 
+ 8: } diff --git a/astwgroq/screeshot of result.png b/astwgroq/screeshot of result.png new file mode 100644 index 0000000000000..401458b0af2c7 Binary files /dev/null and b/astwgroq/screeshot of result.png differ diff --git a/astwgroq/source.cpp b/astwgroq/source.cpp new file mode 100644 index 0000000000000..b8d9337c2fa00 --- /dev/null +++ b/astwgroq/source.cpp @@ -0,0 +1,8 @@ +int sum(int a, int b) { + return a + b; +} +int main() { + int a = 19, b = 20; + int c = sum(a, b); +return 0; +}