
Commit d381ceb

Author: naman-msft
Commit message: ada conversion in progress
Parent: 7152af7


45 files changed (+3977 −115 lines)


tools/abc.py

Lines changed: 73 additions & 61 deletions

@@ -1,75 +1,87 @@
-from pathlib import Path
-import sys
 import os
-import shutil
 import re
+from pathlib import Path
+# filepath: [abc.py](http://_vscodecontentref_/1)
+def count_code_blocks(file_path):
+    """Count the number of code blocks (```) in a markdown file."""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Count opening triple backticks
+        # This regex matches ``` with optional leading whitespace
+        code_blocks = re.findall(r'^\s*```', content, re.MULTILINE)
+        return len(code_blocks)
+    except Exception as e:
+        print(f"Error reading {file_path}: {e}")
+        return -1
 
-def check_and_rename_folders():
-    # Get the directory where the script is located
-    script_dir = Path(sys.path[0])
+def find_markdown_files(root_dir):
+    """Find all markdown files in the directory tree."""
+    markdown_files = []
 
-    # Define the untested folder path
-    untested_dir = script_dir / "untested"
+    for root, dirs, files in os.walk(root_dir):
+        for file in files:
+            if file.endswith('.md'):
+                file_path = os.path.join(root, file)
+                markdown_files.append(file_path)
 
-    if not untested_dir.exists():
-        print(f"Error: Untested directory not found at {untested_dir}")
+    return markdown_files
+
+def main():
+    # Get the untested folder path
+    untested_dir = "untested"
+
+    if not os.path.exists(untested_dir):
+        print(f"Error: '{untested_dir}' folder not found!")
         return
-
-    print(f"Checking folders in {untested_dir}")
 
-    # Get all subfolders
-    subfolders = [f for f in untested_dir.iterdir() if f.is_dir()]
+    # Find all markdown files
+    print(f"Scanning for markdown files in '{untested_dir}'...")
+    markdown_files = find_markdown_files(untested_dir)
 
-    if not subfolders:
-        print("No subfolders found in the untested directory.")
+    if not markdown_files:
+        print("No markdown files found!")
         return
 
-    print(f"Found {len(subfolders)} subfolders.")
+    print(f"Found {len(markdown_files)} markdown files\n")
 
-    # Process each subfolder
-    for folder in subfolders:
-        original_name = folder.name
-
-        # Check if folder is empty
-        files = list(folder.iterdir())
-        if not files:
-            print(f"⚠️ Empty folder detected: {original_name}")
-            continue
-
-        # Remove the "..." from folder name if present
-        if original_name.endswith("..."):
-            new_name = original_name[:-3] # Remove the trailing "..."
-            new_folder_path = folder.parent / new_name
-
-            try:
-                # Create temporary folder name to avoid potential conflicts
-                temp_folder_path = folder.parent / f"temp_{original_name}"
-                folder.rename(temp_folder_path)
-                temp_folder_path.rename(new_folder_path)
-                print(f"✓ Renamed folder: {original_name}{new_name}")
-                # Update the folder reference to the new path
-                folder = new_folder_path
-            except Exception as e:
-                print(f"✗ Error renaming folder {original_name}: {str(e)}")
-                continue
+    # Count code blocks in each file
+    file_stats = []
+    for file_path in markdown_files:
+        count = count_code_blocks(file_path)
+        if count >= 0: # Only include files that were successfully read
+            # Get relative path for cleaner display
+            relative_path = os.path.relpath(file_path, untested_dir)
+            file_stats.append((relative_path, count))
+
+    # Sort by code block count (ascending)
+    file_stats.sort(key=lambda x: x[1])
+
+    # Display all files ranked by code blocks
+    print("All markdown files ranked by number of code blocks (lowest to highest):")
+    print("-" * 100)
+    print(f"{'Rank':<6} {'Code Blocks':<12} {'File Path'}")
+    print("-" * 100)
+
+    for i, (file_path, count) in enumerate(file_stats, 1):
+        print(f"{i:<6} {count:<12} {file_path}")
+
+    # Show some statistics
+    print("\n" + "-" * 100)
+    print("Statistics:")
+    print(f"- Total files analyzed: {len(file_stats)}")
+    if file_stats:
+        print(f"- Files with 0 code blocks: {sum(1 for _, count in file_stats if count == 0)}")
+        print(f"- Average code blocks per file: {sum(count for _, count in file_stats) / len(file_stats):.2f}")
+        print(f"- Maximum code blocks in a file: {max(count for _, count in file_stats)}")
 
-        # Rename files within the folder to match folder name
-        for file_path in folder.iterdir():
-            if file_path.is_file():
-                file_extension = file_path.suffix
-                new_file_name = f"{folder.name}{file_extension}"
-                new_file_path = folder / new_file_name
-
-                # Skip if the filename already matches
-                if file_path.name == new_file_name:
-                    continue
-
-                try:
-                    file_path.rename(new_file_path)
-                    print(f" ✓ Renamed file: {file_path.name}{new_file_name}")
-                except Exception as e:
-                    print(f" ✗ Error renaming file {file_path.name}: {str(e)}")
+    # Group by code block count
+    print("\nDistribution:")
+    from collections import Counter
+    count_distribution = Counter(count for _, count in file_stats)
+    for blocks, num_files in sorted(count_distribution.items()):
+        print(f" {blocks} code blocks: {num_files} file(s)")
 
 if __name__ == "__main__":
-    check_and_rename_folders()
-    print("Processing complete!")
+    main()
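
A note on what the new counter measures: the pattern `r'^\s*```'` with `re.MULTILINE` matches every line that begins (after optional leading whitespace) with triple backticks, so opening and closing fences are both counted and a file with one fenced block reports 2. A minimal sketch of that behavior, using a hypothetical sample string that is not part of this commit:

```python
import re

# Hypothetical sample: one fenced code block surrounded by prose.
fence = "```"
sample = "\n".join([
    "# Title",
    "",
    "Some prose.",
    "",
    fence + "bash",
    "echo hello",
    fence,
    "",
    "More prose.",
])

# Same pattern the new count_code_blocks() uses: any line starting with
# optional whitespace followed by triple backticks is a match, so opening
# and closing fences are both counted.
fences = re.findall(r"^\s*```", sample, re.MULTILINE)
print(len(fences))  # 2: one opening fence plus one closing fence
```

Dividing the reported count by two would give the number of fenced blocks, assuming every fence in the file is properly closed.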

tools/ada.py

Lines changed: 59 additions & 47 deletions

@@ -186,14 +186,14 @@
 **Example:**
 
 ```bash
-export RANDOM_SUFFIX=$(openssl rand -hex 3)
+export RANDOM_SUFFIX=$(head -c 3 /dev/urandom | xxd -p)
 export REGION="eastus"
 az group create --name "MyResourceGroup$RANDOM_SUFFIX" --location $REGION
 ```
 
 >**Note:** Add a random suffix to relevant variables that are likely to be unique for each deployment, such as resource group names, VM names, and other resources that need to be uniquely identifiable. However, do not add a random suffix to variables that are constant or environment-specific, such as region, username, or configuration settings that do not change between deployments.
 
->**Note:** You can generate your own random suffix or use the one provided in the example above. The `openssl rand -hex 3` command generates a random 3-character hexadecimal string. This string is then appended to the resource group name to ensure that the resource group name is unique for each deployment.
+>**Note:** You can generate your own random suffix or use the one provided in the example above. The `head -c 3 /dev/urandom | xxd -p` command generates a random 3-character hexadecimal string. This string is then appended to the resource group name to ensure that the resource group name is unique for each deployment.
 
 14. In Exec Docs, result blocks are distinguished by a custom expected_similarity comment tag followed by a code block. These result blocks indicate to Innovation Engine what the minimum degree of similarity should be between the actual and the expected output of a code block (one which returns something in the terminal that is relevant to benchmark against). Learn More: [Result Blocks](https://github.com/Azure/InnovationEngine/blob/main/README.md#result-blocks).
 

@@ -227,7 +227,6 @@
 "type": "Microsoft.Resources/resourceGroups"
 }}
 ```
-```
 - If you run into an error while executing a code block or the code block is running in an infinite loop, update the Exec Doc based on the error stack trace, restart/clear Cloudshell, and rerun the command block(s) from the start until you reach that command block. This is done to override any potential issues that may have occurred during the initial run. More guidance is given in the [FAQ section](#frequently-asked-questions-faqs) below.
 
 >**Note:** The expected similarity value is a percentage of similarity between 0 and 1 which specifies how closely the true output needs to match the template output given in the results block - 0 being no similarity, 1 being an exact match. If you are uncertain about the value, it is recommended to set the expected similarity to 0.3 i.e. 30% expected similarity to account for small variations. Once you have run the command multiple times and are confident that the output is consistent, you can adjust the expected similarity value accordingly.

@@ -273,7 +272,7 @@
 17. If the original document lists a prerequisite resource (such as an AKS cluster, VM, storage account, etc.), you MUST NOT add any new commands to create that resource in the Exec Doc.
 
 - **Example:** If the doc says "This article assumes you have an existing AKS cluster," do NOT add `az aks create` or any equivalent cluster creation commands. Only include steps for interacting with or managing the existing resource.
-- This rule applies to any resource type, not just AKS. Always respect explicit prerequisites and never override them by adding creation steps.
+- This rule applies to any resource type, not just AKS. Always respect explicit prerequisites and never override them by adding creation steps for that resource.
 - If the prerequisite is stated in any form (e.g., "Before you begin, create a resource group"), treat that resource as pre-existing and do not add creation commands for it.
 - If you are unsure whether a resource should be created, always preserve the prerequisite as stated and avoid introducing creation commands for that resource.
 

@@ -1830,7 +1829,7 @@ def main():
         input_type = 'file'
         with open(user_input, "r") as f:
             input_content = f.read()
-        input_content = f"CONVERT THE FOLLOWING EXISTING DOCUMENT INTO AN EXEC DOC. THIS IS A CONVERSION TASK, NOT CREATION FROM SCRATCH. DON'T EXPLAIN WHAT YOU ARE DOING BEHIND THE SCENES INSIDE THE DOC. PRESERVE ALL ORIGINAL CONTENT, STRUCTURE, AND NARRATIVE OUTSIDE OF CODE BLOCKS. CRITICALLY IMPORTANT: NEVER CHANGE THE LANGUAGE TYPE OF CODE BLOCKS (e.g., from 'shell' to 'bash'). KEEP THE EXACT SAME LANGUAGE IDENTIFIER AFTER TRIPLE BACKTICKS AS IN THE ORIGINAL DOCUMENT:\n\n{input_content}"
+        input_content = f"CONVERT THE FOLLOWING EXISTING DOCUMENT INTO AN EXEC DOC. THIS IS A CONVERSION TASK, NOT CREATION FROM SCRATCH. DON'T EXPLAIN WHAT YOU ARE DOING BEHIND THE SCENES INSIDE THE DOC. PRESERVE ALL ORIGINAL CONTENT, STRUCTURE, AND NARRATIVE OUTSIDE OF CODE BLOCKS. CRITICALLY IMPORTANT: NEVER CHANGE THE LANGUAGE TYPE OF CODE BLOCKS. KEEP THE EXACT SAME LANGUAGE IDENTIFIER AFTER TRIPLE BACKTICKS AS IN THE ORIGINAL DOCUMENT:\n\n{input_content}"
         # We'll generate dependency files later in the process
         dependency_files = []
         generate_deps = input("\nMake new files referenced in the doc for its execution? (y/n): ").lower() == 'y'

@@ -2129,6 +2128,7 @@ def main():
     success = False
     dependency_files_generated = False
     additional_instruction = ""
+    user_edited_content = None # Add this line to initialize the flag
 
     while attempt <= max_attempts:
         iteration_start_time = time.time()

@@ -2150,6 +2150,7 @@ def main():
             f"or export MY_LOCATION=\\\"eastus2\\\"). The primary goal is to use the user's *names*."
         )
 
+
         if attempt == 1:
             print_header(f"Attempt {attempt}: Generating Exec Doc", "-")
 

@@ -2178,42 +2179,50 @@ def main():
         else:
             print_header(f"Attempt {attempt}: Fixing Exec Doc", "-")
 
-            # Analyze if the error is in the main doc or in dependency files
-            error_analysis = analyze_error(errors_text, dependency_files)
-
-            if error_analysis["type"] == "dependency_file" and error_analysis["file"]:
-                # If error is in a dependency file, try to fix it
-                dep_file = error_analysis["file"]
-                print_message(f"\nDetected issue in dependency file: {dep_file['filename']}")
-                update_dependency_file(dep_file, error_analysis["message"], output_file)
-                made_dependency_change = True # Set the flag
-            else:
-                # If error is in main doc or unknown, update the main doc
-                user_prompt_for_fix = (
-                    f"The following error(s) have occurred during testing:\n{errors_text}\n{additional_instruction}\n\n"
-                    f"Please carefully analyze these errors and make necessary corrections to the document to prevent them "
-                    f"from happening again. Try to find different solutions if the same errors keep occurring. \n"
-                    f"IMPORTANT: NEVER change the code block language types (e.g., do not change 'shell' to 'bash'). "
-                    f"Keep the exact same language identifier after triple backticks as in the current document."
-                    f"{llm_variable_instruction}" # Add variable instruction here as well
-                    f"\nGiven that context, please think hard and don't hurry. I want you to correct the converted document "
-                    f"in ALL instances where this error has been or can be found. Then, correct ALL other errors apart "
-                    f"from this that you see in the doc. ONLY GIVE THE UPDATED DOC, NOTHING ELSE"
-                )
-                response = client.chat.completions.create(
-                    model=deployment_name,
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": input_content},
-                        {"role": "assistant", "content": output_file_content},
-                        {"role": "user", "content": user_prompt_for_fix}
-                    ]
-                )
-                output_file_content = response.choices[0].message.content
-
+            # Check if this is a retry after user feedback with document edits only
+            if attempt > 1 and 'user_edited_content' in locals() and user_edited_content:
+                print_message("\nUsing your directly edited version without AI modifications...")
+                output_file_content = user_edited_content
                 with open(output_file, "w") as f:
                     f.write(output_file_content)
-
+                # Clear the flag
+                user_edited_content = None
+            else:
+                # Analyze if the error is in the main doc or in dependency files
+                error_analysis = analyze_error(errors_text, dependency_files)
+
+                if error_analysis["type"] == "dependency_file" and error_analysis["file"]:
+                    # If error is in a dependency file, try to fix it
+                    dep_file = error_analysis["file"]
+                    print_message(f"\nDetected issue in dependency file: {dep_file['filename']}")
+                    update_dependency_file(dep_file, error_analysis["message"], output_file)
+                    made_dependency_change = True # Set the flag
+                else:
+                    # If error is in main doc or unknown, update the main doc
+                    user_prompt_for_fix = (
+                        f"The following error(s) have occurred during testing:\n{errors_text}\n{additional_instruction}\n\n"
+                        f"Please carefully analyze these errors and make necessary corrections to the document to prevent them "
+                        f"from happening again. Try to find different solutions if the same errors keep occurring. \n"
+                        f"IMPORTANT: NEVER change the code block language types "
+                        f"Keep the exact same language identifier after triple backticks as in the current document."
+                        f"{llm_variable_instruction}" # Add variable instruction here as well
+                        f"\nGiven that context, please think hard and don't hurry. I want you to correct the converted document "
+                        f"in ALL instances where this error has been or can be found. Then, correct ALL other errors apart "
+                        f"from this that you see in the doc. ONLY GIVE THE UPDATED DOC, NOTHING ELSE"
+                    )
+                    response = client.chat.completions.create(
+                        model=deployment_name,
+                        messages=[
+                            {"role": "system", "content": system_prompt},
+                            {"role": "user", "content": input_content},
+                            {"role": "assistant", "content": output_file_content},
+                            {"role": "user", "content": user_prompt_for_fix}
+                        ]
+                    )
+                    output_file_content = response.choices[0].message.content
+
+                with open(output_file, "w") as f:
+                    f.write(output_file_content)
             # Check if we need to regenerate dependency files after updating main doc
             if generate_deps and dependency_files_generated:
                 # Regenerate dependency files if major changes were made to the main doc

@@ -2235,9 +2244,9 @@ def main():
         var_names = extract_aks_env_vars(output_file)
         ie_cmd = [
             "ie", "execute", output_file,
-            "--var", f"{var_names['resource_group']}=aks-tc-rged8996",
-            "--var", f"{var_names['cluster_name']}=aks-tc-clustered8996",
-            "--var", f"{var_names['region']}=canadacentral"
+            "--var", f"{var_names['resource_group']}=aks-rg",
+            "--var", f"{var_names['cluster_name']}=aks-cluster",
+            "--var", f"{var_names['region']}=eastus2"
         ]
     else:
         print_header(f"Running Innovation Engine tests", "-")

@@ -2268,8 +2277,8 @@ def main():
             iteration_file,
             attempt,
             "", # No errors in successful run
-            iteration_start_time,
-            True
+            start_time,
+            True # Assume success
         )
         all_iterations_data.append(iteration_data)
 

@@ -2368,7 +2377,7 @@ def main():
             iteration_file,
             attempt,
             iteration_errors_text, # Only errors from this iteration
-            iteration_start_time,
+            start_time,
             False
         )
         all_iterations_data.append(iteration_data)

@@ -2442,6 +2451,9 @@ def main():
             # Only document edits - no need to call LLM again
             revised_content = feedback["doc_edit"]
 
+            # Set a flag for the next iteration to use this content directly
+            user_edited_content = revised_content
+
             # Just use the user's edited version directly
             output_file_content = revised_content
 

@@ -2513,5 +2525,5 @@ def main():
 
     print_message(f"\nThe updated file is stored at: {output_file}\n")
 
-# if __name__ == "__main__":
-# main()
+if __name__ == "__main__":
+    main()
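
Regarding the suffix change in the first hunk: `openssl rand -hex 3` and `head -c 3 /dev/urandom | xxd -p` both hex-encode 3 random bytes, which yields 6 hexadecimal characters rather than 3. A small Python sketch of the equivalent encoding, for illustration only and not part of the commit:

```python
import os

# Hex-encode 3 random bytes, the same thing both shell pipelines in the
# example do (`openssl rand -hex 3` and `head -c 3 /dev/urandom | xxd -p`).
suffix = os.urandom(3).hex()
print(suffix)       # e.g. 'a3f20c', six hex characters
print(len(suffix))  # 6

# Appended to a base name so resource names stay unique per deployment.
resource_group = f"MyResourceGroup{suffix}"
print(resource_group)
```

Either shell pipeline therefore produces the same shape of suffix; the new one relies on `head` and `xxd` instead of the `openssl` binary.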
