Skip to content

Commit 730c9a2

Browse files
committed
Format change and policy title prefix
Change the parser's expected format to something less complex than JSON. Also add example policy title prefixes.
1 parent 85924b0 commit 730c9a2

File tree

6 files changed

+334
-92
lines changed

6 files changed

+334
-92
lines changed

aws_doc_sdk_examples_tools/agent/bin/main.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
import typer
66

7-
from aws_doc_sdk_examples_tools.agent.make_prompts import main as make_prompts
8-
from aws_doc_sdk_examples_tools.agent.parse_json_files import main as parse_json_files
9-
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update as update_doc_gen
7+
from aws_doc_sdk_examples_tools.agent.make_prompts import make_prompts
8+
from aws_doc_sdk_examples_tools.agent.process_ailly_files import process_ailly_files
9+
from aws_doc_sdk_examples_tools.agent.update_doc_gen import update_doc_gen
1010
from aws_doc_sdk_examples_tools.yaml_writer import prepare_write, write_many
1111

1212
app = typer.Typer()
@@ -16,26 +16,28 @@
1616
IAM_UPDATES_PATH = AILLY_DIR_PATH / "iam_updates.json"
1717

1818

19-
def get_ailly_files(dir: Path) -> List[Path]:
20-
return [
21-
file
22-
for file in dir.iterdir()
23-
if file.is_file() and file.name.endswith(".ailly.md")
24-
]
25-
26-
2719
@app.command()
def update(iam_tributary_root: str, system_prompts: List[str] = []) -> None:
    """
    Generate new IAM policy metadata for a tributary.
    """
    tributary_root = Path(iam_tributary_root)

    # Write the Ailly configuration and prompt files into the Ailly directory.
    make_prompts(
        doc_gen_root=tributary_root,
        system_prompts=system_prompts,
        out_dir=AILLY_DIR_PATH,
        language="IAMPolicyGrammar",
    )

    # Run Ailly over the generated prompts.
    run(["npx", "@ailly/cli", "--root", AILLY_DIR])

    # Collect Ailly's output files into a single JSON file of updates.
    process_ailly_files(
        input_dir=str(AILLY_DIR_PATH),
        output_file=str(IAM_UPDATES_PATH),
    )

    # Apply the collected updates and write the resulting examples back out.
    updated_doc_gen = update_doc_gen(
        doc_gen_root=tributary_root, iam_updates_path=IAM_UPDATES_PATH
    )
    pending_writes = prepare_write(updated_doc_gen.examples)
    write_many(tributary_root, pending_writes)
4143

aws_doc_sdk_examples_tools/agent/make_prompts.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,13 @@ def make_doc_gen(root: Path) -> DocGen:
1919
return doc_gen
2020

2121

22-
def write_prompts(doc_gen: DocGen, out: Path) -> None:
23-
out.mkdir(parents=True, exist_ok=True)
22+
def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
2423
examples = doc_gen.examples
2524
snippets = doc_gen.snippets
2625
for example_id, example in examples.items():
27-
# Postfix with `.md` so Ailly will pick it up.
28-
prompt_path = out / f"{example_id}.md"
29-
# This assumes we're running DocGen specifically on AWSIAMPolicyExampleReservoir.
26+
prompt_path = out_dir / f"{example_id}.md"
3027
snippet_key = (
31-
example.languages["IAMPolicyGrammar"]
28+
example.languages[language]
3229
.versions[0]
3330
.excerpts[0]
3431
.snippet_files[0]
@@ -38,7 +35,7 @@ def write_prompts(doc_gen: DocGen, out: Path) -> None:
3835
prompt_path.write_text(snippet.code, encoding="utf-8")
3936

4037

41-
def setup_ailly(system_prompts: List[str], out: Path) -> None:
38+
def setup_ailly(system_prompts: List[str], out_dir: Path) -> None:
4239
"""Create the .aillyrc configuration file."""
4340
fence = "---"
4441
options = {"isolated": "true"}
@@ -47,32 +44,33 @@ def setup_ailly(system_prompts: List[str], out: Path) -> None:
4744

4845
content = f"{fence}\n{options_block}\n{fence}\n{prompts_block}"
4946

50-
aillyrc_path = out / ".aillyrc"
51-
aillyrc_path.parent.mkdir(parents=True, exist_ok=True)
47+
aillyrc_path = out_dir / ".aillyrc"
5248
aillyrc_path.write_text(content, encoding="utf-8")
5349

5450

55-
def read_files(values: List[str]) -> List[str]:
    """
    Resolve each value to text.

    A value that names an existing file is replaced by that file's contents
    (read as UTF-8); any other value is passed through unchanged.

    Args:
        values: File paths and/or literal strings.

    Returns:
        One string per input value, in the same order.
    """

    def resolve(value: str) -> str:
        # Anything that is not an existing file is treated as literal text.
        if not os.path.isfile(value):
            return value
        with open(value, "r", encoding="utf-8") as f:
            return f.read()

    return [resolve(value) for value in values]
6561

6662

6763
def validate_root_path(doc_gen_root: Path) -> None:
    """
    Ensure the DocGen root is an existing directory.

    Args:
        doc_gen_root: Path expected to point at a directory.

    Raises:
        AssertionError: If `doc_gen_root` is not a directory.
    """
    # Raise explicitly instead of using `assert`: assert statements are removed
    # when Python runs with -O, which would silently disable this check. The
    # exception type is kept so existing callers see the same failure.
    if not doc_gen_root.is_dir():
        raise AssertionError(f"Not a directory: {doc_gen_root}")
7065

7166

72-
def make_prompts(
    doc_gen_root: Path, system_prompts: List[str], out_dir: Path, language: str
) -> None:
    """
    Generate prompts and configuration files for Ailly.

    Validates `doc_gen_root`, creates `out_dir` if it does not exist, writes
    the Ailly configuration from the resolved system prompts, and then writes
    the example prompts for `language`.

    Args:
        doc_gen_root: Directory to build the DocGen from.
        system_prompts: System prompts, given as literal text or file paths.
        out_dir: Directory that receives the configuration and prompt files.
        language: Key used to select each example's language entry.
    """
    validate_root_path(doc_gen_root)
    out_dir.mkdir(parents=True, exist_ok=True)

    prompt_texts = read_files(system_prompts)
    setup_ailly(prompt_texts, out_dir)

    write_prompts(
        doc_gen=make_doc_gen(doc_gen_root),
        out_dir=out_dir,
        language=language,
    )

aws_doc_sdk_examples_tools/agent/parse_json_files.py

Lines changed: 0 additions & 56 deletions
This file was deleted.
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""
2+
Parse generated Ailly output for `key => value` pairs.
3+
4+
This module processes *.md.ailly.md files, extracts key-value pairs,
5+
converts them to JSON entries in an array, and writes the JSON array
6+
to a specified output file.
7+
"""
8+
9+
import json
10+
import logging
11+
from pathlib import Path
12+
from typing import Any, Dict, List, Set
13+
14+
# Root logging is configured at import time; only warnings and above are emitted.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Keys that must be present after a block is parsed; MissingExpectedKeys is
# raised otherwise.
EXPECTED_KEYS: Set[str] = {"title", "title_abbrev"}

# Text prepended to the parsed value of each listed key.
VALUE_PREFIXES: Dict[str, str] = dict(
    title="Example policy: ",
    title_abbrev="Example: ",
)
22+
23+
24+
class MissingExpectedKeys(Exception):
    """Raised when parsed key-value pairs lack one or more expected keys."""
26+
27+
28+
def parse_fenced_blocks(content: str, fence="===") -> List[List[str]]:
    """
    Collect the lines found between pairs of fence markers.

    A line counts as a fence when, after stripping surrounding whitespace, it
    equals `fence`. Fences alternate between opening and closing a block.
    Lines outside any block are ignored, and a block that is opened but never
    closed is discarded.

    Args:
        content: Full text to scan.
        fence: Marker line that delimits a block.

    Returns:
        One list of raw (unstripped) lines per closed block, in order of
        appearance.
    """
    collected: List[List[str]] = []
    # `pending` holds the lines of the currently open block, or None when the
    # scan is outside any fence.
    pending = None

    for raw_line in content.splitlines():
        is_fence = raw_line.strip() == fence
        if is_fence and pending is None:
            pending = []
        elif is_fence:
            collected.append(pending)
            pending = None
        elif pending is not None:
            pending.append(raw_line)

    return collected
43+
44+
45+
def parse_block_lines(
    block: List[str], key_pairs: Dict[str, str], expected_keys=EXPECTED_KEYS
) -> None:
    """
    Extract `key => value` pairs from a block's lines into `key_pairs`.

    Each line is split on its first `=>`; the stripped text on either side
    becomes the key and the value. Lines without `=>` are ignored. `key_pairs`
    is updated in place, so pairs parsed before a failure remain in it.

    Args:
        block: Lines of one fenced block.
        key_pairs: Mapping that receives the parsed pairs; a repeated key is
            overwritten by the later value.
        expected_keys: Keys that must be present in `key_pairs` once the block
            has been parsed.

    Raises:
        MissingExpectedKeys: If any expected key is absent from `key_pairs`;
            the exception carries the set of missing keys.
    """
    for line in block:
        # partition() always yields three parts, so no length check is needed;
        # an empty separator means the line contains no `=>`.
        key, separator, value = line.partition("=>")
        if separator:
            key_pairs[key.strip()] = value.strip()

    if missing_keys := expected_keys - key_pairs.keys():
        raise MissingExpectedKeys(missing_keys)
56+
57+
58+
def parse_ailly_file(
    file_path: str, value_prefixes: Dict[str, str] = VALUE_PREFIXES
) -> Dict[str, Any]:
    """
    Parse an .md.ailly.md file and extract key-value pairs that are between === fence markers. Each
    key value pair is assumed to be on one line and in the form of `key => value`. This formatting is
    totally dependent on the LLM output written by Ailly.

    Args:
        file_path: Path to the .md.ailly.md file
        value_prefixes: Mapping of key to the text prepended to that key's parsed value

    Returns:
        Dictionary containing the extracted key-value pairs, plus `id` (the file name up to
        `.md.ailly.md`) and `_source_file`. An empty dictionary is returned when the file
        cannot be read or parsed, so callers can skip it.
    """
    result: Dict[str, str] = {}

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()

        for block in parse_fenced_blocks(content):
            parse_block_lines(block, result)

        for key, prefix in value_prefixes.items():
            if key in result:
                result[key] = f"{prefix}{result[key]}"

        result["id"] = Path(file_path).name.split(".md.ailly.md")[0]
        result["_source_file"] = file_path

    except Exception as e:
        logger.error(f"Error parsing file {file_path}", exc_info=e)
        # parse_block_lines fills `result` in place before it raises, so a
        # failed parse would otherwise hand back a partial entry (no prefixes,
        # no "id"). Return nothing instead.
        return {}

    return result
94+
95+
96+
def process_ailly_files(
    input_dir: str, output_file: str, file_pattern: str = "*.md.ailly.md"
) -> None:
    """
    Process all .md.ailly.md files in the input directory and write the results as JSON to the output file.

    Files are processed in sorted path order so the output is stable between runs. Files that
    yield no parsed data are left out of the output. Any error raised while globbing, parsing,
    or writing is logged rather than propagated.

    Args:
        input_dir: Directory containing .md.ailly.md files
        output_file: Path to the output JSON file
        file_pattern: Pattern to match files (default: "*.md.ailly.md")
    """
    results = []
    input_path = Path(input_dir)

    try:
        # Path.glob yields matches in a filesystem-dependent order; sorting
        # keeps the generated JSON deterministic.
        for file_path in sorted(input_path.glob(file_pattern)):
            logger.info(f"Processing file: {file_path}")
            parsed_data = parse_ailly_file(str(file_path))
            if parsed_data:
                results.append(parsed_data)

        with open(output_file, "w", encoding="utf-8") as out_file:
            json.dump(results, out_file, indent=2)

        logger.info(
            f"Successfully processed {len(results)} files. Output written to {output_file}"
        )

    except Exception as e:
        logger.error("Error processing files", exc_info=e)

0 commit comments

Comments
 (0)