22
33import logging
44import os
5+ import yaml
56from pathlib import Path
67from typing import List
7- import yaml
8-
9- from aws_doc_sdk_examples_tools .doc_gen import DocGen , Snippet
108
11- DEFAULT_METADATA_PREFIX = "[DEFAULT]"
9+ from aws_doc_sdk_examples_tools . doc_gen import DocGen
1210
11+ DEFAULT_METADATA_PREFIX = "DEFAULT"
1312
14- # Setup logging
15- logging .basicConfig (level = logging .INFO )
1613logger = logging .getLogger (__name__ )
1714
1815
@@ -26,6 +23,8 @@ def make_doc_gen(root: Path) -> DocGen:
2623def write_prompts (doc_gen : DocGen , out_dir : Path , language : str ) -> None :
2724 examples = doc_gen .examples
2825 snippets = doc_gen .snippets
26+
27+ filtered_examples = []
2928 for example_id , example in examples .items ():
3029 # TCXContentAnalyzer prefixes new metadata title/title_abbrev entries with
3130 # the DEFAULT_METADATA_PREFIX. Checking this here to make sure we're only
@@ -35,30 +34,56 @@ def write_prompts(doc_gen: DocGen, out_dir: Path, language: str) -> None:
3534 if title .startswith (DEFAULT_METADATA_PREFIX ) and title_abbrev .startswith (
3635 DEFAULT_METADATA_PREFIX
3736 ):
38- prompt_path = out_dir / f"{ example_id } .md"
39- snippet_key = (
40- example .languages [language ]
41- .versions [0 ]
42- .excerpts [0 ]
43- .snippet_files [0 ]
44- .replace ("/" , "." )
45- )
46- snippet = snippets [snippet_key ]
47- prompt_path .write_text (snippet .code , encoding = "utf-8" )
37+ filtered_examples .append ((example_id , example ))
38+
39+ batch_size = 150
40+ total_examples = len (filtered_examples )
41+ num_batches = (total_examples + batch_size - 1 ) // batch_size
42+
43+ logger .info (
44+ f"Splitting { total_examples } examples into { num_batches } batches of { batch_size } "
45+ )
46+
47+ for batch_num in range (num_batches ):
48+ batch_dir = out_dir / f"batch_{ (batch_num + 1 ):03} "
49+ batch_dir .mkdir (exist_ok = True )
50+
51+ start_idx = batch_num * batch_size
52+ end_idx = min ((batch_num + 1 ) * batch_size , total_examples )
53+
54+ for i in range (start_idx , end_idx ):
55+ example_id , example = filtered_examples [i ]
56+ prompt_path = batch_dir / f"{ example_id } .md"
57+
58+ try :
59+ snippet_key = (
60+ example .languages [language ]
61+ .versions [0 ]
62+ .excerpts [0 ]
63+ .snippet_files [0 ]
64+ .replace ("/" , "." )
65+ )
66+ snippet = snippets [snippet_key ]
67+ prompt_path .write_text (snippet .code , encoding = "utf-8" )
68+ except (KeyError , IndexError , AttributeError ) as e :
69+ logger .warning (f"Error processing example { example_id } : { e } " )
4870
4971
5072def setup_ailly (system_prompts : List [str ], out_dir : Path ) -> None :
5173 """Create the .aillyrc configuration file."""
5274 fence = "---"
5375 options = {
5476 "isolated" : "true" ,
55- "mcp" : {
56- "awslabs.aws-documentation-mcp-server" : {
57- "type" : "stdio" ,
58- "command" : "uvx" ,
59- "args" : ["awslabs.aws-documentation-mcp-server@latest" ],
60- }
61- },
77+ "overwrite" : "true"
78+ # MCP assistance did not produce noticeably different results, but it was
79+ # slowing things down by 10x. Disabled for now.
80+ # "mcp": {
81+ # "awslabs.aws-documentation-mcp-server": {
82+ # "type": "stdio",
83+ # "command": "uvx",
84+ # "args": ["awslabs.aws-documentation-mcp-server@latest"],
85+ # }
86+ # },
6287 }
6388 options_block = yaml .dump (options ).strip ()
6489 prompts_block = "\n " .join (system_prompts )
0 commit comments