@@ -18,25 +18,44 @@ class IndentedDumper(yaml.Dumper):
1818 def increase_indent (self , flow = False , indentless = False ):
1919 return super (IndentedDumper , self ).increase_indent (flow , False )
2020
# Fenced code block: an opening fence with an optional language tag and a
# newline, then the lazily matched body up to the next fence. Compiled once
# here instead of once per trigger.
_CODE_BLOCK_PATTERN = re.compile(r"```(?:\w+)?\n(.*?)```", re.DOTALL)


def extract_from_file(file_path, triggers):
    """
    Read a file and collect the fenced code blocks that follow the triggers.

    For each trigger, the first literal occurrence in the file is located and
    every fenced code block found after it is collected. Blocks reached from
    more than one trigger are kept once, keyed by their start offset in the
    file.

    Args:
        file_path: Path of the UTF-8 text file to scan.
        triggers: A single trigger string, or an iterable of trigger strings.

    Returns:
        A list of code-block bodies (whitespace-stripped strings) ordered by
        their position in the file, or None when no block was found or the
        file could not be read.
    """
    # Only the read itself is guarded, so the "Error reading" message is
    # printed for genuine I/O or decoding failures and nothing else.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return None

    # Accept a bare string for convenience.
    if isinstance(triggers, str):
        triggers = [triggers]

    blocks_by_start = {}  # absolute start offset -> stripped block body
    for trigger in triggers:
        # The trigger is matched literally, so plain str.find is sufficient.
        trigger_pos = content.find(trigger)
        if trigger_pos == -1:
            continue

        # Scanning with a start offset keeps match.start() absolute and
        # avoids copying the tail of the file for every trigger.
        search_from = trigger_pos + len(trigger)
        for match in _CODE_BLOCK_PATTERN.finditer(content, search_from):
            blocks_by_start.setdefault(match.start(), match.group(1).strip())

    if not blocks_by_start:
        return None
    return [blocks_by_start[pos] for pos in sorted(blocks_by_start)]
@@ -56,35 +75,75 @@ def main():
5675 help = "Path to output file (if input is file) OR output directory (if input is dir)."
5776 )
5877 parser .add_argument (
59- "--trigger" ,
78+ "-t" , "- -trigger" ,
6079 default = "YAML" ,
6180 help = "The string to search for. Defaults to the string 'YAML'."
6281 )
82+ parser .add_argument (
83+ "-a" , "--all" ,
84+ action = "store_true" ,
85+ help = "Extract all code blocks into separate files. If not set, only the first block is extracted."
86+ )
87+ parser .add_argument (
88+ "-c" , "--concat" ,
89+ action = "store_true" ,
90+ help = "Concatenate multiple code blocks into a single YAML file. Used with -a."
91+ )
6392
6493 args = parser .parse_args ()
6594
95+ # --- TRIGGER PARSING ---
96+ triggers = []
97+ if args .trigger .startswith ("(" ) and args .trigger .endswith (")" ):
98+ # Format: ([Trigger 1], [Trigger 2])
99+ # Find content inside brackets
100+ raw_triggers = re .findall (r'\[(.*?)\]' , args .trigger )
101+ triggers = raw_triggers
102+ else :
103+ triggers = [args .trigger ]
104+
66105 # --- LOGIC ---
67106
68- def save_extracted_blocks (blocks , dest_path ):
107+ def save_single_file (data , path ):
108+ """Helper to write a list of data objects to a single YAML file."""
109+ with open (path , "w" , encoding = "utf-8" ) as outfile :
110+ if len (data ) == 1 :
111+ yaml .dump (data [0 ], outfile , Dumper = IndentedDumper , default_flow_style = False , allow_unicode = True , sort_keys = False )
112+ else :
113+ yaml .dump_all (data , outfile , Dumper = IndentedDumper , default_flow_style = False , allow_unicode = True , sort_keys = False )
114+
115+ def save_extracted_blocks (blocks , dest_path , split_files = False ):
69116 """
70- Parses extracted blocks as YAML and saves them to dest_path.
71- Unwraps single blocks to be the root object.
117+ Parses extracted blocks as YAML and saves them.
118+ If split_files is True, saves each block to a separate file (appending _N).
119+ Otherwise, saves all blocks to the single dest_path.
72120 """
73- data_to_dump = []
121+ # Parse all blocks first
122+ parsed_blocks = []
74123 for b in blocks :
124+ block_data = []
75125 try :
76- # Use safe_load_all to handle potential multi-document blocks
77126 for doc in yaml .safe_load_all (b ):
78- data_to_dump .append (doc )
127+ block_data .append (doc )
79128 except Exception as e :
80- print (f"Warning: Failed to parse block as YAML in { dest_path } . Keeping as string. Error: { e } " )
81- data_to_dump .append (b )
129+ print (f"Warning: Failed to parse block as YAML. Keeping as string. Error: { e } " )
130+ block_data .append (b )
131+ parsed_blocks .append (block_data )
82132
83- with open (dest_path , "w" , encoding = "utf-8" ) as outfile :
84- if len (data_to_dump ) == 1 :
85- yaml .dump (data_to_dump [0 ], outfile , Dumper = IndentedDumper , default_flow_style = False , allow_unicode = True )
86- else :
87- yaml .dump_all (data_to_dump , outfile , Dumper = IndentedDumper , default_flow_style = False , allow_unicode = True )
133+ if split_files :
134+ base , ext = os .path .splitext (dest_path )
135+ for i , data in enumerate (parsed_blocks ):
136+ # Construct filename: file.yaml, file_2.yaml, file_3.yaml...
137+ if i == 0 :
138+ current_path = dest_path
139+ else :
140+ current_path = f"{ base } _{ i + 1 } { ext } "
141+
142+ save_single_file (data , current_path )
143+ else :
144+ # Flatten all parsed data into one list for a single file
145+ all_data = [item for sublist in parsed_blocks for item in sublist ]
146+ save_single_file (all_data , dest_path )
88147
89148 # CASE 1: Input is a Directory
90149 if os .path .isdir (args .input ):
@@ -99,9 +158,7 @@ def save_extracted_blocks(blocks, dest_path):
99158 target_path = args .output
100159 replacements = {
101160 "{filename}" : file_stem ,
102- "{firstword}" : first_word ,
103- "$filename" : file_stem ,
104- "$firstword" : first_word
161+ "{firstword}" : first_word
105162 }
106163
107164 for placeholder , replacement in replacements .items ():
@@ -120,28 +177,38 @@ def save_extracted_blocks(blocks, dest_path):
120177 if dest_dir and not os .path .exists (dest_dir ):
121178 os .makedirs (dest_dir , exist_ok = True )
122179
123- blocks = extract_from_file (source_path , args . trigger )
180+ blocks = extract_from_file (source_path , triggers )
124181
125182 if blocks :
126- save_extracted_blocks (blocks , dest_path )
127- print (f"Processed: { filename } -> { dest_path } " )
183+ # Filter blocks based on -a flag
184+ if not args .all :
185+ blocks = blocks [:1 ]
186+
187+ should_split = args .all and not args .concat
188+ save_extracted_blocks (blocks , dest_path , split_files = should_split )
189+ print (f"Processed: { filename } -> { dest_path } (Split: { should_split } )" )
128190 count += 1
129- print (f"--- Batch Complete. Created { count } YAML files. ---" )
191+ print (f"--- Batch Complete. Processed { count } source files. ---" )
130192
131193 # CASE 2: Input is a Single File
132194 elif os .path .isfile (args .input ):
133- blocks = extract_from_file (args .input , args . trigger )
195+ blocks = extract_from_file (args .input , triggers )
134196
135197 if blocks :
136198 # If output path is a directory, verify valid filename provided or derive it
137199 if os .path .isdir (args .output ):
138200 print ("Error: Input is a file, but output is a directory. Please specify a full output filename." )
139201 sys .exit (1 )
140202
141- save_extracted_blocks (blocks , args .output )
142- print (f"Success! Extracted to '{ args .output } '" )
203+ # Filter blocks based on -a flag
204+ if not args .all :
205+ blocks = blocks [:1 ]
206+
207+ should_split = args .all and not args .concat
208+ save_extracted_blocks (blocks , args .output , split_files = should_split )
209+ print (f"Success! Extracted to '{ args .output } ' (Split: { should_split } )" )
143210 else :
144- print (f"No blocks found in '{ args .input } ' after trigger ' { args . trigger } ' " )
211+ print (f"No blocks found in '{ args .input } ' with provided triggers. " )
145212
146213 else :
147214 print ("Error: Input path does not exist." )
0 commit comments