1+ """Command line interface for structured output extraction."""
2+
3+ import json
4+ import sys
5+ from pathlib import Path
6+ from typing import Optional
7+
8+ import click
9+ from .config import Config
10+ from .extractor import StructuredExtractor
11+ from .logger import setup_logger , get_logger
12+ from .templates .job_description import JobDescriptionSchema
13+ from .templates .recipe import RecipeSchema
14+
# Registry of predefined templates: maps the template name given on the
# command line to the schema class handed to the extractor.
TEMPLATES = {
    "job": JobDescriptionSchema,
    "recipe": RecipeSchema,
}
20+
21+
@click.group()
@click.option("--debug", is_flag=True, help="Enable debug logging")
@click.pass_context
def main(ctx: click.Context, debug: bool) -> None:
    """Structured Output Cookbook - Extract structured data from text using LLMs."""
    # The docstring above is shown by click as the group's help text, so the
    # implementation notes live in comments instead.
    #
    # Builds the configuration from the environment, sets up logging, and
    # stores both the configuration and a logger on the click context so
    # the subcommands can read them from ``ctx.obj``.
    ctx.ensure_object(dict)

    settings = Config.from_env()
    if debug:
        # --debug overrides the log level that came from the environment.
        settings.log_level = "DEBUG"
    setup_logger(settings)

    ctx.obj["config"] = settings
    ctx.obj["logger"] = get_logger(__name__)
36+
37+
@main.command()
def list_templates() -> None:
    """List available predefined templates."""
    # One header line, then one line per registered template showing its
    # name and the description reported by its schema class.
    click.echo("Available templates:")
    entries = (
        f" {template_name} : {schema_cls.get_schema_description()} "
        for template_name, schema_cls in TEMPLATES.items()
    )
    for entry in entries:
        click.echo(entry)
44+
45+
@main.command()
@click.argument("template", type=click.Choice(list(TEMPLATES)))
@click.option(
    "--input-file", "-i", type=click.Path(exists=True), help="Input text file"
)
@click.option("--text", "-t", help="Input text directly")
@click.option("--output", "-o", type=click.Path(), help="Output JSON file")
@click.option("--pretty", is_flag=True, help="Pretty print JSON output")
@click.pass_context
def extract(
    ctx: click.Context,
    template: str,
    input_file: Optional[str],
    text: Optional[str],
    output: Optional[str],
    pretty: bool,
) -> None:
    """Extract data using a predefined template."""
    # The docstring above is shown by click as the command's help text, so
    # the detailed contract is documented here.
    #
    # Parameters:
    #   ctx        - click context; ``ctx.obj`` must hold "logger" and "config".
    #   template   - key into TEMPLATES selecting the schema to extract with.
    #   input_file - path of a UTF-8 text file to read the input from.
    #   text       - input text given directly; used only when no input file
    #                is given. An empty string counts as "not provided".
    #   output     - path to write the JSON result to; stdout when omitted.
    #   pretty     - indent the JSON by two spaces when set.
    #
    # Exits with status 1 (message on stderr) when no input is supplied, a
    # file cannot be read or written, or the extraction reports failure.
    logger = ctx.obj["logger"]
    config = ctx.obj["config"]

    # Get input text; --input-file takes precedence over --text.
    if input_file:
        try:
            input_text = Path(input_file).read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # click.Path(exists=True) still lets directories, unreadable
            # files and non-UTF-8 files through; report those cleanly
            # instead of letting a traceback escape.
            click.echo(f"Error reading input file: {e}", err=True)
            sys.exit(1)
    elif text:
        input_text = text
    else:
        click.echo("Error: Must provide either --input-file or --text", err=True)
        sys.exit(1)

    # Extract data
    extractor = StructuredExtractor(config)
    schema = TEMPLATES[template]

    logger.info(f"Extracting using template: {template} ")
    result = extractor.extract(input_text, schema)

    if not result.success:
        click.echo(f"Extraction failed: {result.error} ", err=True)
        sys.exit(1)

    # Format output; ensure_ascii=False keeps non-ASCII characters readable.
    indent = 2 if pretty else None
    output_json = json.dumps(result.data, indent=indent, ensure_ascii=False)

    # Write output
    if output:
        try:
            Path(output).write_text(output_json, encoding="utf-8")
        except (OSError, UnicodeError) as e:
            click.echo(f"Error writing output file: {e}", err=True)
            sys.exit(1)
        click.echo(f"Results saved to {output} ")
    else:
        click.echo(output_json)

    # Show stats
    if result.tokens_used:
        logger.info(f"Tokens used: {result.tokens_used} ")
99+
100+
@main.command()
@click.option(
    "--schema-file",
    "-s",
    type=click.Path(exists=True),
    required=True,
    help="JSON schema file",
)
@click.option(
    "--prompt-file", "-p", type=click.Path(exists=True), help="System prompt file"
)
@click.option("--prompt", help="System prompt text")
@click.option(
    "--input-file", "-i", type=click.Path(exists=True), help="Input text file"
)
@click.option("--text", "-t", help="Input text directly")
@click.option("--output", "-o", type=click.Path(), help="Output JSON file")
@click.option("--pretty", is_flag=True, help="Pretty print JSON output")
@click.pass_context
def extract_custom(
    ctx: click.Context,
    schema_file: str,
    prompt_file: Optional[str],
    prompt: Optional[str],
    input_file: Optional[str],
    text: Optional[str],
    output: Optional[str],
    pretty: bool,
) -> None:
    """Extract data using a custom JSON schema."""
    # The docstring above is shown by click as the command's help text, so
    # the detailed contract is documented here.
    #
    # Parameters:
    #   ctx         - click context; ``ctx.obj`` must hold "logger" and "config".
    #   schema_file - path of a UTF-8 JSON file holding the schema.
    #   prompt_file - path of a UTF-8 file holding the system prompt.
    #   prompt      - system prompt text; used only when no prompt file is
    #                 given. An empty string counts as "not provided".
    #   input_file  - path of a UTF-8 text file to read the input from.
    #   text        - input text given directly; used only when no input
    #                 file is given. An empty string counts as "not provided".
    #   output      - path to write the JSON result to; stdout when omitted.
    #   pretty      - indent the JSON by two spaces when set.
    #
    # Exits with status 1 (message on stderr) when the schema, prompt or
    # input cannot be obtained, the output cannot be written, or the
    # extraction reports failure.
    logger = ctx.obj["logger"]
    config = ctx.obj["config"]

    # Load schema. click.Path(exists=True) already guarantees the path
    # exists, but it may still be a directory, unreadable, or not UTF-8, so
    # every OSError and decoding error is handled, not just a missing file.
    try:
        schema_dict = json.loads(Path(schema_file).read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        click.echo(f"Error loading schema: {e} ", err=True)
        sys.exit(1)

    # Get system prompt; --prompt-file takes precedence over --prompt.
    if prompt_file:
        try:
            system_prompt = Path(prompt_file).read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            click.echo(f"Error reading prompt file: {e}", err=True)
            sys.exit(1)
    elif prompt:
        system_prompt = prompt
    else:
        click.echo("Error: Must provide either --prompt-file or --prompt", err=True)
        sys.exit(1)

    # Get input text; --input-file takes precedence over --text.
    if input_file:
        try:
            input_text = Path(input_file).read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            click.echo(f"Error reading input file: {e}", err=True)
            sys.exit(1)
    elif text:
        input_text = text
    else:
        click.echo("Error: Must provide either --input-file or --text", err=True)
        sys.exit(1)

    # Extract data
    extractor = StructuredExtractor(config)

    logger.info("Extracting using custom schema")
    result = extractor.extract_with_custom_schema(
        input_text, schema_dict, system_prompt
    )

    if not result.success:
        click.echo(f"Extraction failed: {result.error} ", err=True)
        sys.exit(1)

    # Format output; ensure_ascii=False keeps non-ASCII characters readable.
    indent = 2 if pretty else None
    output_json = json.dumps(result.data, indent=indent, ensure_ascii=False)

    # Write output
    if output:
        try:
            Path(output).write_text(output_json, encoding="utf-8")
        except (OSError, UnicodeError) as e:
            click.echo(f"Error writing output file: {e}", err=True)
            sys.exit(1)
        click.echo(f"Results saved to {output} ")
    else:
        click.echo(output_json)

    # Show stats
    if result.tokens_used:
        logger.info(f"Tokens used: {result.tokens_used} ")
173+
174+
# Run the CLI group when this module is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments