@@ -22,10 +22,10 @@ use std::path::Path;
2222use std:: process:: exit;
2323use std:: str:: FromStr ;
2424
25- use prqlc:: ast;
2625use prqlc:: semantic;
2726use prqlc:: semantic:: reporting:: { collect_frames, label_references} ;
2827use prqlc:: semantic:: NS_DEFAULT_DB ;
28+ use prqlc:: { ast, prql_to_tokens} ;
2929use prqlc:: { ir:: pl:: Lineage , ir:: Span } ;
3030use prqlc:: { pl_to_prql, pl_to_rq_tree, prql_to_pl, prql_to_pl_tree, rq_to_sql, SourceTree } ;
3131use prqlc:: { Options , Target } ;
@@ -79,6 +79,14 @@ enum Command {
7979 format : Format ,
8080 } ,
8181
82+ /// Lex into Tokens
83+ Lex {
84+ #[ command( flatten) ]
85+ io_args : IoArgs ,
86+ #[ arg( value_enum, long, default_value = "yaml" ) ]
87+ format : Format ,
88+ } ,
89+
8290 /// Parse & generate PRQL code back
8391 #[ command( name = "fmt" ) ]
8492 Format {
@@ -288,6 +296,17 @@ impl Command {
288296 Format :: Yaml => serde_yaml:: to_string ( & ast) ?. into_bytes ( ) ,
289297 }
290298 }
299+ Command :: Lex { format, .. } => {
300+ let s = sources. sources . values ( ) . exactly_one ( ) . or_else ( |_| {
301+ // TODO: allow multiple sources
302+ bail ! ( "Currently `lex` only works with a single source, but found multiple sources" )
303+ } ) ?;
304+ let tokens = prql_to_tokens ( s) ?;
305+ match format {
306+ Format :: Json => serde_json:: to_string_pretty ( & tokens) ?. into_bytes ( ) ,
307+ Format :: Yaml => serde_yaml:: to_string ( & tokens) ?. into_bytes ( ) ,
308+ }
309+ }
291310 Command :: Collect ( _) => {
292311 let mut root_module_def = prql_to_pl_tree ( sources) ?;
293312
@@ -481,10 +500,11 @@ impl Command {
481500
482501 fn write_output ( & mut self , data : & [ u8 ] ) -> std:: io:: Result < ( ) > {
483502 use Command :: {
484- Collect , Debug , Experimental , Parse , Resolve , SQLAnchor , SQLCompile , SQLPreprocess ,
503+ Collect , Debug , Experimental , Lex , Parse , Resolve , SQLAnchor , SQLCompile , SQLPreprocess ,
485504 } ;
486505 let mut output = match self {
487506 Parse { io_args, .. }
507+ | Lex { io_args, .. }
488508 | Collect ( io_args)
489509 | Resolve { io_args, .. }
490510 | SQLCompile { io_args, .. }
@@ -815,4 +835,42 @@ sort full
815835 column: 2
816836 "### ) ;
817837 }
838+
/// Runs the `lex` command with YAML output over `from x | select y` and
/// compares the trimmed, UTF-8 decoded result against an inline snapshot
/// listing each token's `kind` and its `span` start/end.
#[test]
fn lex() {
    let command = Command::Lex {
        io_args: IoArgs::default(),
        format: Format::Yaml,
    };
    // The target type of `.into()` is inferred from the `execute` call below.
    let mut sources = "from x | select y".into();

    let bytes = Command::execute(&command, &mut sources, "").unwrap();
    let rendered = String::from_utf8(bytes).unwrap();

    // TODO: terser output; maybe serialize span as `0..4`? Remove the
    // `!Ident` complication?
    assert_snapshot!(rendered.trim(), @r###"
    - kind: !Ident from
      span:
        start: 0
        end: 4
    - kind: !Ident x
      span:
        start: 5
        end: 6
    - kind: !Control '|'
      span:
        start: 7
        end: 8
    - kind: !Ident select
      span:
        start: 9
        end: 15
    - kind: !Ident y
      span:
        start: 16
        end: 17
    "###);
}
818876}
0 commit comments