@@ -22,10 +22,10 @@ use std::path::Path;
2222use std:: process:: exit;
2323use std:: str:: FromStr ;
2424
25- use prqlc:: ast;
2625use prqlc:: semantic;
2726use prqlc:: semantic:: reporting:: { collect_frames, label_references} ;
2827use prqlc:: semantic:: NS_DEFAULT_DB ;
28+ use prqlc:: { ast, prql_to_tokens} ;
2929use prqlc:: { ir:: pl:: Lineage , ir:: Span } ;
3030use prqlc:: { pl_to_prql, pl_to_rq_tree, prql_to_pl, prql_to_pl_tree, rq_to_sql, SourceTree } ;
3131use prqlc:: { Options , Target } ;
@@ -79,6 +79,14 @@ enum Command {
7979 format : Format ,
8080 } ,
8181
82+ /// Lex into Tokens
83+ Lex {
84+ #[ command( flatten) ]
85+ io_args : IoArgs ,
86+ #[ arg( value_enum, long, default_value = "yaml" ) ]
87+ format : Format ,
88+ } ,
89+
8290 /// Parse & generate PRQL code back
8391 #[ command( name = "fmt" ) ]
8492 Format {
@@ -288,6 +296,17 @@ impl Command {
288296 Format :: Yaml => serde_yaml:: to_string ( & ast) ?. into_bytes ( ) ,
289297 }
290298 }
// `Lex` command: take the single source out of `sources.sources`, run it
// through `prql_to_tokens`, and serialize the resulting tokens to bytes as
// pretty-printed JSON or as YAML, depending on `format`.
299+ Command :: Lex { format, .. } => {
// NOTE(review): `exactly_one` (presumably itertools') also fails when the
// iterator is empty, so the "found multiple sources" message below would be
// reported for zero sources as well — confirm whether that case can occur.
300+ let s = sources. sources . values ( ) . exactly_one ( ) . or_else ( |_| {
301+ // TODO: allow multiple sources
302+ bail ! ( "Currently `lex` only works with a single source, but found multiple sources" )
303+ } ) ?;
304+ let tokens = prql_to_tokens ( s) ?;
// Both serializers yield a `String`; it is converted to bytes here.
305+ match format {
306+ Format :: Json => serde_json:: to_string_pretty ( & tokens) ?. into_bytes ( ) ,
307+ Format :: Yaml => serde_yaml:: to_string ( & tokens) ?. into_bytes ( ) ,
308+ }
309+ }
291310 Command :: Collect ( _) => {
292311 let mut root_module_def = prql_to_pl_tree ( sources) ?;
293312
@@ -429,7 +448,7 @@ impl Command {
429448 }
430449 }
431450
432- _ => unreachable ! ( ) ,
451+ _ => unreachable ! ( "Other commands shouldn't reach `execute`" ) ,
433452 } )
434453 }
435454
@@ -438,11 +457,10 @@ impl Command {
438457 // `input`, rather than matching on them and grabbing `input` from
439458 // `self`? But possibly if everything moves to `io_args`, then this is
440459 // quite reasonable?
441- use Command :: {
442- Collect , Debug , Experimental , Parse , Resolve , SQLAnchor , SQLCompile , SQLPreprocess ,
443- } ;
460+ use Command :: * ;
444461 let io_args = match self {
445462 Parse { io_args, .. }
463+ | Lex { io_args, .. }
446464 | Collect ( io_args)
447465 | Resolve { io_args, .. }
448466 | SQLCompile { io_args, .. }
@@ -481,10 +499,11 @@ impl Command {
481499
482500 fn write_output ( & mut self , data : & [ u8 ] ) -> std:: io:: Result < ( ) > {
483501 use Command :: {
484- Collect , Debug , Experimental , Parse , Resolve , SQLAnchor , SQLCompile , SQLPreprocess ,
502+ Collect , Debug , Experimental , Lex , Parse , Resolve , SQLAnchor , SQLCompile , SQLPreprocess ,
485503 } ;
486504 let mut output = match self {
487505 Parse { io_args, .. }
506+ | Lex { io_args, .. }
488507 | Collect ( io_args)
489508 | Resolve { io_args, .. }
490509 | SQLCompile { io_args, .. }
@@ -815,4 +834,42 @@ sort full
815834 column: 2
816835 "### ) ;
817836 }
837+
// Snapshot test for the `Lex` command: runs `Command::execute` with YAML
// output on the query `from x | select y` and checks the serialized token
// list (five tokens, each with a `kind` and a `span` of `start`/`end`).
838+ #[ test]
839+ fn lex ( ) {
840+ let output = Command :: execute (
841+ & Command :: Lex {
842+ io_args : IoArgs :: default ( ) ,
843+ format : Format :: Yaml ,
844+ } ,
845+ & mut "from x | select y" . into ( ) ,
// NOTE(review): the meaning of this empty third argument is not visible
// here — presumably the main source path; confirm against `execute`.
846+ "" ,
847+ )
848+ . unwrap ( ) ;
849+
// The output bytes are decoded as UTF-8 and trimmed before comparison.
// The expected spans line up with half-open offsets into the input string
// (e.g. `from` is 0..4, `x` is 5..6).
850+ // TODO: terser output; maybe serialize span as `0..4`? Remove the
851+ // `!Ident` complication?
852+ assert_snapshot ! ( String :: from_utf8( output) . unwrap( ) . trim( ) , @r###"
853+ - kind: !Ident from
854+ span:
855+ start: 0
856+ end: 4
857+ - kind: !Ident x
858+ span:
859+ start: 5
860+ end: 6
861+ - kind: !Control '|'
862+ span:
863+ start: 7
864+ end: 8
865+ - kind: !Ident select
866+ span:
867+ start: 9
868+ end: 15
869+ - kind: !Ident y
870+ span:
871+ start: 16
872+ end: 17
873+ "### ) ;
874+ }
818875}
0 commit comments