Skip to content

Commit db24427

Browse files
authored
feat: Add prqlc lex command to the CLI (#4467)
1 parent 625fe36 commit db24427

File tree

10 files changed: +239 additions, -36 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
- Initial implementation of an experimental documentation generator that
1010
generates Markdown documentation from `.prql` files. (@vanillajonathan,
1111
#4152).
12+
- Add `prqlc lex` command to the CLI (@max-sixty)
1213

1314
**Fixes**:
1415

prqlc/prqlc-parser/src/lib.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,12 @@ mod types;
1010
use chumsky::error::SimpleReason;
1111
use chumsky::{prelude::*, Stream};
1212

13-
use prqlc_ast::error::Reason;
14-
use prqlc_ast::error::{Error, WithErrorInfo};
13+
use prqlc_ast::error::{Error, Reason, WithErrorInfo};
1514
use prqlc_ast::stmt::*;
1615
use prqlc_ast::Span;
1716

18-
use lexer::TokenKind;
19-
use lexer::{Token, TokenVec};
17+
use lexer::Token;
18+
pub use lexer::{TokenKind, TokenVec};
2019
use span::ParserSpan;
2120

2221
/// Build PRQL AST from a PRQL query string.

prqlc/prqlc/src/cli/mod.rs

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ use std::path::Path;
2222
use std::process::exit;
2323
use std::str::FromStr;
2424

25-
use prqlc::ast;
2625
use prqlc::semantic;
2726
use prqlc::semantic::reporting::{collect_frames, label_references};
2827
use prqlc::semantic::NS_DEFAULT_DB;
28+
use prqlc::{ast, prql_to_tokens};
2929
use prqlc::{ir::pl::Lineage, ir::Span};
3030
use prqlc::{pl_to_prql, pl_to_rq_tree, prql_to_pl, prql_to_pl_tree, rq_to_sql, SourceTree};
3131
use prqlc::{Options, Target};
@@ -79,6 +79,14 @@ enum Command {
7979
format: Format,
8080
},
8181

82+
/// Lex into Tokens
83+
Lex {
84+
#[command(flatten)]
85+
io_args: IoArgs,
86+
#[arg(value_enum, long, default_value = "yaml")]
87+
format: Format,
88+
},
89+
8290
/// Parse & generate PRQL code back
8391
#[command(name = "fmt")]
8492
Format {
@@ -288,6 +296,17 @@ impl Command {
288296
Format::Yaml => serde_yaml::to_string(&ast)?.into_bytes(),
289297
}
290298
}
299+
Command::Lex { format, .. } => {
300+
let s = sources.sources.values().exactly_one().or_else(|_| {
301+
// TODO: allow multiple sources
302+
bail!("Currently `lex` only works with a single source, but found multiple sources")
303+
})?;
304+
let tokens = prql_to_tokens(s)?;
305+
match format {
306+
Format::Json => serde_json::to_string_pretty(&tokens)?.into_bytes(),
307+
Format::Yaml => serde_yaml::to_string(&tokens)?.into_bytes(),
308+
}
309+
}
291310
Command::Collect(_) => {
292311
let mut root_module_def = prql_to_pl_tree(sources)?;
293312

@@ -429,7 +448,7 @@ impl Command {
429448
}
430449
}
431450

432-
_ => unreachable!(),
451+
_ => unreachable!("Other commands shouldn't reach `execute`"),
433452
})
434453
}
435454

@@ -438,11 +457,10 @@ impl Command {
438457
// `input`, rather than matching on them and grabbing `input` from
439458
// `self`? But possibly if everything moves to `io_args`, then this is
440459
// quite reasonable?
441-
use Command::{
442-
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
443-
};
460+
use Command::*;
444461
let io_args = match self {
445462
Parse { io_args, .. }
463+
| Lex { io_args, .. }
446464
| Collect(io_args)
447465
| Resolve { io_args, .. }
448466
| SQLCompile { io_args, .. }
@@ -481,10 +499,11 @@ impl Command {
481499

482500
fn write_output(&mut self, data: &[u8]) -> std::io::Result<()> {
483501
use Command::{
484-
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
502+
Collect, Debug, Experimental, Lex, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
485503
};
486504
let mut output = match self {
487505
Parse { io_args, .. }
506+
| Lex { io_args, .. }
488507
| Collect(io_args)
489508
| Resolve { io_args, .. }
490509
| SQLCompile { io_args, .. }
@@ -815,4 +834,42 @@ sort full
815834
column: 2
816835
"###);
817836
}
837+
838+
#[test]
839+
fn lex() {
840+
let output = Command::execute(
841+
&Command::Lex {
842+
io_args: IoArgs::default(),
843+
format: Format::Yaml,
844+
},
845+
&mut "from x | select y".into(),
846+
"",
847+
)
848+
.unwrap();
849+
850+
// TODO: terser output; maybe serialize span as `0..4`? Remove the
851+
// `!Ident` complication?
852+
assert_snapshot!(String::from_utf8(output).unwrap().trim(), @r###"
853+
- kind: !Ident from
854+
span:
855+
start: 0
856+
end: 4
857+
- kind: !Ident x
858+
span:
859+
start: 5
860+
end: 6
861+
- kind: !Control '|'
862+
span:
863+
start: 7
864+
end: 8
865+
- kind: !Ident select
866+
span:
867+
start: 9
868+
end: 15
869+
- kind: !Ident y
870+
span:
871+
start: 16
872+
end: 17
873+
"###);
874+
}
818875
}

prqlc/prqlc/src/error_message.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ impl From<Error> for ErrorMessage {
8787
}
8888
}
8989

90+
impl From<Vec<ErrorMessage>> for ErrorMessages {
91+
fn from(errors: Vec<ErrorMessage>) -> Self {
92+
ErrorMessages { inner: errors }
93+
}
94+
}
95+
9096
#[derive(Debug, Clone, Serialize)]
9197
pub struct ErrorMessages {
9298
pub inner: Vec<ErrorMessage>,

prqlc/prqlc/src/lib.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ pub static COMPILER_VERSION: Lazy<Version> =
113113
Lazy::new(|| Version::parse(env!("CARGO_PKG_VERSION")).expect("Invalid prqlc version number"));
114114

115115
use once_cell::sync::Lazy;
116+
use prqlc_parser::TokenVec;
116117
use semver::Version;
117118
use serde::{Deserialize, Serialize};
118119
use std::{collections::HashMap, path::PathBuf, str::FromStr};
@@ -306,6 +307,16 @@ pub enum DisplayOptions {
306307
#[cfg(doctest)]
307308
pub struct ReadmeDoctests;
308309

310+
/// Lex PRQL source into tokens.
311+
pub fn prql_to_tokens(prql: &str) -> Result<TokenVec, ErrorMessages> {
312+
prqlc_parser::lex_source(prql).map_err(|e| {
313+
e.into_iter()
314+
.map(|e| e.into())
315+
.collect::<Vec<ErrorMessage>>()
316+
.into()
317+
})
318+
}
319+
309320
/// Parse PRQL into a PL AST
310321
// TODO: rename this to `prql_to_pl_simple`
311322
pub fn prql_to_pl(prql: &str) -> Result<ast::ModuleDef, ErrorMessages> {

prqlc/prqlc/tests/integration/cli.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ fn help() {
1717
1818
Commands:
1919
parse Parse into PL AST
20+
lex Lex into Tokens
2021
fmt Parse & generate PRQL code back
2122
collect Parse the whole project and collect it into a single PRQL source file
2223
debug Commands for meant for debugging, prone to change
@@ -471,3 +472,49 @@ fn compile_no_prql_files() {
471472
472473
"###);
473474
}
475+
476+
#[test]
477+
fn lex() {
478+
assert_cmd_snapshot!(prqlc_command().args(["lex"]).pass_stdin("from tracks"), @r###"
479+
success: true
480+
exit_code: 0
481+
----- stdout -----
482+
- kind: !Ident from
483+
span:
484+
start: 0
485+
end: 4
486+
- kind: !Ident tracks
487+
span:
488+
start: 5
489+
end: 11
490+
491+
----- stderr -----
492+
"###);
493+
494+
assert_cmd_snapshot!(prqlc_command().args(["lex", "--format=json"]).pass_stdin("from tracks"), @r###"
495+
success: true
496+
exit_code: 0
497+
----- stdout -----
498+
[
499+
{
500+
"kind": {
501+
"Ident": "from"
502+
},
503+
"span": {
504+
"start": 0,
505+
"end": 4
506+
}
507+
},
508+
{
509+
"kind": {
510+
"Ident": "tracks"
511+
},
512+
"span": {
513+
"start": 5,
514+
"end": 11
515+
}
516+
}
517+
]
518+
----- stderr -----
519+
"###);
520+
}

0 commit comments

Comments
 (0)