Skip to content

Commit 0ae66dc

Browse files
authored
v2: add xtask (#437)
another crate
1 parent a29485b commit 0ae66dc

File tree

9 files changed

+675
-7
lines changed

9 files changed

+675
-7
lines changed

.cargo/config.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[alias]
2+
xtask = "run --package xtask --"

.vscode/settings.json

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
{
22
"editor.formatOnSave": true,
33
"editor.defaultFormatter": "esbenp.prettier-vscode",
4-
"rust-analyzer.checkOnSave.command": "clippy",
5-
"rust-analyzer.checkOnSave.extraArgs": [
6-
"--all-features",
7-
"--",
8-
"-D",
9-
"clippy::pedantic"
10-
],
4+
"rust-analyzer.check.command": "build",
5+
"rust-analyzer.showSyntaxTree": true,
116
"[rust]": {
127
"editor.defaultFormatter": "rust-lang.rust-analyzer"
8+
},
9+
"[sql]": {
10+
"editor.tabSize": 2
1311
}
1412
}

crates/xtask/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "xtask"
3+
version = "0.1.0"
4+
5+
authors.workspace = true
6+
edition.workspace = true
7+
license.workspace = true
8+
rust-version.workspace = true
9+
10+
[dependencies]
11+
anyhow.workspace = true
12+
clap.workspace = true
13+
enum-iterator.workspace = true
14+
reqwest = { version = "0.12.9", features = ["blocking", "json"] }
15+
serde.workspace = true
16+
convert_case.workspace = true
17+
18+
[lints]
19+
workspace = true

crates/xtask/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# xtask
2+
3+
> see <https://github.com/matklad/cargo-xtask>
4+
5+
Run via:
6+
7+
```sh
8+
cargo xtask --help
9+
```
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
use crate::path_util::cwd_to_workspace_root;
2+
use anyhow::{Context, Ok, Result};
3+
use enum_iterator::{all, Sequence};
4+
use std::collections::{HashMap, HashSet};
5+
6+
/// Metadata recorded for one keyword parsed from a `PG_KEYWORD` row of `kwlist.h`.
struct KeywordMeta {
7+
/// Category taken from the third field of the row.
category: KeywordCategory,
8+
/// Label kind taken from the fourth field of the row (`AS_LABEL` or `BARE_LABEL`).
label: KeywordLabel,
9+
}
10+
11+
/// Label kind of a keyword, as given by the last field of its `PG_KEYWORD` row.
enum KeywordLabel {
12+
/// Parsed from `AS_LABEL`.
As,
13+
/// Parsed from `BARE_LABEL`; these keywords are emitted into `BARE_LABEL_KEYWORDS`.
Bare,
14+
}
15+
16+
/// Keyword category parsed from the third field of a `PG_KEYWORD` row in `kwlist.h`.
///
/// Related:
17+
/// - [postgres/src/backend/utils/adt/misc.c](https://github.com/postgres/postgres/blob/08691ea958c2646b6aadefff878539eb0b860bb0/src/backend/utils/adt/misc.c#L452-L467/)
18+
/// - [postgres docs: sql keywords appendix](https://www.postgresql.org/docs/17/sql-keywords-appendix.html)
19+
///
20+
/// The header file alone isn't enough, though: `json_scalar` can be a function
21+
/// name, but `between` cannot.
22+
///
23+
/// The Postgres parser special-cases certain calls like `json_scalar`:
24+
/// <https://github.com/postgres/postgres/blob/028b4b21df26fee67b3ce75c6f14fcfd3c7cf2ee/src/backend/parser/gram.y#L15684C8-L16145>
25+
///
26+
/// Where a keyword of each category may be used as a name:
///
/// | Category     | Column | Table | Function | Type |
27+
/// |--------------|--------|-------|----------|------|
28+
/// | Unreserved   | Y      | Y     | Y        | Y    |
29+
/// | Reserved     | N      | N     | N        | N    |
30+
/// | ColName      | Y      | Y     | N        | Y    |
31+
/// | TypeFuncName | N      | N     | Y        | Y    |
32+
///
33+
#[derive(Clone, Copy)]
34+
enum KeywordCategory {
35+
/// Parsed from `UNRESERVED_KEYWORD`.
Unreserved,
36+
/// Parsed from `RESERVED_KEYWORD`.
Reserved,
37+
/// Parsed from `COL_NAME_KEYWORD`.
ColName,
38+
/// Parsed from `TYPE_FUNC_NAME_KEYWORD`.
TypeFuncName,
39+
}
40+
41+
/// Kinds of name position for which a keyword `TokenSet` constant is generated.
///
/// The `Sequence` derive lets the generator iterate over every variant; the
/// `Display` impl supplies the name of the generated constant.
#[derive(Sequence, PartialEq)]
42+
enum KWType {
43+
/// Column or table names; rendered as `COLUMN_OR_TABLE_KEYWORDS`.
ColumnTable,
44+
/// Type names; rendered as `TYPE_KEYWORDS`.
Type,
45+
}
46+
47+
impl std::fmt::Display for KWType {
48+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49+
f.write_str(match self {
50+
KWType::ColumnTable => "COLUMN_OR_TABLE_KEYWORDS",
51+
KWType::Type => "TYPE_KEYWORDS",
52+
})
53+
}
54+
}
55+
56+
fn keyword_allowed(cat: KeywordCategory, kw_type: KWType) -> bool {
57+
match cat {
58+
KeywordCategory::Unreserved => match kw_type {
59+
KWType::ColumnTable => true,
60+
KWType::Type => true,
61+
},
62+
KeywordCategory::Reserved => match kw_type {
63+
KWType::ColumnTable => false,
64+
KWType::Type => false,
65+
},
66+
KeywordCategory::ColName => match kw_type {
67+
KWType::ColumnTable => true,
68+
KWType::Type => true,
69+
},
70+
KeywordCategory::TypeFuncName => match kw_type {
71+
KWType::ColumnTable => false,
72+
KWType::Type => true,
73+
},
74+
}
75+
}
76+
77+
pub(crate) fn generate_keywords() -> Result<()> {
78+
let keywords = parse_header()?;
79+
80+
update_syntax_kind(&keywords)
81+
}
82+
83+
fn update_syntax_kind(keywords: &HashMap<String, KeywordMeta>) -> Result<()> {
84+
let path = "crates/parser/src/syntax_kind.rs";
85+
86+
let data = std::fs::read_to_string(path)?;
87+
88+
let mut keys: Vec<_> = keywords.keys().collect();
89+
keys.sort();
90+
91+
let keywords_start = "// keywords";
92+
let keywords_end = "// literals";
93+
let mut in_keywords = false;
94+
95+
let from_kw_start = "pub(crate) fn from_keyword";
96+
let from_kw_end = "} else {";
97+
let mut in_from_keyword = false;
98+
let mut is_first_from_keyword_case = true;
99+
100+
let token_set_start = "// Generated TokenSet start";
101+
let token_set_end = "// Generated TokenSet end";
102+
let mut in_token_sets = false;
103+
104+
let mut allowed_col_table_tokens = HashSet::new();
105+
let mut allowed_type_tokens = HashSet::new();
106+
let mut bare_label_keywords = keywords
107+
.iter()
108+
.filter(|(_key, value)| match value.label {
109+
KeywordLabel::As => false,
110+
KeywordLabel::Bare => true,
111+
})
112+
.map(|(key, _value)| key)
113+
.collect::<Vec<_>>();
114+
bare_label_keywords.sort();
115+
116+
let mut unreserved_keywords = keywords
117+
.iter()
118+
.filter(|(_key, value)| matches!(value.category, KeywordCategory::Unreserved))
119+
.map(|(key, _value)| key)
120+
.collect::<Vec<_>>();
121+
unreserved_keywords.sort();
122+
123+
let mut reserved_keywords = keywords
124+
.iter()
125+
.filter(|(_key, value)| matches!(value.category, KeywordCategory::Reserved))
126+
.map(|(key, _value)| key)
127+
.collect::<Vec<_>>();
128+
reserved_keywords.sort();
129+
130+
let mut all_keywords = keywords.iter().map(|(key, _value)| key).collect::<Vec<_>>();
131+
all_keywords.sort();
132+
133+
for (key, meta) in keywords {
134+
for variant in all::<KWType>() {
135+
match variant {
136+
KWType::ColumnTable => {
137+
if keyword_allowed(meta.category, variant) {
138+
allowed_col_table_tokens.insert(key);
139+
}
140+
}
141+
KWType::Type => {
142+
if keyword_allowed(meta.category, variant) {
143+
allowed_type_tokens.insert(key);
144+
}
145+
}
146+
}
147+
}
148+
}
149+
150+
let mut out = vec![];
151+
152+
for line in data.lines() {
153+
if line.contains(keywords_end) {
154+
for kw in &keys {
155+
// /// `column`
156+
// COLUMN_KW,
157+
let comment = format!(" /// `{}`\n", kw);
158+
let ident = format!(" {},", kw.to_uppercase() + "_KW");
159+
out.push(comment + &ident);
160+
}
161+
out.push("".to_string());
162+
163+
in_keywords = false;
164+
} else if line.contains(from_kw_end) {
165+
let mut keys: Vec<_> = keywords.keys().collect();
166+
keys.sort();
167+
for kw in keys {
168+
// } else if ident.eq_ignore_ascii_case("when") {
169+
// SyntaxKind::WHEN_KW
170+
let cond_op = if is_first_from_keyword_case {
171+
"let kw = if"
172+
} else {
173+
"} else if"
174+
};
175+
176+
let cond = format!(
177+
r#" {} ident.eq_ignore_ascii_case("{}") {{"#,
178+
cond_op, kw
179+
) + "\n";
180+
let ident = format!(" SyntaxKind::{}", kw.to_uppercase() + "_KW");
181+
out.push(cond + &ident);
182+
183+
is_first_from_keyword_case = false;
184+
}
185+
186+
in_from_keyword = false;
187+
} else if line.contains(token_set_end) {
188+
for variant in all::<KWType>() {
189+
out.push(format!(
190+
"pub(crate) const {}: TokenSet = TokenSet::new(&[",
191+
variant
192+
));
193+
let mut tokens = match variant {
194+
KWType::ColumnTable => &allowed_col_table_tokens,
195+
KWType::Type => &allowed_type_tokens,
196+
}
197+
.iter()
198+
.collect::<Vec<_>>();
199+
200+
tokens.sort();
201+
202+
for tk in tokens {
203+
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
204+
}
205+
out.push("]);".to_string());
206+
out.push("".to_string());
207+
}
208+
209+
// all keywords
210+
{
211+
out.push("pub(crate) const ALL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string());
212+
let tokens = &all_keywords;
213+
for tk in tokens {
214+
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
215+
}
216+
out.push("]);".to_string());
217+
out.push("".to_string());
218+
}
219+
220+
{
221+
out.push(
222+
"pub(crate) const BARE_LABEL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
223+
);
224+
for tk in &bare_label_keywords {
225+
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
226+
}
227+
out.push("]);".to_string());
228+
out.push("".to_string());
229+
}
230+
231+
{
232+
out.push(
233+
"pub(crate) const UNRESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
234+
);
235+
let tokens = &unreserved_keywords;
236+
for tk in tokens {
237+
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
238+
}
239+
out.push("]);".to_string());
240+
out.push("".to_string());
241+
}
242+
243+
{
244+
out.push(
245+
"pub(crate) const RESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(),
246+
);
247+
let tokens = &reserved_keywords;
248+
for tk in tokens {
249+
out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW"));
250+
}
251+
out.push("]);".to_string());
252+
out.push("".to_string());
253+
}
254+
255+
out.push(line.to_string());
256+
}
257+
if !in_keywords && !in_from_keyword && !in_token_sets {
258+
out.push(line.to_string());
259+
}
260+
if line.contains(keywords_start) {
261+
in_keywords = true;
262+
} else if line.contains(from_kw_start) {
263+
in_from_keyword = true;
264+
} else if line.contains(token_set_start) {
265+
in_token_sets = true;
266+
}
267+
}
268+
269+
std::fs::write(path, out.join("\n") + "\n").context("writing to syntax_kind.rs")
270+
}
271+
272+
fn parse_header() -> Result<HashMap<String, KeywordMeta>> {
273+
cwd_to_workspace_root().context("Failed to cwd to root")?;
274+
275+
let data = std::fs::read_to_string("postgres/kwlist.h").context("Failed to read kwlist.h")?;
276+
277+
let mut keywords = HashMap::new();
278+
279+
for line in data.lines() {
280+
if line.starts_with("PG_KEYWORD") {
281+
let line = line
282+
.split(&['(', ')'])
283+
.nth(1)
284+
.context("Invalid kwlist.h structure")?;
285+
286+
let row_items: Vec<&str> = line.split(',').collect();
287+
288+
match row_items[..] {
289+
[name, _value, category, is_bare_label] => {
290+
let label = match is_bare_label.trim() {
291+
"AS_LABEL" => KeywordLabel::As,
292+
"BARE_LABEL" => KeywordLabel::Bare,
293+
unexpected => anyhow::bail!("Unexpected label: {}", unexpected),
294+
};
295+
296+
let category = match category.trim() {
297+
"UNRESERVED_KEYWORD" => KeywordCategory::Unreserved,
298+
"RESERVED_KEYWORD" => KeywordCategory::Reserved,
299+
"COL_NAME_KEYWORD" => KeywordCategory::ColName,
300+
"TYPE_FUNC_NAME_KEYWORD" => KeywordCategory::TypeFuncName,
301+
unexpected => anyhow::bail!("Unexpected category: {}", unexpected),
302+
};
303+
304+
let meta = KeywordMeta { category, label };
305+
let name = name.trim().replace('\"', "");
306+
keywords.insert(name, meta);
307+
}
308+
_ => anyhow::bail!("Problem reading kwlist.h row"),
309+
}
310+
}
311+
}
312+
313+
Ok(keywords)
314+
}

0 commit comments

Comments
 (0)