|
| 1 | +use crate::path_util::cwd_to_workspace_root; |
| 2 | +use anyhow::{Context, Ok, Result}; |
| 3 | +use enum_iterator::{all, Sequence}; |
| 4 | +use std::collections::{HashMap, HashSet}; |
| 5 | + |
/// Metadata recorded for a single keyword parsed from a `PG_KEYWORD(...)` row
/// of `kwlist.h`.
struct KeywordMeta {
    /// Reservation category taken from the row's third field.
    category: KeywordCategory,
    /// Label kind taken from the row's fourth field.
    label: KeywordLabel,
}
| 10 | + |
/// Label kind of a keyword, as spelled in the fourth field of a `PG_KEYWORD`
/// row in `kwlist.h`.
enum KeywordLabel {
    /// The row is marked `AS_LABEL`.
    As,
    /// The row is marked `BARE_LABEL`; these keywords are emitted into the
    /// generated `BARE_LABEL_KEYWORDS` token set.
    Bare,
}
| 15 | + |
/// Reservation category of a Postgres keyword, as spelled in the third field
/// of a `PG_KEYWORD` row in `kwlist.h`.
///
/// Related:
/// - [postgres/src/backend/utils/adt/misc.c](https://github.com/postgres/postgres/blob/08691ea958c2646b6aadefff878539eb0b860bb0/src/backend/utils/adt/misc.c#L452-L467/)
/// - [postgres docs: sql keywords appendix](https://www.postgresql.org/docs/17/sql-keywords-appendix.html)
///
/// The `kwlist.h` header file alone isn't enough, though, because
/// `json_scalar` can be a function name, but `between` cannot be.
///
/// The Postgres parser special cases certain calls like `json_scalar`:
/// <https://github.com/postgres/postgres/blob/028b4b21df26fee67b3ce75c6f14fcfd3c7cf2ee/src/backend/parser/gram.y#L15684C8-L16145>
///
/// Where a keyword of each category may be used as a name (`Y`) or not (`N`).
/// `keyword_allowed` encodes the Column/Table and Type columns of this table:
///
/// | Category     | Column | Table | Function | Type |
/// |--------------|--------|-------|----------|------|
/// | Unreserved   | Y      | Y     | Y        | Y    |
/// | Reserved     | N      | N     | N        | N    |
/// | ColName      | Y      | Y     | N        | Y    |
/// | TypeFuncName | N      | N     | Y        | Y    |
///
#[derive(Clone, Copy)]
enum KeywordCategory {
    /// Parsed from `UNRESERVED_KEYWORD`.
    Unreserved,
    /// Parsed from `RESERVED_KEYWORD`.
    Reserved,
    /// Parsed from `COL_NAME_KEYWORD`.
    ColName,
    /// Parsed from `TYPE_FUNC_NAME_KEYWORD`.
    TypeFuncName,
}
| 40 | + |
/// The name positions for which a per-category `TokenSet` constant is
/// generated. The `Display` impl yields the name of the generated constant.
#[derive(Sequence, PartialEq)]
enum KWType {
    /// Keywords usable as a column or table name.
    ColumnTable,
    /// Keywords usable as a type name.
    Type,
}
| 46 | + |
| 47 | +impl std::fmt::Display for KWType { |
| 48 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 49 | + f.write_str(match self { |
| 50 | + KWType::ColumnTable => "COLUMN_OR_TABLE_KEYWORDS", |
| 51 | + KWType::Type => "TYPE_KEYWORDS", |
| 52 | + }) |
| 53 | + } |
| 54 | +} |
| 55 | + |
| 56 | +fn keyword_allowed(cat: KeywordCategory, kw_type: KWType) -> bool { |
| 57 | + match cat { |
| 58 | + KeywordCategory::Unreserved => match kw_type { |
| 59 | + KWType::ColumnTable => true, |
| 60 | + KWType::Type => true, |
| 61 | + }, |
| 62 | + KeywordCategory::Reserved => match kw_type { |
| 63 | + KWType::ColumnTable => false, |
| 64 | + KWType::Type => false, |
| 65 | + }, |
| 66 | + KeywordCategory::ColName => match kw_type { |
| 67 | + KWType::ColumnTable => true, |
| 68 | + KWType::Type => true, |
| 69 | + }, |
| 70 | + KeywordCategory::TypeFuncName => match kw_type { |
| 71 | + KWType::ColumnTable => false, |
| 72 | + KWType::Type => true, |
| 73 | + }, |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +pub(crate) fn generate_keywords() -> Result<()> { |
| 78 | + let keywords = parse_header()?; |
| 79 | + |
| 80 | + update_syntax_kind(&keywords) |
| 81 | +} |
| 82 | + |
| 83 | +fn update_syntax_kind(keywords: &HashMap<String, KeywordMeta>) -> Result<()> { |
| 84 | + let path = "crates/parser/src/syntax_kind.rs"; |
| 85 | + |
| 86 | + let data = std::fs::read_to_string(path)?; |
| 87 | + |
| 88 | + let mut keys: Vec<_> = keywords.keys().collect(); |
| 89 | + keys.sort(); |
| 90 | + |
| 91 | + let keywords_start = "// keywords"; |
| 92 | + let keywords_end = "// literals"; |
| 93 | + let mut in_keywords = false; |
| 94 | + |
| 95 | + let from_kw_start = "pub(crate) fn from_keyword"; |
| 96 | + let from_kw_end = "} else {"; |
| 97 | + let mut in_from_keyword = false; |
| 98 | + let mut is_first_from_keyword_case = true; |
| 99 | + |
| 100 | + let token_set_start = "// Generated TokenSet start"; |
| 101 | + let token_set_end = "// Generated TokenSet end"; |
| 102 | + let mut in_token_sets = false; |
| 103 | + |
| 104 | + let mut allowed_col_table_tokens = HashSet::new(); |
| 105 | + let mut allowed_type_tokens = HashSet::new(); |
| 106 | + let mut bare_label_keywords = keywords |
| 107 | + .iter() |
| 108 | + .filter(|(_key, value)| match value.label { |
| 109 | + KeywordLabel::As => false, |
| 110 | + KeywordLabel::Bare => true, |
| 111 | + }) |
| 112 | + .map(|(key, _value)| key) |
| 113 | + .collect::<Vec<_>>(); |
| 114 | + bare_label_keywords.sort(); |
| 115 | + |
| 116 | + let mut unreserved_keywords = keywords |
| 117 | + .iter() |
| 118 | + .filter(|(_key, value)| matches!(value.category, KeywordCategory::Unreserved)) |
| 119 | + .map(|(key, _value)| key) |
| 120 | + .collect::<Vec<_>>(); |
| 121 | + unreserved_keywords.sort(); |
| 122 | + |
| 123 | + let mut reserved_keywords = keywords |
| 124 | + .iter() |
| 125 | + .filter(|(_key, value)| matches!(value.category, KeywordCategory::Reserved)) |
| 126 | + .map(|(key, _value)| key) |
| 127 | + .collect::<Vec<_>>(); |
| 128 | + reserved_keywords.sort(); |
| 129 | + |
| 130 | + let mut all_keywords = keywords.iter().map(|(key, _value)| key).collect::<Vec<_>>(); |
| 131 | + all_keywords.sort(); |
| 132 | + |
| 133 | + for (key, meta) in keywords { |
| 134 | + for variant in all::<KWType>() { |
| 135 | + match variant { |
| 136 | + KWType::ColumnTable => { |
| 137 | + if keyword_allowed(meta.category, variant) { |
| 138 | + allowed_col_table_tokens.insert(key); |
| 139 | + } |
| 140 | + } |
| 141 | + KWType::Type => { |
| 142 | + if keyword_allowed(meta.category, variant) { |
| 143 | + allowed_type_tokens.insert(key); |
| 144 | + } |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + let mut out = vec![]; |
| 151 | + |
| 152 | + for line in data.lines() { |
| 153 | + if line.contains(keywords_end) { |
| 154 | + for kw in &keys { |
| 155 | + // /// `column` |
| 156 | + // COLUMN_KW, |
| 157 | + let comment = format!(" /// `{}`\n", kw); |
| 158 | + let ident = format!(" {},", kw.to_uppercase() + "_KW"); |
| 159 | + out.push(comment + &ident); |
| 160 | + } |
| 161 | + out.push("".to_string()); |
| 162 | + |
| 163 | + in_keywords = false; |
| 164 | + } else if line.contains(from_kw_end) { |
| 165 | + let mut keys: Vec<_> = keywords.keys().collect(); |
| 166 | + keys.sort(); |
| 167 | + for kw in keys { |
| 168 | + // } else if ident.eq_ignore_ascii_case("when") { |
| 169 | + // SyntaxKind::WHEN_KW |
| 170 | + let cond_op = if is_first_from_keyword_case { |
| 171 | + "let kw = if" |
| 172 | + } else { |
| 173 | + "} else if" |
| 174 | + }; |
| 175 | + |
| 176 | + let cond = format!( |
| 177 | + r#" {} ident.eq_ignore_ascii_case("{}") {{"#, |
| 178 | + cond_op, kw |
| 179 | + ) + "\n"; |
| 180 | + let ident = format!(" SyntaxKind::{}", kw.to_uppercase() + "_KW"); |
| 181 | + out.push(cond + &ident); |
| 182 | + |
| 183 | + is_first_from_keyword_case = false; |
| 184 | + } |
| 185 | + |
| 186 | + in_from_keyword = false; |
| 187 | + } else if line.contains(token_set_end) { |
| 188 | + for variant in all::<KWType>() { |
| 189 | + out.push(format!( |
| 190 | + "pub(crate) const {}: TokenSet = TokenSet::new(&[", |
| 191 | + variant |
| 192 | + )); |
| 193 | + let mut tokens = match variant { |
| 194 | + KWType::ColumnTable => &allowed_col_table_tokens, |
| 195 | + KWType::Type => &allowed_type_tokens, |
| 196 | + } |
| 197 | + .iter() |
| 198 | + .collect::<Vec<_>>(); |
| 199 | + |
| 200 | + tokens.sort(); |
| 201 | + |
| 202 | + for tk in tokens { |
| 203 | + out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW")); |
| 204 | + } |
| 205 | + out.push("]);".to_string()); |
| 206 | + out.push("".to_string()); |
| 207 | + } |
| 208 | + |
| 209 | + // all keywords |
| 210 | + { |
| 211 | + out.push("pub(crate) const ALL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string()); |
| 212 | + let tokens = &all_keywords; |
| 213 | + for tk in tokens { |
| 214 | + out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW")); |
| 215 | + } |
| 216 | + out.push("]);".to_string()); |
| 217 | + out.push("".to_string()); |
| 218 | + } |
| 219 | + |
| 220 | + { |
| 221 | + out.push( |
| 222 | + "pub(crate) const BARE_LABEL_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(), |
| 223 | + ); |
| 224 | + for tk in &bare_label_keywords { |
| 225 | + out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW")); |
| 226 | + } |
| 227 | + out.push("]);".to_string()); |
| 228 | + out.push("".to_string()); |
| 229 | + } |
| 230 | + |
| 231 | + { |
| 232 | + out.push( |
| 233 | + "pub(crate) const UNRESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(), |
| 234 | + ); |
| 235 | + let tokens = &unreserved_keywords; |
| 236 | + for tk in tokens { |
| 237 | + out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW")); |
| 238 | + } |
| 239 | + out.push("]);".to_string()); |
| 240 | + out.push("".to_string()); |
| 241 | + } |
| 242 | + |
| 243 | + { |
| 244 | + out.push( |
| 245 | + "pub(crate) const RESERVED_KEYWORDS: TokenSet = TokenSet::new(&[".to_string(), |
| 246 | + ); |
| 247 | + let tokens = &reserved_keywords; |
| 248 | + for tk in tokens { |
| 249 | + out.push(format!(" SyntaxKind::{},", tk.to_uppercase() + "_KW")); |
| 250 | + } |
| 251 | + out.push("]);".to_string()); |
| 252 | + out.push("".to_string()); |
| 253 | + } |
| 254 | + |
| 255 | + out.push(line.to_string()); |
| 256 | + } |
| 257 | + if !in_keywords && !in_from_keyword && !in_token_sets { |
| 258 | + out.push(line.to_string()); |
| 259 | + } |
| 260 | + if line.contains(keywords_start) { |
| 261 | + in_keywords = true; |
| 262 | + } else if line.contains(from_kw_start) { |
| 263 | + in_from_keyword = true; |
| 264 | + } else if line.contains(token_set_start) { |
| 265 | + in_token_sets = true; |
| 266 | + } |
| 267 | + } |
| 268 | + |
| 269 | + std::fs::write(path, out.join("\n") + "\n").context("writing to syntax_kind.rs") |
| 270 | +} |
| 271 | + |
| 272 | +fn parse_header() -> Result<HashMap<String, KeywordMeta>> { |
| 273 | + cwd_to_workspace_root().context("Failed to cwd to root")?; |
| 274 | + |
| 275 | + let data = std::fs::read_to_string("postgres/kwlist.h").context("Failed to read kwlist.h")?; |
| 276 | + |
| 277 | + let mut keywords = HashMap::new(); |
| 278 | + |
| 279 | + for line in data.lines() { |
| 280 | + if line.starts_with("PG_KEYWORD") { |
| 281 | + let line = line |
| 282 | + .split(&['(', ')']) |
| 283 | + .nth(1) |
| 284 | + .context("Invalid kwlist.h structure")?; |
| 285 | + |
| 286 | + let row_items: Vec<&str> = line.split(',').collect(); |
| 287 | + |
| 288 | + match row_items[..] { |
| 289 | + [name, _value, category, is_bare_label] => { |
| 290 | + let label = match is_bare_label.trim() { |
| 291 | + "AS_LABEL" => KeywordLabel::As, |
| 292 | + "BARE_LABEL" => KeywordLabel::Bare, |
| 293 | + unexpected => anyhow::bail!("Unexpected label: {}", unexpected), |
| 294 | + }; |
| 295 | + |
| 296 | + let category = match category.trim() { |
| 297 | + "UNRESERVED_KEYWORD" => KeywordCategory::Unreserved, |
| 298 | + "RESERVED_KEYWORD" => KeywordCategory::Reserved, |
| 299 | + "COL_NAME_KEYWORD" => KeywordCategory::ColName, |
| 300 | + "TYPE_FUNC_NAME_KEYWORD" => KeywordCategory::TypeFuncName, |
| 301 | + unexpected => anyhow::bail!("Unexpected category: {}", unexpected), |
| 302 | + }; |
| 303 | + |
| 304 | + let meta = KeywordMeta { category, label }; |
| 305 | + let name = name.trim().replace('\"', ""); |
| 306 | + keywords.insert(name, meta); |
| 307 | + } |
| 308 | + _ => anyhow::bail!("Problem reading kwlist.h row"), |
| 309 | + } |
| 310 | + } |
| 311 | + } |
| 312 | + |
| 313 | + Ok(keywords) |
| 314 | +} |
0 commit comments