diff --git a/Cargo.toml b/Cargo.toml
index c4d0094f4..2c1538b53 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ visitor = ["sqlparser_derive"]
 [dependencies]
 bigdecimal = { version = "0.4.1", features = ["serde"], optional = true }
 log = "0.4"
+phf = { version = "0.11", default-features = false }
 serde = { version = "1.0", features = ["derive"], optional = true }
 # serde_json is only used in examples/cli, but we have to put it outside
 # of dev-dependencies because of
@@ -58,6 +59,10 @@ simple_logger = "5.0"
 matches = "0.1"
 pretty_assertions = "1"
 
+[build-dependencies]
+phf = { version = "0.11", default-features = false }
+phf_codegen = "0.11"
+
 [package.metadata.docs.rs]
 # Document these features on docs.rs
 features = ["serde", "visitor"]
diff --git a/build.rs b/build.rs
new file mode 100644
index 000000000..f15b25b0f
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,117 @@
+//! Generates `$OUT_DIR/keyword_gen.rs` from `src/keywords.txt`.
+//!
+//! The generated file defines the `Keyword` enum, the `ALL_KEYWORDS` list of
+//! SQL spellings and `KEYWORD_MAP`, a perfect-hash map from SQL spelling to
+//! `Keyword`. `src/keywords.rs` pulls it in with `include!`.
+
+use std::env;
+use std::fs::File;
+use std::io::{self, BufWriter, Write};
+use std::path::Path;
+
+/// Keyword list read by this script, relative to the package root.
+const KEYWORDS_FILE: &str = "src/keywords.txt";
+
+/// One entry of the keyword list: the Rust identifier of the enum variant and,
+/// when given, the spelling used in SQL text.
+type KeywordEntry = (String, Option<String>);
+
+/// Returns the spelling of `entry` in SQL text, which is the Rust identifier
+/// unless the entry names an explicit spelling.
+fn sql_spelling(entry: &KeywordEntry) -> &str {
+    entry.1.as_deref().unwrap_or(&entry.0)
+}
+
+/// Parses the contents of the keyword list.
+///
+/// Blank lines and lines starting with `#` are skipped. Every other line holds
+/// one or two whitespace-separated columns: the identifier and, optionally,
+/// the SQL spelling.
+///
+/// # Panics
+///
+/// Panics if a line holds more than two columns.
+fn parse_keywords(data: &str) -> Vec<KeywordEntry> {
+    data.lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty() && !line.starts_with('#'))
+        .map(|line| {
+            let mut columns = line.split_ascii_whitespace();
+            match (columns.next(), columns.next(), columns.next()) {
+                (Some(ident), spelling, None) => {
+                    (ident.to_string(), spelling.map(str::to_string))
+                }
+                _ => panic!("Invalid keyword: {}", line),
+            }
+        })
+        .collect()
+}
+
+/// Reads and parses [`KEYWORDS_FILE`].
+///
+/// # Panics
+///
+/// Panics if the file cannot be read or holds an invalid line.
+fn read_keywords() -> Vec<KeywordEntry> {
+    let data = std::fs::read_to_string(KEYWORDS_FILE)
+        .unwrap_or_else(|err| panic!("Error reading {}: {}", KEYWORDS_FILE, err));
+    parse_keywords(&data)
+}
+
+/// Writes the `Keyword` enum: `NoKeyword` followed by one variant per entry.
+fn write_keyword_enum(file: &mut impl Write, keywords: &[KeywordEntry]) -> io::Result<()> {
+    let header = concat!(
+        "#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]\n",
+        "#[cfg_attr(feature = \"serde\", derive(Serialize, Deserialize))]\n",
+        "#[cfg_attr(feature = \"visitor\", derive(Visit, VisitMut))]\n",
+        "#[allow(non_camel_case_types)]\n",
+        "pub enum Keyword {\n",
+        "    NoKeyword,\n",
+    );
+    file.write_all(header.as_bytes())?;
+    for (ident, _) in keywords {
+        writeln!(file, "    {},", ident)?;
+    }
+    writeln!(file, "}}\n")
+}
+
+/// Writes `ALL_KEYWORDS`, the SQL spelling of every keyword in list order.
+fn write_all_keywords(file: &mut impl Write, keywords: &[KeywordEntry]) -> io::Result<()> {
+    writeln!(file, "pub const ALL_KEYWORDS: &[&str] = &[")?;
+    for entry in keywords {
+        writeln!(file, "    \"{}\",", sql_spelling(entry))?;
+    }
+    writeln!(file, "];\n")
+}
+
+/// Writes `KEYWORD_MAP`, a perfect-hash map from SQL spelling to `Keyword`.
+fn write_phf_map(file: &mut impl Write, keywords: &[KeywordEntry]) -> io::Result<()> {
+    let mut map = phf_codegen::Map::new();
+    for entry in keywords {
+        map.entry(sql_spelling(entry), &format!("Keyword::{}", entry.0));
+    }
+    writeln!(
+        file,
+        "static KEYWORD_MAP: phf::Map<&'static str, Keyword> = {};",
+        map.build()
+    )
+}
+
+/// Generates `keyword_gen.rs` in Cargo's `OUT_DIR`.
+fn main() -> io::Result<()> {
+    // Emitting `rerun-if-changed` replaces Cargo's default of re-running the
+    // script on any change in the package, so the script lists itself too.
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-changed={}", KEYWORDS_FILE);
+
+    let keywords = read_keywords();
+    let out_dir = env::var_os("OUT_DIR").expect("Cargo sets OUT_DIR for build scripts");
+    let path = Path::new(&out_dir).join("keyword_gen.rs");
+    let mut file = BufWriter::new(File::create(path)?);
+
+    write_keyword_enum(&mut file, &keywords)?;
+    write_all_keywords(&mut file, &keywords)?;
+    write_phf_map(&mut file, &keywords)?;
+    // Flush explicitly: an error while dropping a `BufWriter` is ignored.
+    file.flush()
+}
diff --git a/src/keywords.rs b/src/keywords.rs
index 25a719d25..d0f2fae90 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -33,845 +33,29 @@ use serde::{Deserialize, Serialize};
 #[cfg(feature = "visitor")]
 use sqlparser_derive::{Visit, VisitMut};
 
-/// Defines a string constant for a single keyword: `kw_def!(SELECT);`
-/// expands to `pub const SELECT = "SELECT";`
-macro_rules! kw_def {
-    ($ident:ident = $string_keyword:expr) => {
-        pub const $ident: &'static str = $string_keyword;
-    };
-    ($ident:ident) => {
-        kw_def!($ident = stringify!($ident));
-    };
-}
-
-/// Expands to a list of `kw_def!()` invocations for each keyword
-/// and defines an ALL_KEYWORDS array of the defined constants.
-macro_rules! define_keywords {
-    ($(
-        $ident:ident $(= $string_keyword:expr)?
-    ),*) => {
-        #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
-        #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-        #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-        #[allow(non_camel_case_types)]
-        pub enum Keyword {
-            NoKeyword,
-            $($ident),*
-        }
+include!(concat!(env!("OUT_DIR"), "/keyword_gen.rs"));
 
-        pub const ALL_KEYWORDS_INDEX: &[Keyword] = &[
-            $(Keyword::$ident),*
-        ];
-
-        $(kw_def!($ident $(= $string_keyword)?);)*
-        pub const ALL_KEYWORDS: &[&str] = &[
-            $($ident),*
-        ];
-    };
+/// Returns the [`Keyword`] whose SQL spelling is `keyword`, compared ASCII
+/// case-insensitively, or [`Keyword::NoKeyword`] if there is none.
+pub fn lookup(keyword: &str) -> Keyword {
+    // Words that fit are upper-cased on the stack; longer ones are allocated.
+    const STACK_CAPACITY: usize = 64;
+
+    let mut buffer = [0u8; STACK_CAPACITY];
+    let found = match buffer.get_mut(..keyword.len()) {
+        Some(upper) => {
+            upper.copy_from_slice(keyword.as_bytes());
+            upper.make_ascii_uppercase();
+            // ASCII upper-casing only rewrites `a`-`z`, so this stays valid UTF-8.
+            core::str::from_utf8(upper)
+                .ok()
+                .and_then(|word| KEYWORD_MAP.get(word))
+        }
+        None => KEYWORD_MAP.get(keyword.to_ascii_uppercase().as_str()),
+    };
+    found.copied().unwrap_or(Keyword::NoKeyword)
 }
 
-// The following keywords should be sorted to be able to match using binary search
-define_keywords!(
-    ABORT,
-    ABS,
-    ABSENT,
-    ABSOLUTE,
-    ACCESS,
-    ACCOUNT,
-    ACTION,
-    ADD,
-    ADMIN,
-    AFTER,
-    AGAINST,
-    AGGREGATION,
-    ALIAS,
-    ALL,
-    ALLOCATE,
-    ALTER,
-    ALWAYS,
-    ANALYZE,
-    AND,
-    ANTI,
-    ANY,
-    APPLICATION,
-    APPLY,
-    ARCHIVE,
-    ARE,
-    ARRAY,
-    ARRAY_MAX_CARDINALITY,
-    AS,
-    ASC,
-    ASENSITIVE,
-    ASOF,
-    ASSERT,
-    ASYMMETRIC,
-    AT,
-    ATOMIC,
-    ATTACH,
-    AUTHORIZATION,
-    AUTO,
-    AUTOINCREMENT,
-    AUTO_INCREMENT,
-    AVG,
-    AVRO,
-    BACKWARD,
-    BASE64,
-    BEFORE,
-    BEGIN,
-    BEGIN_FRAME,
-    BEGIN_PARTITION,
-    BETWEEN,
-    BIGDECIMAL,
-    BIGINT,
-    BIGNUMERIC,
-    BINARY,
-    BINDING,
-    BIT,
-    BLOB,
-    BLOOMFILTER,
-    BOOL,
-    BOOLEAN,
-    BOTH,
-    BROWSE,
-    BTREE,
-    BUCKETS,
-    BY,
-    BYPASSRLS,
-    BYTEA,
-    BYTES,
-    CACHE,
-    CALL,
-    CALLED,
-    CARDINALITY,
-    CASCADE,
-    CASCADED,
-    CASE,
-    CAST,
-    CATALOG,
-    CEIL,
-    CEILING,
- CENTURY, - CHAIN, - CHANGE, - CHANGE_TRACKING, - CHANNEL, - CHAR, - CHARACTER, - CHARACTERS, - CHARACTER_LENGTH, - CHARSET, - CHAR_LENGTH, - CHECK, - CLEAR, - CLOB, - CLONE, - CLOSE, - CLUSTER, - CLUSTERED, - CLUSTERING, - COALESCE, - COLLATE, - COLLATION, - COLLECT, - COLLECTION, - COLUMN, - COLUMNS, - COLUMNSTORE, - COMMENT, - COMMIT, - COMMITTED, - COMPRESSION, - COMPUTE, - CONCURRENTLY, - CONDITION, - CONFLICT, - CONNECT, - CONNECTION, - CONSTRAINT, - CONTAINS, - CONTINUE, - CONVERT, - COPY, - COPY_OPTIONS, - CORR, - CORRESPONDING, - COUNT, - COVAR_POP, - COVAR_SAMP, - CREATE, - CREATEDB, - CREATEROLE, - CREDENTIALS, - CROSS, - CSV, - CUBE, - CUME_DIST, - CURRENT, - CURRENT_CATALOG, - CURRENT_DATE, - CURRENT_DEFAULT_TRANSFORM_GROUP, - CURRENT_PATH, - CURRENT_ROLE, - CURRENT_ROW, - CURRENT_SCHEMA, - CURRENT_TIME, - CURRENT_TIMESTAMP, - CURRENT_TRANSFORM_GROUP_FOR_TYPE, - CURRENT_USER, - CURSOR, - CYCLE, - DATA, - DATABASE, - DATABASES, - DATA_RETENTION_TIME_IN_DAYS, - DATE, - DATE32, - DATETIME, - DATETIME64, - DAY, - DAYOFWEEK, - DAYOFYEAR, - DEALLOCATE, - DEC, - DECADE, - DECIMAL, - DECLARE, - DEDUPLICATE, - DEFAULT, - DEFAULT_DDL_COLLATION, - DEFERRABLE, - DEFERRED, - DEFINE, - DEFINED, - DELAYED, - DELETE, - DELIMITED, - DELIMITER, - DELTA, - DENSE_RANK, - DEREF, - DESC, - DESCRIBE, - DETACH, - DETAIL, - DETERMINISTIC, - DIRECTORY, - DISABLE, - DISCARD, - DISCONNECT, - DISTINCT, - DISTRIBUTE, - DIV, - DO, - DOUBLE, - DOW, - DOY, - DROP, - DRY, - DUPLICATE, - DYNAMIC, - EACH, - ELEMENT, - ELEMENTS, - ELSE, - EMPTY, - ENABLE, - ENABLE_SCHEMA_EVOLUTION, - ENCODING, - ENCRYPTION, - END, - END_EXEC = "END-EXEC", - ENDPOINT, - END_FRAME, - END_PARTITION, - ENFORCED, - ENGINE, - ENUM, - ENUM16, - ENUM8, - EPHEMERAL, - EPOCH, - EQUALS, - ERROR, - ESCAPE, - ESCAPED, - EVENT, - EVERY, - EXCEPT, - EXCEPTION, - EXCLUDE, - EXCLUSIVE, - EXEC, - EXECUTE, - EXISTS, - EXP, - EXPANSION, - EXPLAIN, - EXPLICIT, - EXPORT, - EXTENDED, - EXTENSION, - EXTERNAL, - EXTRACT, - FAIL, 
- FALSE, - FETCH, - FIELDS, - FILE, - FILES, - FILE_FORMAT, - FILL, - FILTER, - FINAL, - FIRST, - FIRST_VALUE, - FIXEDSTRING, - FLOAT, - FLOAT32, - FLOAT4, - FLOAT64, - FLOAT8, - FLOOR, - FLUSH, - FOLLOWING, - FOR, - FORCE, - FORCE_NOT_NULL, - FORCE_NULL, - FORCE_QUOTE, - FOREIGN, - FORMAT, - FORMATTED, - FORWARD, - FRAME_ROW, - FREE, - FREEZE, - FROM, - FSCK, - FULL, - FULLTEXT, - FUNCTION, - FUNCTIONS, - FUSION, - GENERAL, - GENERATE, - GENERATED, - GEOGRAPHY, - GET, - GLOBAL, - GRANT, - GRANTED, - GRANTS, - GRAPHVIZ, - GROUP, - GROUPING, - GROUPS, - HASH, - HAVING, - HEADER, - HEAP, - HIGH_PRIORITY, - HISTORY, - HIVEVAR, - HOLD, - HOSTS, - HOUR, - HOURS, - ID, - IDENTITY, - IF, - IGNORE, - ILIKE, - IMMEDIATE, - IMMUTABLE, - IN, - INCLUDE, - INCLUDE_NULL_VALUES, - INCREMENT, - INDEX, - INDICATOR, - INHERIT, - INITIALLY, - INNER, - INOUT, - INPATH, - INPUT, - INPUTFORMAT, - INSENSITIVE, - INSERT, - INSTALL, - INSTEAD, - INT, - INT128, - INT16, - INT2, - INT256, - INT32, - INT4, - INT64, - INT8, - INTEGER, - INTERPOLATE, - INTERSECT, - INTERSECTION, - INTERVAL, - INTO, - IS, - ISODOW, - ISOLATION, - ISOWEEK, - ISOYEAR, - ITEMS, - JAR, - JOIN, - JSON, - JSONB, - JSONFILE, - JSON_TABLE, - JULIAN, - KEY, - KEYS, - KILL, - LAG, - LANGUAGE, - LARGE, - LAST, - LAST_VALUE, - LATERAL, - LEAD, - LEADING, - LEFT, - LEVEL, - LIKE, - LIKE_REGEX, - LIMIT, - LINES, - LISTEN, - LN, - LOAD, - LOCAL, - LOCALTIME, - LOCALTIMESTAMP, - LOCATION, - LOCK, - LOCKED, - LOGIN, - LOGS, - LONGBLOB, - LONGTEXT, - LOWCARDINALITY, - LOWER, - LOW_PRIORITY, - MACRO, - MANAGEDLOCATION, - MAP, - MASKING, - MATCH, - MATCHED, - MATCHES, - MATCH_CONDITION, - MATCH_RECOGNIZE, - MATERIALIZE, - MATERIALIZED, - MAX, - MAXVALUE, - MAX_DATA_EXTENSION_TIME_IN_DAYS, - MEASURES, - MEDIUMBLOB, - MEDIUMINT, - MEDIUMTEXT, - MEMBER, - MERGE, - METADATA, - METHOD, - MICROSECOND, - MICROSECONDS, - MILLENIUM, - MILLENNIUM, - MILLISECOND, - MILLISECONDS, - MIN, - MINUTE, - MINVALUE, - MOD, - MODE, - MODIFIES, - 
MODIFY, - MODULE, - MONTH, - MSCK, - MULTISET, - MUTATION, - NAME, - NANOSECOND, - NANOSECONDS, - NATIONAL, - NATURAL, - NCHAR, - NCLOB, - NESTED, - NEW, - NEXT, - NO, - NOBYPASSRLS, - NOCREATEDB, - NOCREATEROLE, - NOINHERIT, - NOLOGIN, - NONE, - NOORDER, - NOREPLICATION, - NORMALIZE, - NOSCAN, - NOSUPERUSER, - NOT, - NOTHING, - NOTIFY, - NOWAIT, - NO_WRITE_TO_BINLOG, - NTH_VALUE, - NTILE, - NULL, - NULLABLE, - NULLIF, - NULLS, - NUMERIC, - NVARCHAR, - OBJECT, - OCCURRENCES_REGEX, - OCTETS, - OCTET_LENGTH, - OF, - OFFSET, - OLD, - OMIT, - ON, - ONE, - ONLY, - OPEN, - OPENJSON, - OPERATOR, - OPTIMIZE, - OPTIMIZER_COSTS, - OPTION, - OPTIONS, - OR, - ORC, - ORDER, - ORDINALITY, - OUT, - OUTER, - OUTPUTFORMAT, - OVER, - OVERFLOW, - OVERLAPS, - OVERLAY, - OVERWRITE, - OWNED, - OWNER, - PARALLEL, - PARAMETER, - PARQUET, - PART, - PARTITION, - PARTITIONED, - PARTITIONS, - PASSWORD, - PAST, - PATH, - PATTERN, - PER, - PERCENT, - PERCENTILE_CONT, - PERCENTILE_DISC, - PERCENT_RANK, - PERIOD, - PERMISSIVE, - PERSISTENT, - PIVOT, - PLACING, - PLAN, - PLANS, - POLICY, - PORTION, - POSITION, - POSITION_REGEX, - POWER, - PRAGMA, - PRECEDES, - PRECEDING, - PRECISION, - PREPARE, - PRESERVE, - PREWHERE, - PRIMARY, - PRIOR, - PRIVILEGES, - PROCEDURE, - PROGRAM, - PROJECTION, - PURGE, - QUALIFY, - QUARTER, - QUERY, - QUOTE, - RANGE, - RANK, - RAW, - RCFILE, - READ, - READS, - READ_ONLY, - REAL, - RECLUSTER, - RECURSIVE, - REF, - REFERENCES, - REFERENCING, - REGCLASS, - REGEXP, - REGR_AVGX, - REGR_AVGY, - REGR_COUNT, - REGR_INTERCEPT, - REGR_R2, - REGR_SLOPE, - REGR_SXX, - REGR_SXY, - REGR_SYY, - RELATIVE, - RELAY, - RELEASE, - REMOTE, - RENAME, - REORG, - REPAIR, - REPEATABLE, - REPLACE, - REPLICA, - REPLICATION, - RESET, - RESPECT, - RESTART, - RESTRICT, - RESTRICTED, - RESTRICTIVE, - RESULT, - RESULTSET, - RESUME, - RETAIN, - RETURN, - RETURNING, - RETURNS, - REVOKE, - RIGHT, - RLIKE, - ROLE, - ROLES, - ROLLBACK, - ROLLUP, - ROOT, - ROW, - ROWID, - ROWS, - ROW_NUMBER, - RULE, - RUN, 
- SAFE, - SAFE_CAST, - SAVEPOINT, - SCHEMA, - SCHEMAS, - SCOPE, - SCROLL, - SEARCH, - SECOND, - SECONDARY, - SECRET, - SECURITY, - SELECT, - SEMI, - SENSITIVE, - SEPARATOR, - SEQUENCE, - SEQUENCEFILE, - SEQUENCES, - SERDE, - SERDEPROPERTIES, - SERIALIZABLE, - SESSION, - SESSION_USER, - SET, - SETS, - SETTINGS, - SHARE, - SHOW, - SIMILAR, - SKIP, - SLOW, - SMALLINT, - SNAPSHOT, - SOME, - SORT, - SORTED, - SOURCE, - SPATIAL, - SPECIFIC, - SPECIFICTYPE, - SQL, - SQLEXCEPTION, - SQLSTATE, - SQLWARNING, - SQRT, - STABLE, - STAGE, - START, - STARTS, - STATEMENT, - STATIC, - STATISTICS, - STATUS, - STDDEV_POP, - STDDEV_SAMP, - STDIN, - STDOUT, - STEP, - STORAGE_INTEGRATION, - STORED, - STRICT, - STRING, - STRUCT, - SUBMULTISET, - SUBSTRING, - SUBSTRING_REGEX, - SUCCEEDS, - SUM, - SUPER, - SUPERUSER, - SUSPEND, - SWAP, - SYMMETRIC, - SYNC, - SYSTEM, - SYSTEM_TIME, - SYSTEM_USER, - TABLE, - TABLES, - TABLESAMPLE, - TAG, - TARGET, - TBLPROPERTIES, - TEMP, - TEMPORARY, - TERMINATED, - TERSE, - TEXT, - TEXTFILE, - THEN, - TIES, - TIME, - TIMESTAMP, - TIMESTAMPTZ, - TIMETZ, - TIMEZONE, - TIMEZONE_ABBR, - TIMEZONE_HOUR, - TIMEZONE_MINUTE, - TIMEZONE_REGION, - TINYBLOB, - TINYINT, - TINYTEXT, - TO, - TOP, - TOTALS, - TRAILING, - TRANSACTION, - TRANSIENT, - TRANSLATE, - TRANSLATE_REGEX, - TRANSLATION, - TREAT, - TRIGGER, - TRIM, - TRIM_ARRAY, - TRUE, - TRUNCATE, - TRY_CAST, - TRY_CONVERT, - TUPLE, - TYPE, - UESCAPE, - UINT128, - UINT16, - UINT256, - UINT32, - UINT64, - UINT8, - UNBOUNDED, - UNCACHE, - UNCOMMITTED, - UNFREEZE, - UNION, - UNIQUE, - UNKNOWN, - UNLISTEN, - UNLOAD, - UNLOCK, - UNLOGGED, - UNMATCHED, - UNNEST, - UNPIVOT, - UNSAFE, - UNSIGNED, - UNTIL, - UPDATE, - UPPER, - URL, - USAGE, - USE, - USER, - USER_RESOURCES, - USING, - UUID, - VACUUM, - VALID, - VALIDATION_MODE, - VALUE, - VALUES, - VALUE_OF, - VARBINARY, - VARCHAR, - VARIABLES, - VARYING, - VAR_POP, - VAR_SAMP, - VERBOSE, - VERSION, - VERSIONING, - VIEW, - VIEWS, - VIRTUAL, - VOLATILE, - WAREHOUSE, - WEEK, - 
WHEN, - WHENEVER, - WHERE, - WIDTH_BUCKET, - WINDOW, - WITH, - WITHIN, - WITHOUT, - WITHOUT_ARRAY_WRAPPER, - WORK, - WRITE, - XML, - XOR, - YEAR, - ZONE, - ZORDER -); - /// These keywords can't be used as a table alias, so that `FROM table_name alias` /// can be parsed unambiguously without looking ahead. pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ diff --git a/src/keywords.txt b/src/keywords.txt new file mode 100644 index 000000000..d2611b4e1 --- /dev/null +++ b/src/keywords.txt @@ -0,0 +1,821 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# The list of keywords for the parser. +# +# Each entry is one or two whitespace-separated columns. The first column is +# the Rust identifier of the Keyword enum variant; the optional second column +# is the spelling matched in SQL text when the two differ (e.g. END-EXEC). 
+ +ABORT +ABS +ABSENT +ABSOLUTE +ACCESS +ACCOUNT +ACTION +ADD +ADMIN +AFTER +AGAINST +AGGREGATION +ALIAS +ALL +ALLOCATE +ALTER +ALWAYS +ANALYZE +AND +ANTI +ANY +APPLICATION +APPLY +ARCHIVE +ARE +ARRAY +ARRAY_MAX_CARDINALITY +AS +ASC +ASENSITIVE +ASOF +ASSERT +ASYMMETRIC +AT +ATOMIC +ATTACH +AUTHORIZATION +AUTO +AUTOINCREMENT +AUTO_INCREMENT +AVG +AVRO +BACKWARD +BASE64 +BEFORE +BEGIN +BEGIN_FRAME +BEGIN_PARTITION +BETWEEN +BIGDECIMAL +BIGINT +BIGNUMERIC +BINARY +BINDING +BIT +BLOB +BLOOMFILTER +BOOL +BOOLEAN +BOTH +BROWSE +BTREE +BUCKETS +BY +BYPASSRLS +BYTEA +BYTES +CACHE +CALL +CALLED +CARDINALITY +CASCADE +CASCADED +CASE +CAST +CATALOG +CEIL +CEILING +CENTURY +CHAIN +CHANGE +CHANGE_TRACKING +CHANNEL +CHAR +CHARACTER +CHARACTERS +CHARACTER_LENGTH +CHARSET +CHAR_LENGTH +CHECK +CLEAR +CLOB +CLONE +CLOSE +CLUSTER +CLUSTERED +CLUSTERING +COALESCE +COLLATE +COLLATION +COLLECT +COLLECTION +COLUMN +COLUMNS +COLUMNSTORE +COMMENT +COMMIT +COMMITTED +COMPRESSION +COMPUTE +CONCURRENTLY +CONDITION +CONFLICT +CONNECT +CONNECTION +CONSTRAINT +CONTAINS +CONTINUE +CONVERT +COPY +COPY_OPTIONS +CORR +CORRESPONDING +COUNT +COVAR_POP +COVAR_SAMP +CREATE +CREATEDB +CREATEROLE +CREDENTIALS +CROSS +CSV +CUBE +CUME_DIST +CURRENT +CURRENT_CATALOG +CURRENT_DATE +CURRENT_DEFAULT_TRANSFORM_GROUP +CURRENT_PATH +CURRENT_ROLE +CURRENT_ROW +CURRENT_SCHEMA +CURRENT_TIME +CURRENT_TIMESTAMP +CURRENT_TRANSFORM_GROUP_FOR_TYPE +CURRENT_USER +CURSOR +CYCLE +DATA +DATABASE +DATABASES +DATA_RETENTION_TIME_IN_DAYS +DATE +DATE32 +DATETIME +DATETIME64 +DAY +DAYOFWEEK +DAYOFYEAR +DEALLOCATE +DEC +DECADE +DECIMAL +DECLARE +DEDUPLICATE +DEFAULT +DEFAULT_DDL_COLLATION +DEFERRABLE +DEFERRED +DEFINE +DEFINED +DELAYED +DELETE +DELIMITED +DELIMITER +DELTA +DENSE_RANK +DEREF +DESC +DESCRIBE +DETACH +DETAIL +DETERMINISTIC +DIRECTORY +DISABLE +DISCARD +DISCONNECT +DISTINCT +DISTRIBUTE +DIV +DO +DOUBLE +DOW +DOY +DROP +DRY +DUPLICATE +DYNAMIC +EACH +ELEMENT +ELEMENTS +ELSE +EMPTY +ENABLE +ENABLE_SCHEMA_EVOLUTION 
+ENCODING +ENCRYPTION +END +END_EXEC END-EXEC +ENDPOINT +END_FRAME +END_PARTITION +ENFORCED +ENGINE +ENUM +ENUM16 +ENUM8 +EPHEMERAL +EPOCH +EQUALS +ERROR +ESCAPE +ESCAPED +EVENT +EVERY +EXCEPT +EXCEPTION +EXCLUDE +EXCLUSIVE +EXEC +EXECUTE +EXISTS +EXP +EXPANSION +EXPLAIN +EXPLICIT +EXPORT +EXTENDED +EXTENSION +EXTERNAL +EXTRACT +FAIL +FALSE +FETCH +FIELDS +FILE +FILES +FILE_FORMAT +FILL +FILTER +FINAL +FIRST +FIRST_VALUE +FIXEDSTRING +FLOAT +FLOAT32 +FLOAT4 +FLOAT64 +FLOAT8 +FLOOR +FLUSH +FOLLOWING +FOR +FORCE +FORCE_NOT_NULL +FORCE_NULL +FORCE_QUOTE +FOREIGN +FORMAT +FORMATTED +FORWARD +FRAME_ROW +FREE +FREEZE +FROM +FSCK +FULL +FULLTEXT +FUNCTION +FUNCTIONS +FUSION +GENERAL +GENERATE +GENERATED +GEOGRAPHY +GET +GLOBAL +GRANT +GRANTED +GRANTS +GRAPHVIZ +GROUP +GROUPING +GROUPS +HASH +HAVING +HEADER +HEAP +HIGH_PRIORITY +HISTORY +HIVEVAR +HOLD +HOSTS +HOUR +HOURS +ID +IDENTITY +IF +IGNORE +ILIKE +IMMEDIATE +IMMUTABLE +IN +INCLUDE +INCLUDE_NULL_VALUES +INCREMENT +INDEX +INDICATOR +INHERIT +INITIALLY +INNER +INOUT +INPATH +INPUT +INPUTFORMAT +INSENSITIVE +INSERT +INSTALL +INSTEAD +INT +INT128 +INT16 +INT2 +INT256 +INT32 +INT4 +INT64 +INT8 +INTEGER +INTERPOLATE +INTERSECT +INTERSECTION +INTERVAL +INTO +IS +ISODOW +ISOLATION +ISOWEEK +ISOYEAR +ITEMS +JAR +JOIN +JSON +JSONB +JSONFILE +JSON_TABLE +JULIAN +KEY +KEYS +KILL +LAG +LANGUAGE +LARGE +LAST +LAST_VALUE +LATERAL +LEAD +LEADING +LEFT +LEVEL +LIKE +LIKE_REGEX +LIMIT +LINES +LISTEN +LN +LOAD +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOCATION +LOCK +LOCKED +LOGIN +LOGS +LONGBLOB +LONGTEXT +LOWCARDINALITY +LOWER +LOW_PRIORITY +MACRO +MANAGEDLOCATION +MAP +MASKING +MATCH +MATCHED +MATCHES +MATCH_CONDITION +MATCH_RECOGNIZE +MATERIALIZE +MATERIALIZED +MAX +MAXVALUE +MAX_DATA_EXTENSION_TIME_IN_DAYS +MEASURES +MEDIUMBLOB +MEDIUMINT +MEDIUMTEXT +MEMBER +MERGE +METADATA +METHOD +MICROSECOND +MICROSECONDS +MILLENIUM +MILLENNIUM +MILLISECOND +MILLISECONDS +MIN +MINUTE +MINVALUE +MOD +MODE +MODIFIES +MODIFY +MODULE +MONTH +MSCK 
+MULTISET +MUTATION +NAME +NANOSECOND +NANOSECONDS +NATIONAL +NATURAL +NCHAR +NCLOB +NESTED +NEW +NEXT +NO +NOBYPASSRLS +NOCREATEDB +NOCREATEROLE +NOINHERIT +NOLOGIN +NONE +NOORDER +NOREPLICATION +NORMALIZE +NOSCAN +NOSUPERUSER +NOT +NOTHING +NOTIFY +NOWAIT +NO_WRITE_TO_BINLOG +NTH_VALUE +NTILE +NULL +NULLABLE +NULLIF +NULLS +NUMERIC +NVARCHAR +OBJECT +OCCURRENCES_REGEX +OCTETS +OCTET_LENGTH +OF +OFFSET +OLD +OMIT +ON +ONE +ONLY +OPEN +OPENJSON +OPERATOR +OPTIMIZE +OPTIMIZER_COSTS +OPTION +OPTIONS +OR +ORC +ORDER +ORDINALITY +OUT +OUTER +OUTPUTFORMAT +OVER +OVERFLOW +OVERLAPS +OVERLAY +OVERWRITE +OWNED +OWNER +PARALLEL +PARAMETER +PARQUET +PART +PARTITION +PARTITIONED +PARTITIONS +PASSWORD +PAST +PATH +PATTERN +PER +PERCENT +PERCENTILE_CONT +PERCENTILE_DISC +PERCENT_RANK +PERIOD +PERMISSIVE +PERSISTENT +PIVOT +PLACING +PLAN +PLANS +POLICY +PORTION +POSITION +POSITION_REGEX +POWER +PRAGMA +PRECEDES +PRECEDING +PRECISION +PREPARE +PRESERVE +PREWHERE +PRIMARY +PRIOR +PRIVILEGES +PROCEDURE +PROGRAM +PROJECTION +PURGE +QUALIFY +QUARTER +QUERY +QUOTE +RANGE +RANK +RAW +RCFILE +READ +READS +READ_ONLY +REAL +RECLUSTER +RECURSIVE +REF +REFERENCES +REFERENCING +REGCLASS +REGEXP +REGR_AVGX +REGR_AVGY +REGR_COUNT +REGR_INTERCEPT +REGR_R2 +REGR_SLOPE +REGR_SXX +REGR_SXY +REGR_SYY +RELATIVE +RELAY +RELEASE +REMOTE +RENAME +REORG +REPAIR +REPEATABLE +REPLACE +REPLICA +REPLICATION +RESET +RESPECT +RESTART +RESTRICT +RESTRICTED +RESTRICTIVE +RESULT +RESULTSET +RESUME +RETAIN +RETURN +RETURNING +RETURNS +REVOKE +RIGHT +RLIKE +ROLE +ROLES +ROLLBACK +ROLLUP +ROOT +ROW +ROWID +ROWS +ROW_NUMBER +RULE +RUN +SAFE +SAFE_CAST +SAVEPOINT +SCHEMA +SCHEMAS +SCOPE +SCROLL +SEARCH +SECOND +SECONDARY +SECRET +SECURITY +SELECT +SEMI +SENSITIVE +SEPARATOR +SEQUENCE +SEQUENCEFILE +SEQUENCES +SERDE +SERDEPROPERTIES +SERIALIZABLE +SESSION +SESSION_USER +SET +SETS +SETTINGS +SHARE +SHOW +SIMILAR +SKIP +SLOW +SMALLINT +SNAPSHOT +SOME +SORT +SORTED +SOURCE +SPATIAL +SPECIFIC +SPECIFICTYPE +SQL 
+SQLEXCEPTION +SQLSTATE +SQLWARNING +SQRT +STABLE +STAGE +START +STARTS +STATEMENT +STATIC +STATISTICS +STATUS +STDDEV_POP +STDDEV_SAMP +STDIN +STDOUT +STEP +STORAGE_INTEGRATION +STORED +STRICT +STRING +STRUCT +SUBMULTISET +SUBSTRING +SUBSTRING_REGEX +SUCCEEDS +SUM +SUPER +SUPERUSER +SUSPEND +SWAP +SYMMETRIC +SYNC +SYSTEM +SYSTEM_TIME +SYSTEM_USER +TABLE +TABLES +TABLESAMPLE +TAG +TARGET +TBLPROPERTIES +TEMP +TEMPORARY +TERMINATED +TERSE +TEXT +TEXTFILE +THEN +TIES +TIME +TIMESTAMP +TIMESTAMPTZ +TIMETZ +TIMEZONE +TIMEZONE_ABBR +TIMEZONE_HOUR +TIMEZONE_MINUTE +TIMEZONE_REGION +TINYBLOB +TINYINT +TINYTEXT +TO +TOP +TOTALS +TRAILING +TRANSACTION +TRANSIENT +TRANSLATE +TRANSLATE_REGEX +TRANSLATION +TREAT +TRIGGER +TRIM +TRIM_ARRAY +TRUE +TRUNCATE +TRY_CAST +TRY_CONVERT +TUPLE +TYPE +UESCAPE +UINT128 +UINT16 +UINT256 +UINT32 +UINT64 +UINT8 +UNBOUNDED +UNCACHE +UNCOMMITTED +UNFREEZE +UNION +UNIQUE +UNKNOWN +UNLISTEN +UNLOAD +UNLOCK +UNLOGGED +UNMATCHED +UNNEST +UNPIVOT +UNSAFE +UNSIGNED +UNTIL +UPDATE +UPPER +URL +USAGE +USE +USER +USER_RESOURCES +USING +UUID +VACUUM +VALID +VALIDATION_MODE +VALUE +VALUES +VALUE_OF +VARBINARY +VARCHAR +VARIABLES +VARYING +VAR_POP +VAR_SAMP +VERBOSE +VERSION +VERSIONING +VIEW +VIEWS +VIRTUAL +VOLATILE +WAREHOUSE +WEEK +WHEN +WHENEVER +WHERE +WIDTH_BUCKET +WINDOW +WITH +WITHIN +WITHOUT +WITHOUT_ARRAY_WRAPPER +WORK +WRITE +XML +XOR +YEAR +ZONE +ZORDER diff --git a/src/tokenizer.rs b/src/tokenizer.rs index aacfc16fa..2279160d3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -46,7 +46,7 @@ use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SnowflakeDialect, }; -use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; +use crate::keywords::{self, Keyword}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -344,13 +344,11 @@ impl Token { } pub fn make_word(word: &str, quote_style: Option) -> Self { - let word_uppercase = 
word.to_uppercase(); Token::Word(Word { value: word.to_string(), quote_style, keyword: if quote_style.is_none() { - let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str()); - keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) + keywords::lookup(word) } else { Keyword::NoKeyword },