Skip to content

Commit 09dfc94

Browse files
authored
Revert "Support underscore separators in numbers for Clickhouse. Fixes apache#1659 (apache#1677)"
This reverts commit 8fb7a02.
1 parent 34bbeb4 commit 09dfc94

File tree

5 files changed

+3
-99
lines changed

5 files changed

+3
-99
lines changed

src/dialect/clickhouse.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,6 @@ impl Dialect for ClickHouseDialect {
5959
true
6060
}
6161

62-
fn supports_numeric_literal_underscores(&self) -> bool {
63-
true
64-
}
65-
6662
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
6763
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
6864
//

src/dialect/mod.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -304,11 +304,6 @@ pub trait Dialect: Debug + Any {
304304
false
305305
}
306306

307-
/// Returns true if the dialect supports numbers containing underscores, e.g. `10_000_000`
308-
fn supports_numeric_literal_underscores(&self) -> bool {
309-
false
310-
}
311-
312307
/// Returns true if the dialect supports specifying null treatment
313308
/// as part of a window function's parameter list as opposed
314309
/// to after the parameter list.

src/dialect/postgresql.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,6 @@ impl Dialect for PostgreSqlDialect {
249249
fn supports_string_escape_constant(&self) -> bool {
250250
true
251251
}
252-
253-
fn supports_numeric_literal_underscores(&self) -> bool {
254-
true
255-
}
256252
}
257253

258254
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

src/tokenizer.rs

Lines changed: 3 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,24 +1136,12 @@ impl<'a> Tokenizer<'a> {
11361136
}
11371137
// numbers and period
11381138
'0'..='9' | '.' => {
1139-
// Some dialects support underscore as number separator
1140-
// There can only be one at a time and it must be followed by another digit
1141-
let is_number_separator = |ch: char, next_char: Option<char>| {
1142-
self.dialect.supports_numeric_literal_underscores()
1143-
&& ch == '_'
1144-
&& next_char.is_some_and(|next_ch| next_ch.is_ascii_hexdigit())
1145-
};
1146-
1147-
let mut s = peeking_next_take_while(chars, |ch, next_ch| {
1148-
ch.is_ascii_digit() || is_number_separator(ch, next_ch)
1149-
});
1139+
let mut s = peeking_take_while(chars, |ch| ch.is_ascii_digit());
11501140

11511141
// match hex literal that starts with 0x
11521142
if s == "0" && chars.peek() == Some(&'x') {
11531143
chars.next();
1154-
let s2 = peeking_next_take_while(chars, |ch, next_ch| {
1155-
ch.is_ascii_hexdigit() || is_number_separator(ch, next_ch)
1156-
});
1144+
let s2 = peeking_take_while(chars, |ch| ch.is_ascii_hexdigit());
11571145
return Ok(Some(Token::HexStringLiteral(s2)));
11581146
}
11591147

@@ -1162,10 +1150,7 @@ impl<'a> Tokenizer<'a> {
11621150
s.push('.');
11631151
chars.next();
11641152
}
1165-
1166-
s += &peeking_next_take_while(chars, |ch, next_ch| {
1167-
ch.is_ascii_digit() || is_number_separator(ch, next_ch)
1168-
});
1153+
s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
11691154

11701155
// No number -> Token::Period
11711156
if s == "." {
@@ -1961,24 +1946,6 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool
19611946
s
19621947
}
19631948

1964-
/// Same as peeking_take_while, but also passes the next character to the predicate.
1965-
fn peeking_next_take_while(
1966-
chars: &mut State,
1967-
mut predicate: impl FnMut(char, Option<char>) -> bool,
1968-
) -> String {
1969-
let mut s = String::new();
1970-
while let Some(&ch) = chars.peek() {
1971-
let next_char = chars.peekable.clone().nth(1);
1972-
if predicate(ch, next_char) {
1973-
chars.next(); // consume
1974-
s.push(ch);
1975-
} else {
1976-
break;
1977-
}
1978-
}
1979-
s
1980-
}
1981-
19821949
fn unescape_single_quoted_string(chars: &mut State<'_>) -> Option<String> {
19831950
Unescape::new(chars).unescape()
19841951
}
@@ -2260,41 +2227,6 @@ mod tests {
22602227
compare(expected, tokens);
22612228
}
22622229

2263-
#[test]
2264-
fn tokenize_numeric_literal_underscore() {
2265-
let dialect = GenericDialect {};
2266-
let sql = String::from("SELECT 10_000");
2267-
let mut tokenizer = Tokenizer::new(&dialect, &sql);
2268-
let tokens = tokenizer.tokenize().unwrap();
2269-
let expected = vec![
2270-
Token::make_keyword("SELECT"),
2271-
Token::Whitespace(Whitespace::Space),
2272-
Token::Number("10".to_string(), false),
2273-
Token::make_word("_000", None),
2274-
];
2275-
compare(expected, tokens);
2276-
2277-
all_dialects_where(|dialect| dialect.supports_numeric_literal_underscores()).tokenizes_to(
2278-
"SELECT 10_000, _10_000, 10_00_, 10___0",
2279-
vec![
2280-
Token::make_keyword("SELECT"),
2281-
Token::Whitespace(Whitespace::Space),
2282-
Token::Number("10_000".to_string(), false),
2283-
Token::Comma,
2284-
Token::Whitespace(Whitespace::Space),
2285-
Token::make_word("_10_000", None), // leading underscore tokenizes as a word (parsed as column identifier)
2286-
Token::Comma,
2287-
Token::Whitespace(Whitespace::Space),
2288-
Token::Number("10_00".to_string(), false),
2289-
Token::make_word("_", None), // trailing underscore tokenizes as a word (syntax error in some dialects)
2290-
Token::Comma,
2291-
Token::Whitespace(Whitespace::Space),
2292-
Token::Number("10".to_string(), false),
2293-
Token::make_word("___0", None), // multiple underscores tokenize as a word (syntax error in some dialects)
2294-
],
2295-
);
2296-
}
2297-
22982230
#[test]
22992231
fn tokenize_select_exponent() {
23002232
let sql = String::from("SELECT 1e10, 1e-10, 1e+10, 1ea, 1e-10a, 1e-10-10");

tests/sqlparser_clickhouse.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,21 +1649,6 @@ fn parse_table_sample() {
16491649
clickhouse().verified_stmt("SELECT * FROM tbl SAMPLE 1 / 10 OFFSET 1 / 2");
16501650
}
16511651

1652-
#[test]
1653-
fn parse_numbers_with_underscore() {
1654-
let canonical = if cfg!(feature = "bigdecimal") {
1655-
"SELECT 10000"
1656-
} else {
1657-
"SELECT 10_000"
1658-
};
1659-
let select = clickhouse().verified_only_select_with_canonical("SELECT 10_000", canonical);
1660-
1661-
assert_eq!(
1662-
select.projection,
1663-
vec![SelectItem::UnnamedExpr(Expr::Value(number("10_000")))]
1664-
)
1665-
}
1666-
16671652
fn clickhouse() -> TestedDialects {
16681653
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
16691654
}

0 commit comments

Comments
 (0)