Skip to content

Commit 55e5314

Browse files
majetideepak and facebook-github-bot
authored and committed
feat: Support Presto quoted_identifier semantics in parseTypeSignature (facebookincubator#13658)
Summary: Presto supports quoted identifiers with the grammar ``` QUOTED_IDENTIFIER : '"' ( ~'"' | '""' )* '"' ; ``` The semantics are: ``` Any character other than a double quote is allowed between the enclosing double quotes, and two consecutive double quotes ("") can be used to "escape" a single double quote. ``` Fixes: facebookincubator#13648 Pull Request resolved: facebookincubator#13658 Reviewed By: mbasmanova, kagamiori Differential Revision: D76071088 Pulled By: kKPulla fbshipit-source-id: c1af839d997092fbaa075911d426359e236ecaf2
1 parent 7e299a6 commit 55e5314

File tree

2 files changed

+55
-2
lines changed

2 files changed

+55
-2
lines changed

velox/expression/signature_parser/SignatureParser.ll

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,30 @@
55
#include "velox/expression/signature_parser/SignatureParser.yy.h" // @manual
66
#include "velox/expression/signature_parser/Scanner.h"
77
#define YY_DECL int facebook::velox::exec::Scanner::lex(facebook::velox::exec::Parser::semantic_type *yylval)
8+
9+
// Collapses every escaped double quote (two consecutive '"' characters) in
// 'yytext' into a single '"' and returns the result.
//
// When 'yytext' both starts and ends with '"' (the shape of a QUOTED_ID
// token), the first and last characters are treated as delimiters: they are
// copied through unchanged and never paired with a neighbouring quote. Only
// the text between them is unescaped. This keeps the closing delimiter intact
// for tokens made up solely of quotes, e.g. `""` (empty identifier) stays `""`
// and `""""` (identifier consisting of one quote) becomes `"""`.
//
// Text that is not enclosed in double quotes is unescaped in its entirety.
// A quote that is not followed by another quote is copied as is.
//
// @param yytext NUL-terminated token text; a null pointer yields an empty
// string.
// @return The token text with each `""` escape replaced by `"`.
std::string unescape_doublequote(const char* yytext) {
  if (yytext == nullptr) {
    return {};
  }

  const size_t len = strlen(yytext);
  const bool delimited =
      len >= 2 && yytext[0] == '"' && yytext[len - 1] == '"';

  // Range of characters subject to unescaping: the interior of a delimited
  // token, or the whole text otherwise.
  const size_t begin = delimited ? 1 : 0;
  const size_t end = delimited ? len - 1 : len;

  std::string output;
  output.reserve(len);

  if (delimited) {
    output.push_back('"');
  }

  for (size_t i = begin; i < end; ++i) {
    output.push_back(yytext[i]);
    // The quote just emitted stands for the whole `""` pair; skip its twin.
    if (yytext[i] == '"' && i + 1 < end && yytext[i + 1] == '"') {
      ++i;
    }
  }

  if (delimited) {
    output.push_back('"');
  }
  return output;
}
832
%}
933
1034
%option c++ noyywrap noyylineno nodefault caseless
@@ -34,7 +58,7 @@ Y [Y|y]
3458
Z [Z|z]
3559
3660
WORD ([[:alnum:]_]*)
37-
QUOTED_ID (['"'][[:alnum:][:space:]_]*['"'])
61+
QUOTED_ID (\"([^\"\n]|\"\")*\")
3862
ROW (ROW|STRUCT)
3963
4064
%%
@@ -48,7 +72,7 @@ ROW (ROW|STRUCT)
4872
(DECIMAL) yylval->build<std::string>(YYText()); return Parser::token::DECIMAL;
4973
{ROW} return Parser::token::ROW;
5074
{WORD} yylval->build<std::string>(YYText()); return Parser::token::WORD;
51-
{QUOTED_ID} yylval->build<std::string>(YYText()); return Parser::token::QUOTED_ID;
75+
{QUOTED_ID} {auto val = unescape_doublequote(YYText()); yylval->build<std::string>(val.c_str()); return Parser::token::QUOTED_ID;}
5276
<<EOF>> return Parser::token::YYEOF;
5377
. /* no action on unmatched input */
5478

velox/expression/signature_parser/tests/ParseTypeSignatureTest.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,35 @@ TEST_F(ParseTypeSignatureTest, row) {
230230
ASSERT_EQ(rowfield.rowFieldName(), "bla");
231231
ASSERT_EQ(rowfield.parameters().size(), 0);
232232
}
233+
234+
{
235+
auto signature = parseTypeSignature("row(\"a (b)\" INTEGER)");
236+
ASSERT_EQ(signature.baseName(), "row");
237+
ASSERT_EQ(signature.parameters().size(), 1);
238+
auto field0 = signature.parameters()[0];
239+
ASSERT_EQ(field0.baseName(), "INTEGER");
240+
ASSERT_EQ(field0.rowFieldName(), "a (b)");
241+
}
242+
243+
// Test double double quote escape
244+
{
245+
auto signature = parseTypeSignature(
246+
"row(\"a\"\"b\" INTEGER, \"\"\"ab\" INTEGER, \"ab\"\"\"\"\" INTEGER)");
247+
ASSERT_EQ(signature.baseName(), "row");
248+
ASSERT_EQ(signature.parameters().size(), 3);
249+
auto field0 = signature.parameters()[0];
250+
ASSERT_EQ(field0.baseName(), "INTEGER");
251+
ASSERT_EQ(field0.rowFieldName(), "a\"b");
252+
auto field1 = signature.parameters()[1];
253+
ASSERT_EQ(field1.baseName(), "INTEGER");
254+
ASSERT_EQ(field1.rowFieldName(), "\"ab");
255+
auto field2 = signature.parameters()[2];
256+
ASSERT_EQ(field2.baseName(), "INTEGER");
257+
ASSERT_EQ(field2.rowFieldName(), "ab\"\"");
258+
}
259+
260+
// Single double quote is an error.
261+
EXPECT_THROW(parseTypeSignature("row(\"a\"b\" INTEGER)"), VeloxRuntimeError);
233262
}
234263

235264
TEST_F(ParseTypeSignatureTest, tdigest) {

0 commit comments

Comments
 (0)