Skip to content

Commit e1d7df2

Browse files
committed
[chore] add "copy from file" grammar.
Signed-off-by: clundro <[email protected]>
1 parent 578c927 commit e1d7df2

File tree

4 files changed

+129
-27
lines changed

4 files changed

+129
-27
lines changed

build_support/generate_flex.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# use flex to generate the scanner file for the parser
2+
# the following version of flex is used:
3+
# flex 2.5.35 Apple(flex-32)
4+
import os
5+
import subprocess
6+
import re
7+
from sys import platform
8+
import sys
9+
10+
def open_utf8(fpath, flags):
    """Open ``fpath`` with mode ``flags``, decoding/encoding as UTF-8 on Python 3.

    Args:
        fpath: Path of the file to open.
        flags: Mode string passed straight through to ``open`` (e.g. ``'r'``,
            ``'w+'``).

    Returns:
        The file object returned by the built-in ``open``.
    """
    # Python 2's built-in open() takes no ``encoding`` argument, so it is
    # only passed when running on Python 3 or newer.
    if sys.version_info[0] < 3:
        return open(fpath, flags)
    return open(fpath, flags, encoding="utf8")
16+
17+
# Name (or path) of the flex executable; override with --flex=<path>.
flex_bin = 'flex'
for arg in sys.argv[1:]:
    if arg.startswith("--flex="):
        # Strip only the leading flag; str.replace would also delete any
        # later "--flex=" that happens to appear inside the path itself.
        flex_bin = arg[len("--flex="):]

# Input grammar and generated scanner both live in the libpg_query tree;
# the paths are relative to the current working directory.
pg_path = os.path.join('third_party', 'libpg_query')
flex_file_path = os.path.join(pg_path, 'scan.l')
target_file = os.path.join(pg_path, 'src_backend_parser_scan.cpp')
25+
26+
# Run flex to (re)generate the scanner source from scan.l.
proc = subprocess.Popen(
    [flex_bin, '--nounistd', '-o', target_file, flex_file_path],
    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# communicate() drains both pipes together and waits for the process to
# finish, so returncode is populated afterwards. Reading stdout to EOF and
# then stderr could block if flex filled the stderr pipe first, and would
# leave returncode at None.
raw_stdout, raw_stderr = proc.communicate()
stdout = raw_stdout.decode('utf8')
stderr = raw_stderr.decode('utf8')
# Any output on stderr is treated as a failure, as is a non-zero exit status.
if proc.returncode != 0 or len(stderr) > 0:
    print("Flex failed")
    print("stdout: ", stdout)
    print("stderr: ", stderr)
    sys.exit(1)
35+
36+
# Load the scanner that flex just wrote so it can be patched in memory.
with open_utf8(target_file, 'r') as f:
    text = f.read()

# convert this from 'int' to 'yy_size_t' to avoid triggering a warning
text = text.replace('int yy_buf_size;\n', 'yy_size_t yy_buf_size;\n')

# add the libpg_query namespace: opened right after the FLEXINT_H guard ...
text = text.replace('''
#ifndef FLEXINT_H
#define FLEXINT_H
''', '''
#ifndef FLEXINT_H
#define FLEXINT_H
namespace duckdb_libpgquery {
''')
# drop every 'register ' storage-class keyword from the generated code
text = text.replace('register ', '')

# ... and closed at the very end of the file
text = text + "\n} /* duckdb_libpgquery */\n"

# Replace 'fprintf' (with an optional '(void)' cast in front) by '//', which
# turns the remainder of that source line into a C++ comment.
text = re.sub('(?:[(]void[)][ ]*)?fprintf', '//', text)
# Replace 'exit(' by a throw statement; the original call arguments end up
# behind the trailing '//' comment marker.
text = re.sub('exit[(]', 'throw std::runtime_error(msg); //', text)
# Remove the "if (!yyin) yyin = stdin;" and "if (!yyout) yyout = stdout;"
# fallbacks.
text = re.sub(
    r'\n\s*if\s*[(]\s*!\s*yyin\s*[)]\s*\n\s*yyin\s*=\s*stdin;\s*\n', '\n', text)
text = re.sub(
    r'\n\s*if\s*[(]\s*!\s*yyout\s*[)]\s*\n\s*yyout\s*=\s*stdout;\s*\n', '\n', text)

# Regex fragment for the null FILE pointer in the YY_STDINIT #else branch:
# 'NULL' is matched on linux, '(FILE *) 0' on every other platform.
file_null = 'NULL' if platform == 'linux' else '[(]FILE [*][)] 0'

# Collapse the whole "#ifdef YY_STDINIT ... #else ... #endif" block into the
# two null assignments, so no stdin/stdout reference is left behind.
text = re.sub(
    rf'[#]ifdef\s*YY_STDINIT\n\s*yyin = stdin;\n\s*yyout = stdout;\n[#]else\n\s*yyin = {file_null};\n\s*yyout = {file_null};\n[#]endif', ' yyin = (FILE *) 0;\n yyout = (FILE *) 0;', text)

# Sanity checks: report any construct the rewrites above were meant to
# remove. These only print a warning; the script continues and still writes
# the file.
for leftover, label in (
    ('stdin;', 'STDIN'),
    ('stdout', 'STDOUT'),
    ('fprintf(', 'PRINTF'),
    ('exit(', 'EXIT'),
):
    if leftover in text:
        print(label + " not removed!")

# Write the patched scanner back over the generated file.
with open_utf8(target_file, 'w+') as f:
    f.write(text)

test/binder/binder_test.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ TEST(BinderTest, BindBinaryOp) {
173173
PrintStatements(statements);
174174
}
175175

176+
// Passes the bare statement "copy from" to TryBind and prints the statements it returns.
TEST(BinderTest, BindCopyFrom) {
177+
auto statements = TryBind("copy from");
178+
PrintStatements(statements);
179+
}
180+
176181
// TODO(chi): subquery is not supported yet
177182
TEST(BinderTest, DISABLED_BindUncorrelatedSubquery) {
178183
auto statements = TryBind("select * from (select * from a) INNER JOIN (select * from b) ON a.x = b.y");

third_party/libpg_query/grammar/statements/copy.y

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,23 @@
1-
CopyStmt: COPY opt_binary qualified_name opt_column_list opt_oids
1+
CopyStmt: COPY FROM copy_file_name
2+
{
3+
PGCopyStmt *n = makeNode(PGCopyStmt);
4+
n->relation = NULL;
5+
n->query = NULL;
6+
n->attlist = NIL;
7+
n->is_from = true;
8+
n->is_program = true;
9+
n->filename = $3;
10+
n->options = NIL;
11+
12+
if (n->is_program && n->filename == NULL)
13+
ereport(ERROR,
14+
(errcode(PG_ERRCODE_SYNTAX_ERROR),
15+
errmsg("COPYFROMFILE not allowed with NULL"),
16+
parser_errposition(@3)));
17+
18+
$$ = (PGNode *)n;
19+
}
20+
| COPY opt_binary qualified_name opt_column_list opt_oids
221
copy_from opt_program copy_file_name copy_delimiter opt_with copy_options
322
{
423
PGCopyStmt *n = makeNode(PGCopyStmt);

third_party/libpg_query/src_backend_parser_scan.cpp

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#line 1 "third_party/libpg_query/src_backend_parser_scan.cpp"
1+
#line 2 "third_party/libpg_query/src_backend_parser_scan.cpp"
22
/*-------------------------------------------------------------------------
33
*
44
* scan.l
@@ -40,7 +40,7 @@
4040

4141
#include <stdexcept>
4242

43-
#line 43 "third_party/libpg_query/src_backend_parser_scan.cpp"
43+
#line 44 "third_party/libpg_query/src_backend_parser_scan.cpp"
4444

4545
#define YY_INT_ALIGNED short int
4646

@@ -322,7 +322,6 @@ typedef int16_t flex_int16_t;
322322
typedef uint16_t flex_uint16_t;
323323
typedef int32_t flex_int32_t;
324324
typedef uint32_t flex_uint32_t;
325-
typedef uint64_t flex_uint64_t;
326325
#else
327326
typedef signed char flex_int8_t;
328327
typedef short int flex_int16_t;
@@ -482,12 +481,12 @@ struct yy_buffer_state
482481
/* Size of input buffer in bytes, not including room for EOB
483482
* characters.
484483
*/
485-
int yy_buf_size;
484+
yy_size_t yy_buf_size;
486485

487486
/* Number of characters read into yy_ch_buf, not including EOB
488487
* characters.
489488
*/
490-
yy_size_t yy_n_chars;
489+
int yy_n_chars;
491490

492491
/* Whether we "own" the buffer - i.e., we know we created it,
493492
* and can realloc() it to grow it, and should free() it to
@@ -564,7 +563,7 @@ static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner
564563

565564
YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner );
566565
YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner );
567-
YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, yy_size_t len , yyscan_t yyscanner );
566+
YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len , yyscan_t yyscanner );
568567

569568
void *yyalloc ( yy_size_t , yyscan_t yyscanner );
570569
void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner );
@@ -611,7 +610,7 @@ static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner );
611610
*/
612611
#define YY_DO_BEFORE_ACTION \
613612
yyg->yytext_ptr = yy_bp; \
614-
yyleng = (yy_size_t) (yy_cp - yy_bp); \
613+
yyleng = (int) (yy_cp - yy_bp); \
615614
yyg->yy_hold_char = *yy_cp; \
616615
*yy_cp = '\0'; \
617616
yyg->yy_c_buf_p = yy_cp;
@@ -1336,8 +1335,8 @@ struct yyguts_t
13361335
size_t yy_buffer_stack_max; /**< capacity of stack. */
13371336
YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
13381337
char yy_hold_char;
1339-
yy_size_t yy_n_chars;
1340-
yy_size_t yyleng_r;
1338+
int yy_n_chars;
1339+
int yyleng_r;
13411340
char *yy_c_buf_p;
13421341
int yy_init;
13431342
int yy_start;
@@ -1394,7 +1393,7 @@ FILE *yyget_out ( yyscan_t yyscanner );
13941393

13951394
void yyset_out ( FILE * _out_str , yyscan_t yyscanner );
13961395

1397-
yy_size_t yyget_leng ( yyscan_t yyscanner );
1396+
int yyget_leng ( yyscan_t yyscanner );
13981397

13991398
char *yyget_text ( yyscan_t yyscanner );
14001399

@@ -1473,7 +1472,7 @@ static int input ( yyscan_t yyscanner );
14731472
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
14741473
{ \
14751474
int c = '*'; \
1476-
yy_size_t n; \
1475+
int n; \
14771476
for ( n = 0; n < max_size && \
14781477
(c = getc( yyin )) != EOF && c != '\n'; ++n ) \
14791478
buf[n] = (char) c; \
@@ -2839,7 +2838,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
28392838

28402839
else
28412840
{
2842-
yy_size_t num_to_read =
2841+
int num_to_read =
28432842
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
28442843

28452844
while ( num_to_read <= 0 )
@@ -2853,7 +2852,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
28532852

28542853
if ( b->yy_is_our_buffer )
28552854
{
2856-
yy_size_t new_size = b->yy_buf_size * 2;
2855+
int new_size = b->yy_buf_size * 2;
28572856

28582857
if ( new_size <= 0 )
28592858
b->yy_buf_size += b->yy_buf_size / 8;
@@ -2911,7 +2910,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
29112910

29122911
if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
29132912
/* Extend the array by 50%, plus the number we really need. */
2914-
yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
2913+
int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
29152914
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc(
29162915
(void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner );
29172916
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
@@ -3018,7 +3017,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
30183017

30193018
else
30203019
{ /* need more input */
3021-
yy_size_t offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
3020+
int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr);
30223021
++yyg->yy_c_buf_p;
30233022

30243023
switch ( yy_get_next_buffer( yyscanner ) )
@@ -3396,12 +3395,12 @@ YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner)
33963395
* @param yyscanner The scanner object.
33973396
* @return the newly allocated buffer state object.
33983397
*/
3399-
YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner)
3398+
YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len , yyscan_t yyscanner)
34003399
{
34013400
YY_BUFFER_STATE b;
34023401
char *buf;
34033402
yy_size_t n;
3404-
yy_size_t i;
3403+
int i;
34053404

34063405
/* Get memory for full buffer, including space for trailing EOB's. */
34073406
n = (yy_size_t) (_yybytes_len + 2);
@@ -3445,7 +3444,7 @@ static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner)
34453444
do \
34463445
{ \
34473446
/* Undo effects of setting up yytext. */ \
3448-
yy_size_t yyless_macro_arg = (n); \
3447+
int yyless_macro_arg = (n); \
34493448
YY_LESS_LINENO(yyless_macro_arg);\
34503449
yytext[yyleng] = yyg->yy_hold_char; \
34513450
yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
@@ -3513,7 +3512,7 @@ FILE *yyget_out (yyscan_t yyscanner)
35133512
/** Get the length of the current token.
35143513
* @param yyscanner The scanner object.
35153514
*/
3516-
yy_size_t yyget_leng (yyscan_t yyscanner)
3515+
int yyget_leng (yyscan_t yyscanner)
35173516
{
35183517
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
35193518
return yyleng;
@@ -3704,13 +3703,8 @@ static int yy_init_globals (yyscan_t yyscanner)
37043703
yyg->yy_start_stack = NULL;
37053704

37063705
/* Defined in main.c */
3707-
#ifdef YY_STDINIT
3708-
yyin = stdin;
3709-
yyout = stdout;
3710-
#else
3711-
yyin = NULL;
3712-
yyout = NULL;
3713-
#endif
3706+
yyin = (FILE *) 0;
3707+
yyout = (FILE *) 0;
37143708

37153709
/* For future reference: Set errno on error, since we are called by
37163710
* yylex_init()

0 commit comments

Comments
 (0)