88static int
99init_normalization (Parser * p )
1010{
11+ if (p -> normalize ) {
12+ return 1 ;
13+ }
1114 PyObject * m = PyImport_ImportModuleNoBlock ("unicodedata" );
1215 if (!m )
1316 {
@@ -36,7 +39,7 @@ _PyPegen_new_identifier(Parser *p, char *n)
3639 if (!PyUnicode_IS_ASCII (id ))
3740 {
3841 PyObject * id2 ;
39- if (!p -> normalize && ! init_normalization (p ))
42+ if (!init_normalization (p ))
4043 {
4144 Py_DECREF (id );
4245 goto error ;
@@ -88,6 +91,9 @@ static inline Py_ssize_t
8891byte_offset_to_character_offset (PyObject * line , int col_offset )
8992{
9093 const char * str = PyUnicode_AsUTF8 (line );
94+ if (!str ) {
95+ return 0 ;
96+ }
9197 PyObject * text = PyUnicode_DecodeUTF8 (str , col_offset , NULL );
9298 if (!text ) {
9399 return 0 ;
@@ -171,9 +177,10 @@ _PyPegen_get_expr_name(expr_ty e)
171177 }
172178}
173179
174- static void
180+ static int
175181raise_decode_error (Parser * p )
176182{
183+ assert (PyErr_Occurred ());
177184 const char * errtype = NULL ;
178185 if (PyErr_ExceptionMatches (PyExc_UnicodeError )) {
179186 errtype = "unicode error" ;
@@ -197,6 +204,8 @@ raise_decode_error(Parser *p)
197204 Py_XDECREF (value );
198205 Py_XDECREF (tback );
199206 }
207+
208+ return -1 ;
200209}
201210
202211static void
@@ -207,27 +216,33 @@ raise_tokenizer_init_error(PyObject *filename)
207216 || PyErr_ExceptionMatches (PyExc_UnicodeDecodeError ))) {
208217 return ;
209218 }
210- PyObject * type , * value , * tback , * errstr ;
219+ PyObject * errstr = NULL ;
220+ PyObject * tuple = NULL ;
221+ PyObject * type , * value , * tback ;
211222 PyErr_Fetch (& type , & value , & tback );
212223 errstr = PyObject_Str (value );
224+ if (!errstr ) {
225+ goto error ;
226+ }
213227
214- Py_INCREF (Py_None );
215- PyObject * tmp = Py_BuildValue ("(OiiN)" , filename , 0 , -1 , Py_None );
228+ PyObject * tmp = Py_BuildValue ("(OiiO)" , filename , 0 , -1 , Py_None );
216229 if (!tmp ) {
217230 goto error ;
218231 }
219232
220- value = PyTuple_Pack (2 , errstr , tmp );
233+ tuple = PyTuple_Pack (2 , errstr , tmp );
221234 Py_DECREF (tmp );
222235 if (!tuple ) {
223236 goto error ;
224237 }
225- PyErr_SetObject (PyExc_SyntaxError , value );
238+ PyErr_SetObject (PyExc_SyntaxError , tuple );
226239
227240error :
228241 Py_XDECREF (type );
229242 Py_XDECREF (value );
230243 Py_XDECREF (tback );
244+ Py_XDECREF (errstr );
245+ Py_XDECREF (tuple );
231246}
232247
233248static inline PyObject *
@@ -337,9 +352,6 @@ tokenizer_error(Parser *p)
337352 errtype = PyExc_IndentationError ;
338353 msg = "too many levels of indentation" ;
339354 break ;
340- case E_DECODE :
341- raise_decode_error (p );
342- return -1 ;
343355 case E_LINECONT :
344356 msg = "unexpected character after line continuation character" ;
345357 break ;
@@ -513,7 +525,12 @@ _PyPegen_fill_token(Parser *p)
513525 const char * start , * end ;
514526 int type = PyTokenizer_Get (p -> tok , & start , & end );
515527 if (type == ERRORTOKEN ) {
516- return tokenizer_error (p );
528+ if (p -> tok -> done == E_DECODE ) {
529+ return raise_decode_error (p );
530+ }
531+ else {
532+ return tokenizer_error (p );
533+ }
517534 }
518535 if (type == ENDMARKER && p -> start_rule == Py_single_input && p -> parsing_started ) {
519536 type = NEWLINE ; /* Add an extra newline */
@@ -530,13 +547,21 @@ _PyPegen_fill_token(Parser *p)
530547
531548 if (p -> fill == p -> size ) {
532549 int newsize = p -> size * 2 ;
533- p -> tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
534- if (p -> tokens == NULL ) {
535- PyErr_Format ( PyExc_MemoryError , "Realloc tokens failed" );
550+ Token * * new_tokens = PyMem_Realloc (p -> tokens , newsize * sizeof (Token * ));
551+ if (new_tokens == NULL ) {
552+ PyErr_NoMemory ( );
536553 return -1 ;
537554 }
555+ else {
556+ p -> tokens = new_tokens ;
557+ }
538558 for (int i = p -> size ; i < newsize ; i ++ ) {
539559 p -> tokens [i ] = PyMem_Malloc (sizeof (Token ));
560+ if (p -> tokens [i ] == NULL ) {
561+ p -> size = i ; // Needed, in order to cleanup correctly after parser fails
562+ PyErr_NoMemory ();
563+ return -1 ;
564+ }
540565 memset (p -> tokens [i ], '\0' , sizeof (Token ));
541566 }
542567 p -> size = newsize ;
@@ -566,8 +591,6 @@ _PyPegen_fill_token(Parser *p)
566591 t -> end_lineno = p -> starting_lineno + end_lineno ;
567592 t -> end_col_offset = p -> tok -> lineno == 1 ? p -> starting_col_offset + end_col_offset : end_col_offset ;
568593
569- // if (p->fill % 100 == 0) fprintf(stderr, "Filled at %d: %s \"%s\"\n", p->fill,
570- // token_name(type), PyBytes_AsString(t->bytes));
571594 p -> fill += 1 ;
572595 return 0 ;
573596}
@@ -614,6 +637,7 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
614637{
615638 if (p -> mark == p -> fill ) {
616639 if (_PyPegen_fill_token (p ) < 0 ) {
640+ p -> error_indicator = 1 ;
617641 return -1 ;
618642 }
619643 }
@@ -632,11 +656,9 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
632656 }
633657 p -> mark = m -> mark ;
634658 * (void * * )(pres ) = m -> node ;
635- // fprintf(stderr, "%d < %d: memoized!\n", p->mark, p->fill);
636659 return 1 ;
637660 }
638661 }
639- // fprintf(stderr, "%d < %d: not memoized\n", p->mark, p->fill);
640662 return 0 ;
641663}
642664
@@ -683,18 +705,15 @@ _PyPegen_expect_token(Parser *p, int type)
683705{
684706 if (p -> mark == p -> fill ) {
685707 if (_PyPegen_fill_token (p ) < 0 ) {
708+ p -> error_indicator = 1 ;
686709 return NULL ;
687710 }
688711 }
689712 Token * t = p -> tokens [p -> mark ];
690713 if (t -> type != type ) {
691- // fprintf(stderr, "No %s at %d\n", token_name(type), p->mark);
692714 return NULL ;
693715 }
694716 p -> mark += 1 ;
695- // fprintf(stderr, "Got %s at %d: %s\n", token_name(type), p->mark,
696- // PyBytes_AsString(t->bytes));
697-
698717 return t ;
699718}
700719
@@ -888,8 +907,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
888907{
889908 Parser * p = PyMem_Malloc (sizeof (Parser ));
890909 if (p == NULL ) {
891- PyErr_Format (PyExc_MemoryError , "Out of memory for Parser" );
892- return NULL ;
910+ return (Parser * ) PyErr_NoMemory ();
893911 }
894912 assert (tok != NULL );
895913 p -> tok = tok ;
@@ -898,10 +916,14 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena
898916 p -> tokens = PyMem_Malloc (sizeof (Token * ));
899917 if (!p -> tokens ) {
900918 PyMem_Free (p );
901- PyErr_Format (PyExc_MemoryError , "Out of memory for tokens" );
902- return NULL ;
919+ return (Parser * ) PyErr_NoMemory ();
903920 }
904921 p -> tokens [0 ] = PyMem_Malloc (sizeof (Token ));
922+ if (!p -> tokens [0 ]) {
923+ PyMem_Free (p -> tokens );
924+ PyMem_Free (p );
925+ return (Parser * ) PyErr_NoMemory ();
926+ }
905927 memset (p -> tokens [0 ], '\0' , sizeof (Token ));
906928 p -> mark = 0 ;
907929 p -> fill = 0 ;
@@ -1187,7 +1209,7 @@ _PyPegen_seq_count_dots(asdl_seq *seq)
11871209 number_of_dots += 1 ;
11881210 break ;
11891211 default :
1190- assert ( current_expr -> type == ELLIPSIS || current_expr -> type == DOT );
1212+ Py_UNREACHABLE ( );
11911213 }
11921214 }
11931215
0 commit comments