1313from cdd .shared .ast_utils import deduplicate
1414from cdd .shared .pure_utils import (
1515 count_iter_items ,
16+ pp ,
1617 simple_types ,
1718 sliding_window ,
1819 type_to_name ,
5657 ("List" , " " , "of" ): "List" ,
5758 ("Tuple" , " " , "of" ): "Tuple" ,
5859 ("Dictionary" , " " , "of" ): "Mapping" ,
60+ ("One" , " " , "of" ): "Union" ,
5961}
6062
6163
@@ -369,8 +371,19 @@ def _parse_adhoc_doc_for_typ_phase0(doc, words):
369371 word_chars : str = "{0}{1}`'\" /|" .format (string .digits , string .ascii_letters )
370372 sentence_ends : int = - 1
371373 break_the_union : bool = False # lincoln
372- for i , ch in enumerate (doc ):
373- if (
374+ counter = Counter (doc ) # Imperfect because won't catch escaped quote marks
375+ balanced_single : bool = counter ["'" ] > 0 and counter ["'" ] & 1 == 0
376+ balanced_double : bool = counter ['"' ] > 0 and counter ['"' ] & 1 == 0
377+
378+ i : int = 0
379+ n : int = len (doc )
380+ while i < n :
381+ ch : str = doc [i ]
382+ if (ch == "'" and balanced_single or ch == '"' and balanced_double ) and (
383+ i == 0 or doc [i - 1 ] != "\\ "
384+ ):
385+ i = eat_quoted (ch , doc , i , words , n )
386+ elif (
374387 ch in word_chars
375388 or ch == "."
376389 and len (doc ) > (i + 1 )
@@ -380,14 +393,20 @@ def _parse_adhoc_doc_for_typ_phase0(doc, words):
380393 ):
381394 words [- 1 ].append (ch )
382395 elif ch in frozenset (("." , ";" , "," )) or ch .isspace ():
383- words [- 1 ] = "" .join (words [- 1 ])
384- words .append (ch )
396+ if words [- 1 ]:
397+ words [- 1 ] = "" .join (words [- 1 ])
398+ words .append (ch )
399+ else :
400+ words [- 1 ] = ch
385401 if ch == "." and sentence_ends == - 1 :
386402 sentence_ends : int = len (words )
387403 elif ch == ";" :
388404 break_the_union = True
389405 words .append ([])
406+ i += 1
390407 words [- 1 ] = "" .join (words [- 1 ])
408+ if not words [- 1 ]:
409+ del words [- 1 ]
391410 candidate_type : Optional [str ] = next (
392411 map (
393412 adhoc_type_to_type .__getitem__ ,
@@ -414,4 +433,47 @@ def _parse_adhoc_doc_for_typ_phase0(doc, words):
414433 return candidate_type , fst_sentence , sentence
415434
416435
def eat_quoted(ch, doc, chomp_start_idx, words, n):
    """
    Chomp from quoted character `ch` to the next unescaped quoted character `ch`

    The quoted run (both quote marks included) is appended to `words`, followed by a
    fresh empty accumulator (`[]`) for whatever word comes next.

    :param ch: Character of `'` or `"`
    :type ch: ```Literal["'", '"']```

    :param doc: Possibly ambiguous docstring for argument, that *might* hint as to the type
    :type doc: ```str```

    :param chomp_start_idx: Index in `doc` of the opening quote mark
    :type chomp_start_idx: ```int```

    :param words: Words; mutated in place. The last element is expected to be the
        pending accumulator (a list of characters)
    :type words: ```List[Union[List[str], str]]```

    :param n: Length of `doc`
    :type n: ```int```

    :return: chomp_end_idx; index in `doc` of the closing quote mark, or `chomp_start_idx`
        when no unescaped closing quote mark follows
    :rtype: ```int```
    """
    # Start one past the opening quote so it cannot match itself, and only ever index
    # `idx` / `idx - 1` so the scan cannot run off the end of `doc`.
    # A quote mark immediately preceded by a backslash is treated as escaped.
    chomp_end_idx: int = next(
        (
            idx
            for idx in range(chomp_start_idx + 1, n)
            if doc[idx] == ch and doc[idx - 1] != "\\"
        ),
        chomp_start_idx,
    )
    # Inclusive of the closing quote; degenerates to the lone opening quote when unmatched
    quoted_str: str = doc[chomp_start_idx : chomp_end_idx + 1]

    # NOTE(review): the pending accumulator is left where it is inside `words` (not
    # replaced by its joined form); confirm that downstream consumers expect this.
    if len(words[-1]) == 1 and words[-1][-1] == "`":
        # A lone backtick directly before the quote is not emitted as its own word
        words.extend((quoted_str, []))
    else:
        words.extend(("".join(words[-1]), quoted_str, []))
    return chomp_end_idx
477+
478+
417479__all__ = ["parse_adhoc_doc_for_typ" ] # type: list[str]
0 commit comments