@@ -750,20 +750,13 @@ def chomsky_normal_form(self, new_token_padding="@$@", flexible=False):
750750 "Grammar has Empty rules. " "Cannot deal with them at the moment"
751751 )
752752
753- # check for mixed rules
754- for rule in self .productions ():
755- if rule .is_lexical () and len (rule .rhs ()) > 1 :
756- raise ValueError (
757- f"Cannot handled mixed rule { rule .lhs ()} => { rule .rhs ()} "
758- )
759-
760753 step1 = CFG .eliminate_start (self )
761754 step2 = CFG .binarize (step1 , new_token_padding )
755+ step3 = CFG .remove_mixed_rules (step2 , new_token_padding )
762756 if flexible :
763- return step2
764- step3 = CFG .remove_unitary_rules (step2 )
765- step4 = CFG (step3 .start (), list (set (step3 .productions ())))
766- return step4
757+ return step3
758+ step4 = CFG .remove_unitary_rules (step3 )
759+ return CFG (step4 .start (), list (set (step4 .productions ())))
767760
768761 @classmethod
769762 def remove_unitary_rules (cls , grammar ):
@@ -845,6 +838,48 @@ def eliminate_start(cls, grammar):
845838 return n_grammar
846839 return grammar
847840
@classmethod
def remove_mixed_rules(cls, grammar, padding="@$@"):
    """
    Replace every terminal in a mixed rule with a dummy non-terminal.

    A rule is rewritten when ``rule.is_lexical()`` is true and its
    right-hand side has more than one item; every other rule is copied
    through unchanged. Each distinct terminal found in such a rule gets
    exactly one dummy non-terminal and exactly one production
    ``DUMMY => terminal``, however many rules mention it.

    Example::

        Original:
            A => term B
        After Conversion:
            A => TERM@$@TERM B
            TERM@$@TERM => term

    The dummy symbol is built from the upper-cased terminal filtered
    through ``_STANDARD_NONTERM_RE``. Different terminals can sanitize
    to the same text (``"a"`` and ``"A"``, or two punctuation marks that
    sanitize to nothing); in that case, or when the symbol is already
    used by a non-terminal of ``grammar``, ``padding`` plus a counter is
    appended so that every dummy stays distinct.

    :param grammar: the grammar whose productions are rewritten; it is
        not modified.
    :param padding: separator used when building dummy symbols.
    :return: a new ``CFG`` with the same start symbol as ``grammar``.
    """
    productions = grammar.productions()

    # Non-terminals already in use. A dummy that reused one of them would
    # silently merge two unrelated symbols and change the language.
    taken = set()
    for rule in productions:
        taken.add(rule.lhs())
        taken.update(item for item in rule.rhs() if is_nonterminal(item))

    result = []
    dummy_nonterms = {}
    for rule in productions:
        if not rule.is_lexical() or len(rule.rhs()) <= 1:
            result.append(rule)
            continue

        new_rhs = []
        for item in rule.rhs():
            if is_nonterminal(item):
                new_rhs.append(item)
                continue

            if item not in dummy_nonterms:
                # str() keeps non-string terminals from failing on .upper().
                sanitized_term = "".join(
                    _STANDARD_NONTERM_RE.findall(str(item).upper())
                )
                base_symbol = f"{sanitized_term}{padding}{sanitized_term}"
                dummy = Nonterminal(base_symbol)
                suffix = 1
                while dummy in taken:
                    dummy = Nonterminal(f"{base_symbol}{padding}{suffix}")
                    suffix += 1
                taken.add(dummy)
                dummy_nonterms[item] = dummy
                # Emit the lexical production once, at creation time, so
                # repeated terminals do not produce duplicate rules.
                result.append(Production(dummy, rhs=[item]))

            new_rhs.append(dummy_nonterms[item])

        result.append(Production(rule.lhs(), new_rhs))

    return CFG(grammar.start(), result)
882+
def __repr__(self):
    """Return a short summary giving the number of stored productions."""
    production_count = len(self._productions)
    return "<Grammar with %d productions>" % production_count
850885
0 commit comments