@@ -221,6 +221,16 @@ public class Tokenizer implements Locator, Locator2 {
221
221
222
222
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223
223
224
// Tokenizer state id. Not referenced by any of the hunks visible in this diff.
+ public static final int AMBIGUOUS_AMPERSAND = 75 ;
225
+
226
// Tokenizer state id entered after a '<' has been appended to the comment
// buffer; the visible hunks transition here from COMMENT_START, COMMENT,
// COMMENT_END_DASH, COMMENT_END and COMMENT_START_DASH.
+ public static final int COMMENT_LESSTHAN = 76 ;
227
+
228
// Tokenizer state id entered from COMMENT_LESSTHAN when '!' follows the '<'.
+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
229
+
230
// Tokenizer state id entered from COMMENT_LESSTHAN_BANG when '-' follows "<!".
+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
231
+
232
// Tokenizer state id entered from COMMENT_LESSTHAN_BANG_DASH when a second '-'
// follows, i.e. "<!--" has been seen inside a comment.
+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
233
+
224
234
/**
225
235
* Magic value for UTF-16 operations.
226
236
*/
@@ -1029,9 +1039,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1029
1039
1030
1040
// ]NOCPP]
1031
1041
1032
- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1042
+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
1033
1043
throws SAXException {
1034
- errConsecutiveHyphens ();
1035
1044
// [NOCPP[
1036
1045
switch (commentPolicy ) {
1037
1046
case ALTER_INFOSET :
@@ -1042,7 +1051,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1042
1051
appendStrBuf ('-' );
1043
1052
// CPPONLY: MOZ_FALLTHROUGH;
1044
1053
case ALLOW :
1045
- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054
+ if (!reportedConsecutiveHyphens ) {
1055
+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1056
+ }
1046
1057
// ]NOCPP]
1047
1058
appendStrBuf (c );
1048
1059
// [NOCPP[
@@ -1464,6 +1475,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1464
1475
@ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
1465
1476
int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
1466
1477
int endPos ) throws SAXException {
1478
+ boolean reportedConsecutiveHyphens = false ;
1467
1479
/*
1468
1480
* Idioms used in this code:
1469
1481
*
@@ -2540,6 +2552,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2540
2552
}
2541
2553
// CPPONLY: MOZ_FALLTHROUGH;
2542
2554
case COMMENT_START :
2555
+ reportedConsecutiveHyphens = false ;
2543
2556
commentstartloop : for (;;) {
2544
2557
if (++pos == endPos ) {
2545
2558
break stateloop ;
@@ -2572,6 +2585,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2572
2585
*/
2573
2586
state = transition (state , Tokenizer .DATA , reconsume , pos );
2574
2587
continue stateloop ;
2588
+ case '<' :
2589
+ appendStrBuf (c );
2590
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2591
+ continue stateloop ;
2575
2592
case '\r' :
2576
2593
appendStrBufCarriageReturn ();
2577
2594
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2617,6 +2634,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2617
2634
state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2618
2635
break commentloop ;
2619
2636
// continue stateloop;
2637
+ case '<' :
2638
+ appendStrBuf (c );
2639
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2640
+ continue stateloop ;
2620
2641
case '\r' :
2621
2642
appendStrBufCarriageReturn ();
2622
2643
break stateloop ;
@@ -2659,6 +2680,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2659
2680
state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2660
2681
break commentenddashloop ;
2661
2682
// continue stateloop;
2683
+ case '<' :
2684
+ appendStrBuf (c );
2685
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2686
+ continue stateloop ;
2662
2687
case '\r' :
2663
2688
appendStrBufCarriageReturn ();
2664
2689
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2713,11 +2738,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2713
2738
* Append a U+002D HYPHEN-MINUS (-) character to
2714
2739
* the comment token's data.
2715
2740
*/
2716
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2741
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2742
+ reportedConsecutiveHyphens = true ;
2717
2743
/*
2718
2744
* Stay in the comment end state.
2719
2745
*/
2720
2746
continue ;
2747
+ case '<' :
2748
+ appendStrBuf (c );
2749
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2750
+ continue stateloop ;
2721
2751
case '\r' :
2722
2752
adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
2723
2753
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2727,7 +2757,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2727
2757
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2728
2758
continue stateloop ;
2729
2759
case '!' :
2730
- errHyphenHyphenBang ();
2731
2760
appendStrBuf (c );
2732
2761
state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2733
2762
continue stateloop ;
@@ -2740,7 +2769,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2740
2769
* and the input character to the comment
2741
2770
* token's data.
2742
2771
*/
2743
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2772
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2773
+ reportedConsecutiveHyphens = true ;
2744
2774
/*
2745
2775
* Switch to the comment state.
2746
2776
*/
@@ -2810,6 +2840,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2810
2840
continue stateloop ;
2811
2841
}
2812
2842
}
2843
case COMMENT_LESSTHAN:
    /*
     * Comment less-than sign state. Every transition into this state has
     * already appended the '<' to the comment buffer, so this case only
     * has to classify the character that follows it.
     *
     * Only '!' continues towards a possible nested "<!--"; any other
     * character (line breaks included) must hand control back to one of
     * the ordinary comment states.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '!':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
            continue stateloop;
        case '<':
            // A run of '<' characters keeps the tokenizer in this state.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '-':
            // Same outcome as a '-' read in the COMMENT state.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // Record the new state before leaving the loop; otherwise the
            // tokenizer resumes in COMMENT_LESSTHAN and a later '!' would be
            // treated as if it directly followed the '<'.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // A line feed ends the "<" lookahead just like any other
            // ordinary character.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
2877
case COMMENT_LESSTHAN_BANG:
    /*
     * Comment less-than sign bang state: "<!" has been appended to the
     * comment buffer. Only '-' continues towards a possible nested
     * "<!--"; everything else returns to an ordinary comment state.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '-':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
            continue stateloop;
        case '<':
            // A fresh '<' restarts the lookahead.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // Leave the lookahead before breaking out, so that the tokenizer
            // does not resume in this state and accept "<!\r-" as "<!-".
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // A line feed is ordinary comment data and ends the lookahead.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
2907
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * Comment less-than sign bang dash state: "<!-" has been appended to
     * the comment buffer. A second '-' completes a nested "<!--"; any
     * other character returns to an ordinary comment state.
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '-':
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
            continue stateloop;
        case '<':
            // A fresh '<' restarts the lookahead.
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
            continue stateloop;
        case '\r':
            appendStrBufCarriageReturn();
            // Leave the lookahead before breaking out, so that the tokenizer
            // does not resume in this state and accept "<!-\r-" as "<!--".
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            appendStrBufLineFeed();
            // A line feed is ordinary comment data and ends the lookahead.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
        case '\u0000':
            c = '\uFFFD';
            // fall thru
        default:
            appendStrBuf(c);
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
2937
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * Comment less-than sign bang dash dash state: "<!--" has been
     * appended to the comment buffer. The buffer therefore ends in "--",
     * exactly as it does in the COMMENT_END state, and the next character
     * is dispatched the same way COMMENT_END dispatches it. The only
     * difference is that every character other than '>' is additionally
     * reported through errNestedComment().
     */
    if (++pos == endPos) {
        break stateloop;
    }
    c = checkChar(buf, pos);
    switch (c) {
        case '>':
            appendStrBuf(c);
            // Presumably the first argument is the number of trailing
            // buffer characters to drop (here "-->") -- confirm against
            // emitComment.
            emitComment(3, pos);
            state = transition(state, Tokenizer.DATA, reconsume, pos);
            continue stateloop;
        case '-':
            errNestedComment();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            // The buffer still ends in "--".
            state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
            continue stateloop;
        case '\r':
            errNestedComment();
            // Mirror adjustDoubleHyphenAndAppendToStrBufCarriageReturn():
            // call silentCarriageReturn() and append '\n' rather than the
            // raw '\r', while still honouring the once-per-comment
            // warning flag.
            silentCarriageReturn();
            adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            break stateloop;
        case '\n':
            errNestedComment();
            // Mirror adjustDoubleHyphenAndAppendToStrBufLineFeed().
            silentLineFeed();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            // Must re-dispatch on the new state; staying in this case would
            // keep treating subsequent input as following "<!--".
            continue stateloop;
        case '!':
            errNestedComment();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
            continue stateloop;
        case '\u0000':
            // NUL is ordinary (replaced) comment data; it must not share the
            // '!' branch and end up in COMMENT_END_BANG.
            c = '\uFFFD';
            // fall thru
        default:
            errNestedComment();
            adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
            reportedConsecutiveHyphens = true;
            // An ordinary character breaks up the "--", so go back to
            // COMMENT (as the COMMENT_END state does); remaining in
            // COMMENT_END would let a following '>' close the comment.
            state = transition(state, Tokenizer.COMMENT, reconsume, pos);
            continue stateloop;
    }
2984
+ // XXX reorder point
2813
2985
case COMMENT_START_DASH :
2814
2986
if (++pos == endPos ) {
2815
2987
break stateloop ;
@@ -2838,6 +3010,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2838
3010
*/
2839
3011
state = transition (state , Tokenizer .DATA , reconsume , pos );
2840
3012
continue stateloop ;
3013
+ case '<' :
3014
+ appendStrBuf (c );
3015
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3016
+ continue stateloop ;
2841
3017
case '\r' :
2842
3018
appendStrBufCarriageReturn ();
2843
3019
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5957,13 +6133,13 @@ private void initDoctypeFields() {
5957
6133
/**
 * Carriage-return variant of the double-hyphen append: calls
 * silentCarriageReturn() and then forwards '\n' (not '\r') to
 * adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * @throws SAXException propagated from the callee
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
5958
6134
throws SAXException {
5959
6135
silentCarriageReturn ();
5960
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6136
// NOTE(review): the literal false means the "already reported" guard around
// the XML 1.0 mappability warning never suppresses it on this path, even if
// the caller has reported consecutive hyphens before -- confirm intended.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5961
6137
}
5962
6138
5963
6139
/**
 * Line-feed variant of the double-hyphen append: calls silentLineFeed() and
 * then forwards '\n' to adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * @throws SAXException propagated from the callee
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
5964
6140
throws SAXException {
5965
6141
silentLineFeed ();
5966
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6142
// NOTE(review): the literal false means the "already reported" guard around
// the XML 1.0 mappability warning never suppresses it on this path, even if
// the caller has reported consecutive hyphens before -- confirm intended.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5967
6143
}
5968
6144
5969
6145
@ Inline private void appendStrBufLineFeed () {
@@ -6268,6 +6444,8 @@ public void eof() throws SAXException {
6268
6444
break eofloop ;
6269
6445
case COMMENT_START :
6270
6446
case COMMENT :
6447
+ case COMMENT_LESSTHAN :
6448
+ case COMMENT_LESSTHAN_BANG :
6271
6449
/*
6272
6450
* EOF Parse error.
6273
6451
*/
@@ -6279,6 +6457,7 @@ public void eof() throws SAXException {
6279
6457
*/
6280
6458
break eofloop ;
6281
6459
case COMMENT_END :
6460
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
6282
6461
errEofInComment ();
6283
6462
/* Emit the comment token. */
6284
6463
emitComment (2 , 0 );
@@ -6288,6 +6467,7 @@ public void eof() throws SAXException {
6288
6467
break eofloop ;
6289
6468
case COMMENT_END_DASH :
6290
6469
case COMMENT_START_DASH :
6470
+ case COMMENT_LESSTHAN_BANG_DASH :
6291
6471
errEofInComment ();
6292
6472
/* Emit the comment token. */
6293
6473
emitComment (1 , 0 );
@@ -6917,7 +7097,7 @@ protected void errGtInPublicId() throws SAXException {
6917
7097
protected void errNamelessDoctype () throws SAXException {
6918
7098
}
6919
7099
6920
- protected void errConsecutiveHyphens () throws SAXException {
7100
/**
 * Error hook with an empty body here. In this diff it is invoked from the
 * COMMENT_LESSTHAN_BANG_DASH_DASH state for every character other than '>'.
 *
 * NOTE(review): this replaces errConsecutiveHyphens(), whose only visible
 * call site is removed in the same change; any subclass still overriding
 * the old hook would presumably no longer be called -- verify.
 *
 * @throws SAXException declared so that an overriding implementation may throw
 */
+ protected void errNestedComment () throws SAXException {
6921
7101
}
6922
7102
6923
7103
protected void errPrematureEndOfComment () throws SAXException {
@@ -7060,9 +7240,6 @@ protected void errExpectedSystemId() throws SAXException {
7060
7240
protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
7061
7241
}
7062
7242
7063
- protected void errHyphenHyphenBang () throws SAXException {
7064
- }
7065
-
7066
7243
protected void errNcrControlChar () throws SAXException {
7067
7244
}
7068
7245
0 commit comments