diff --git a/HtmlParser-compile b/HtmlParser-compile deleted file mode 100755 index 3e867827..00000000 --- a/HtmlParser-compile +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/HtmlParser-compile-detailed b/HtmlParser-compile-detailed deleted file mode 100755 index a4102d64..00000000 --- a/HtmlParser-compile-detailed +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTCompiler -style DETAILED -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser; diff --git a/HtmlParser-compile-detailed.launch b/HtmlParser-compile-detailed.launch deleted file mode 100644 index 0347fd6c..00000000 --- a/HtmlParser-compile-detailed.launch +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/HtmlParser-compile.launch b/HtmlParser-compile.launch deleted file mode 100644 index 54e7bc33..00000000 --- a/HtmlParser-compile.launch +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/HtmlParser-linux b/HtmlParser-linux deleted file mode 100755 index 0a9e9def..00000000 --- a/HtmlParser-linux +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/home/hsivonen/gwt-linux-1.5.1/gwt-user.jar:/home/hsivonen/gwt-linux-1.5.1/gwt-dev-linux.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/HtmlParser-shell b/HtmlParser-shell deleted file mode 100755 index ffcf2e29..00000000 --- a/HtmlParser-shell +++ 
/dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -APPDIR=`dirname $0`; -java -XstartOnFirstThread -Xmx256M -cp "$APPDIR/src:$APPDIR/gwt-src:$APPDIR/super:$APPDIR/bin:/Developer/gwt-mac-1.5.1/gwt-user.jar:/Developer/gwt-mac-1.5.1/gwt-dev-mac.jar" com.google.gwt.dev.GWTShell -out "$APPDIR/www" "$@" nu.validator.htmlparser.HtmlParser/HtmlParser.html; diff --git a/HtmlParser.launch b/HtmlParser.launch deleted file mode 100644 index 9335abf6..00000000 --- a/HtmlParser.launch +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/README b/doc/README deleted file mode 100644 index e0132a41..00000000 --- a/doc/README +++ /dev/null @@ -1,15 +0,0 @@ -tokenization.txt represents the state of the spec implemented in Tokenizer.java. - -To get a diffable version corresponding to the current spec: -lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html > current.txt - -tree-construction.txt represents the state of the spec implemented in TreeBuilder.java. - -To get a diffable version corresponding to the current spec: -lynx -display_charset=utf-8 -dump -nolist http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html > current.txt - - -The text of the files in this directory comes from the WHATWG HTML 5 spec -which carries the following notice: -© Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera Software ASA. -You are granted a license to use, reproduce and create derivative works of this document. diff --git a/doc/named-character-references.html b/doc/named-character-references.html deleted file mode 100644 index 5f05a991..00000000 --- a/doc/named-character-references.html +++ /dev/null @@ -1,4 +0,0 @@ - - -
Name Character(s) Glyph
AElig; U+000C6 Æ
AMP; U+00026 &
Aacute; U+000C1 Á
Abreve; U+00102 Ă
Acirc; U+000C2 Â
Acy; U+00410 А
Afr; U+1D504 𝔄
Agrave; U+000C0 À
Alpha; U+00391 Α
Amacr; U+00100 Ā
And; U+02A53
Aogon; U+00104 Ą
Aopf; U+1D538 𝔸
ApplyFunction; U+02061
Aring; U+000C5 Å
Ascr; U+1D49C 𝒜
Assign; U+02254
Atilde; U+000C3 Ã
Auml; U+000C4 Ä
Backslash; U+02216
Barv; U+02AE7
Barwed; U+02306
Bcy; U+00411 Б
Because; U+02235
Bernoullis; U+0212C
Beta; U+00392 Β
Bfr; U+1D505 𝔅
Bopf; U+1D539 𝔹
Breve; U+002D8 ˘
Bscr; U+0212C
Bumpeq; U+0224E
CHcy; U+00427 Ч
COPY; U+000A9 ©
Cacute; U+00106 Ć
Cap; U+022D2
CapitalDifferentialD; U+02145
Cayleys; U+0212D
Ccaron; U+0010C Č
Ccedil; U+000C7 Ç
Ccirc; U+00108 Ĉ
Cconint; U+02230
Cdot; U+0010A Ċ
Cedilla; U+000B8 ¸
CenterDot; U+000B7 ·
Cfr; U+0212D
Chi; U+003A7 Χ
CircleDot; U+02299
CircleMinus; U+02296
CirclePlus; U+02295
CircleTimes; U+02297
ClockwiseContourIntegral; U+02232
CloseCurlyDoubleQuote; U+0201D
CloseCurlyQuote; U+02019
Colon; U+02237
Colone; U+02A74
Congruent; U+02261
Conint; U+0222F
ContourIntegral; U+0222E
Copf; U+02102
Coproduct; U+02210
CounterClockwiseContourIntegral; U+02233
Cross; U+02A2F
Cscr; U+1D49E 𝒞
Cup; U+022D3
CupCap; U+0224D
DD; U+02145
DDotrahd; U+02911
DJcy; U+00402 Ђ
DScy; U+00405 Ѕ
DZcy; U+0040F Џ
Dagger; U+02021
Darr; U+021A1
Dashv; U+02AE4
Dcaron; U+0010E Ď
Dcy; U+00414 Д
Del; U+02207
Delta; U+00394 Δ
Dfr; U+1D507 𝔇
DiacriticalAcute; U+000B4 ´
DiacriticalDot; U+002D9 ˙
DiacriticalDoubleAcute; U+002DD ˝
DiacriticalGrave; U+00060 `
DiacriticalTilde; U+002DC ˜
Diamond; U+022C4
DifferentialD; U+02146
Dopf; U+1D53B 𝔻
Dot; U+000A8 ¨
DotDot; U+020DC ◌⃜
DotEqual; U+02250
DoubleContourIntegral; U+0222F
DoubleDot; U+000A8 ¨
DoubleDownArrow; U+021D3
DoubleLeftArrow; U+021D0
DoubleLeftRightArrow; U+021D4
DoubleLeftTee; U+02AE4
DoubleLongLeftArrow; U+027F8
DoubleLongLeftRightArrow; U+027FA
DoubleLongRightArrow; U+027F9
DoubleRightArrow; U+021D2
DoubleRightTee; U+022A8
DoubleUpArrow; U+021D1
DoubleUpDownArrow; U+021D5
DoubleVerticalBar; U+02225
DownArrow; U+02193
DownArrowBar; U+02913
DownArrowUpArrow; U+021F5
DownBreve; U+00311 ◌̑
DownLeftRightVector; U+02950
DownLeftTeeVector; U+0295E
DownLeftVector; U+021BD
DownLeftVectorBar; U+02956
DownRightTeeVector; U+0295F
DownRightVector; U+021C1
DownRightVectorBar; U+02957
DownTee; U+022A4
DownTeeArrow; U+021A7
Downarrow; U+021D3
Dscr; U+1D49F 𝒟
Dstrok; U+00110 Đ
ENG; U+0014A Ŋ
ETH; U+000D0 Ð
Eacute; U+000C9 É
Ecaron; U+0011A Ě
Ecirc; U+000CA Ê
Ecy; U+0042D Э
Edot; U+00116 Ė
Efr; U+1D508 𝔈
Egrave; U+000C8 È
Element; U+02208
Emacr; U+00112 Ē
EmptySmallSquare; U+025FB
EmptyVerySmallSquare; U+025AB
Eogon; U+00118 Ę
Eopf; U+1D53C 𝔼
Epsilon; U+00395 Ε
Equal; U+02A75
EqualTilde; U+02242
Equilibrium; U+021CC
Escr; U+02130
Esim; U+02A73
Eta; U+00397 Η
Euml; U+000CB Ë
Exists; U+02203
ExponentialE; U+02147
Fcy; U+00424 Ф
Ffr; U+1D509 𝔉
FilledSmallSquare; U+025FC
FilledVerySmallSquare; U+025AA
Fopf; U+1D53D 𝔽
ForAll; U+02200
Fouriertrf; U+02131
Fscr; U+02131
GJcy; U+00403 Ѓ
GT; U+0003E >
Gamma; U+00393 Γ
Gammad; U+003DC Ϝ
Gbreve; U+0011E Ğ
Gcedil; U+00122 Ģ
Gcirc; U+0011C Ĝ
Gcy; U+00413 Г
Gdot; U+00120 Ġ
Gfr; U+1D50A 𝔊
Gg; U+022D9
Gopf; U+1D53E 𝔾
GreaterEqual; U+02265
GreaterEqualLess; U+022DB
GreaterFullEqual; U+02267
GreaterGreater; U+02AA2
GreaterLess; U+02277
GreaterSlantEqual; U+02A7E
GreaterTilde; U+02273
Gscr; U+1D4A2 𝒢
Gt; U+0226B
HARDcy; U+0042A Ъ
Hacek; U+002C7 ˇ
Hat; U+0005E ^
Hcirc; U+00124 Ĥ
Hfr; U+0210C
HilbertSpace; U+0210B
Hopf; U+0210D
HorizontalLine; U+02500
Hscr; U+0210B
Hstrok; U+00126 Ħ
HumpDownHump; U+0224E
HumpEqual; U+0224F
IEcy; U+00415 Е
IJlig; U+00132 IJ
IOcy; U+00401 Ё
Iacute; U+000CD Í
Icirc; U+000CE Î
Icy; U+00418 И
Idot; U+00130 İ
Ifr; U+02111
Igrave; U+000CC Ì
Im; U+02111
Imacr; U+0012A Ī
ImaginaryI; U+02148
Implies; U+021D2
Int; U+0222C
Integral; U+0222B
Intersection; U+022C2
InvisibleComma; U+02063
InvisibleTimes; U+02062
Iogon; U+0012E Į
Iopf; U+1D540 𝕀
Iota; U+00399 Ι
Iscr; U+02110
Itilde; U+00128 Ĩ
Iukcy; U+00406 І
Iuml; U+000CF Ï
Jcirc; U+00134 Ĵ
Jcy; U+00419 Й
Jfr; U+1D50D 𝔍
Jopf; U+1D541 𝕁
Jscr; U+1D4A5 𝒥
Jsercy; U+00408 Ј
Jukcy; U+00404 Є
KHcy; U+00425 Х
KJcy; U+0040C Ќ
Kappa; U+0039A Κ
Kcedil; U+00136 Ķ
Kcy; U+0041A К
Kfr; U+1D50E 𝔎
Kopf; U+1D542 𝕂
Kscr; U+1D4A6 𝒦
LJcy; U+00409 Љ
LT; U+0003C <
Lacute; U+00139 Ĺ
Lambda; U+0039B Λ
Lang; U+027EA
Laplacetrf; U+02112
Larr; U+0219E
Lcaron; U+0013D Ľ
Lcedil; U+0013B Ļ
Lcy; U+0041B Л
LeftAngleBracket; U+027E8
LeftArrow; U+02190
LeftArrowBar; U+021E4
LeftArrowRightArrow; U+021C6
LeftCeiling; U+02308
LeftDoubleBracket; U+027E6
LeftDownTeeVector; U+02961
LeftDownVector; U+021C3
LeftDownVectorBar; U+02959
LeftFloor; U+0230A
LeftRightArrow; U+02194
LeftRightVector; U+0294E
LeftTee; U+022A3
LeftTeeArrow; U+021A4
LeftTeeVector; U+0295A
LeftTriangle; U+022B2
LeftTriangleBar; U+029CF
LeftTriangleEqual; U+022B4
LeftUpDownVector; U+02951
LeftUpTeeVector; U+02960
LeftUpVector; U+021BF
LeftUpVectorBar; U+02958
LeftVector; U+021BC
LeftVectorBar; U+02952
Leftarrow; U+021D0
Leftrightarrow; U+021D4
LessEqualGreater; U+022DA
LessFullEqual; U+02266
LessGreater; U+02276
LessLess; U+02AA1
LessSlantEqual; U+02A7D
LessTilde; U+02272
Lfr; U+1D50F 𝔏
Ll; U+022D8
Lleftarrow; U+021DA
Lmidot; U+0013F Ŀ
LongLeftArrow; U+027F5
LongLeftRightArrow; U+027F7
LongRightArrow; U+027F6
Longleftarrow; U+027F8
Longleftrightarrow; U+027FA
Longrightarrow; U+027F9
Lopf; U+1D543 𝕃
LowerLeftArrow; U+02199
LowerRightArrow; U+02198
Lscr; U+02112
Lsh; U+021B0
Lstrok; U+00141 Ł
Lt; U+0226A
Map; U+02905
Mcy; U+0041C М
MediumSpace; U+0205F
Mellintrf; U+02133
Mfr; U+1D510 𝔐
MinusPlus; U+02213
Mopf; U+1D544 𝕄
Mscr; U+02133
Mu; U+0039C Μ
NJcy; U+0040A Њ
Nacute; U+00143 Ń
Ncaron; U+00147 Ň
Ncedil; U+00145 Ņ
Ncy; U+0041D Н
NegativeMediumSpace; U+0200B
NegativeThickSpace; U+0200B
NegativeThinSpace; U+0200B
NegativeVeryThinSpace; U+0200B
NestedGreaterGreater; U+0226B
NestedLessLess; U+0226A
NewLine; U+0000A
Nfr; U+1D511 𝔑
NoBreak; U+02060
NonBreakingSpace; U+000A0  
Nopf; U+02115
Not; U+02AEC
NotCongruent; U+02262
NotCupCap; U+0226D
NotDoubleVerticalBar; U+02226
NotElement; U+02209
NotEqual; U+02260
NotEqualTilde; U+02242 U+00338 ≂̸
NotExists; U+02204
NotGreater; U+0226F
NotGreaterEqual; U+02271
NotGreaterFullEqual; U+02267 U+00338 ≧̸
NotGreaterGreater; U+0226B U+00338 ≫̸
NotGreaterLess; U+02279
NotGreaterSlantEqual; U+02A7E U+00338 ⩾̸
NotGreaterTilde; U+02275
NotHumpDownHump; U+0224E U+00338 ≎̸
NotHumpEqual; U+0224F U+00338 ≏̸
NotLeftTriangle; U+022EA
NotLeftTriangleBar; U+029CF U+00338 ⧏̸
NotLeftTriangleEqual; U+022EC
NotLess; U+0226E
NotLessEqual; U+02270
NotLessGreater; U+02278
NotLessLess; U+0226A U+00338 ≪̸
NotLessSlantEqual; U+02A7D U+00338 ⩽̸
NotLessTilde; U+02274
NotNestedGreaterGreater; U+02AA2 U+00338 ⪢̸
NotNestedLessLess; U+02AA1 U+00338 ⪡̸
NotPrecedes; U+02280
NotPrecedesEqual; U+02AAF U+00338 ⪯̸
NotPrecedesSlantEqual; U+022E0
NotReverseElement; U+0220C
NotRightTriangle; U+022EB
NotRightTriangleBar; U+029D0 U+00338 ⧐̸
NotRightTriangleEqual; U+022ED
NotSquareSubset; U+0228F U+00338 ⊏̸
NotSquareSubsetEqual; U+022E2
NotSquareSuperset; U+02290 U+00338 ⊐̸
NotSquareSupersetEqual; U+022E3
NotSubset; U+02282 U+020D2 ⊂⃒
NotSubsetEqual; U+02288
NotSucceeds; U+02281
NotSucceedsEqual; U+02AB0 U+00338 ⪰̸
NotSucceedsSlantEqual; U+022E1
NotSucceedsTilde; U+0227F U+00338 ≿̸
NotSuperset; U+02283 U+020D2 ⊃⃒
NotSupersetEqual; U+02289
NotTilde; U+02241
NotTildeEqual; U+02244
NotTildeFullEqual; U+02247
NotTildeTilde; U+02249
NotVerticalBar; U+02224
Nscr; U+1D4A9 𝒩
Ntilde; U+000D1 Ñ
Nu; U+0039D Ν
OElig; U+00152 Œ
Oacute; U+000D3 Ó
Ocirc; U+000D4 Ô
Ocy; U+0041E О
Odblac; U+00150 Ő
Ofr; U+1D512 𝔒
Ograve; U+000D2 Ò
Omacr; U+0014C Ō
Omega; U+003A9 Ω
Omicron; U+0039F Ο
Oopf; U+1D546 𝕆
OpenCurlyDoubleQuote; U+0201C
OpenCurlyQuote; U+02018
Or; U+02A54
Oscr; U+1D4AA 𝒪
Oslash; U+000D8 Ø
Otilde; U+000D5 Õ
Otimes; U+02A37
Ouml; U+000D6 Ö
OverBar; U+0203E
OverBrace; U+023DE
OverBracket; U+023B4
OverParenthesis; U+023DC
PartialD; U+02202
Pcy; U+0041F П
Pfr; U+1D513 𝔓
Phi; U+003A6 Φ
Pi; U+003A0 Π
PlusMinus; U+000B1 ±
Poincareplane; U+0210C
Popf; U+02119
Pr; U+02ABB
Precedes; U+0227A
PrecedesEqual; U+02AAF
PrecedesSlantEqual; U+0227C
PrecedesTilde; U+0227E
Prime; U+02033
Product; U+0220F
Proportion; U+02237
Proportional; U+0221D
Pscr; U+1D4AB 𝒫
Psi; U+003A8 Ψ
QUOT; U+00022 "
Qfr; U+1D514 𝔔
Qopf; U+0211A
Qscr; U+1D4AC 𝒬
RBarr; U+02910
REG; U+000AE ®
Racute; U+00154 Ŕ
Rang; U+027EB
Rarr; U+021A0
Rarrtl; U+02916
Rcaron; U+00158 Ř
Rcedil; U+00156 Ŗ
Rcy; U+00420 Р
Re; U+0211C
ReverseElement; U+0220B
ReverseEquilibrium; U+021CB
ReverseUpEquilibrium; U+0296F
Rfr; U+0211C
Rho; U+003A1 Ρ
RightAngleBracket; U+027E9
RightArrow; U+02192
RightArrowBar; U+021E5
RightArrowLeftArrow; U+021C4
RightCeiling; U+02309
RightDoubleBracket; U+027E7
RightDownTeeVector; U+0295D
RightDownVector; U+021C2
RightDownVectorBar; U+02955
RightFloor; U+0230B
RightTee; U+022A2
RightTeeArrow; U+021A6
RightTeeVector; U+0295B
RightTriangle; U+022B3
RightTriangleBar; U+029D0
RightTriangleEqual; U+022B5
RightUpDownVector; U+0294F
RightUpTeeVector; U+0295C
RightUpVector; U+021BE
RightUpVectorBar; U+02954
RightVector; U+021C0
RightVectorBar; U+02953
Rightarrow; U+021D2
Ropf; U+0211D
RoundImplies; U+02970
Rrightarrow; U+021DB
Rscr; U+0211B
Rsh; U+021B1
RuleDelayed; U+029F4
SHCHcy; U+00429 Щ
SHcy; U+00428 Ш
SOFTcy; U+0042C Ь
Sacute; U+0015A Ś
Sc; U+02ABC
Scaron; U+00160 Š
Scedil; U+0015E Ş
Scirc; U+0015C Ŝ
Scy; U+00421 С
Sfr; U+1D516 𝔖
ShortDownArrow; U+02193
ShortLeftArrow; U+02190
ShortRightArrow; U+02192
ShortUpArrow; U+02191
Sigma; U+003A3 Σ
SmallCircle; U+02218
Sopf; U+1D54A 𝕊
Sqrt; U+0221A
Square; U+025A1
SquareIntersection; U+02293
SquareSubset; U+0228F
SquareSubsetEqual; U+02291
SquareSuperset; U+02290
SquareSupersetEqual; U+02292
SquareUnion; U+02294
Sscr; U+1D4AE 𝒮
Star; U+022C6
Sub; U+022D0
Subset; U+022D0
SubsetEqual; U+02286
Succeeds; U+0227B
SucceedsEqual; U+02AB0
SucceedsSlantEqual; U+0227D
SucceedsTilde; U+0227F
SuchThat; U+0220B
Sum; U+02211
Sup; U+022D1
Superset; U+02283
SupersetEqual; U+02287
Supset; U+022D1
THORN; U+000DE Þ
TRADE; U+02122
TSHcy; U+0040B Ћ
TScy; U+00426 Ц
Tab; U+00009
Tau; U+003A4 Τ
Tcaron; U+00164 Ť
Tcedil; U+00162 Ţ
Tcy; U+00422 Т
Tfr; U+1D517 𝔗
Therefore; U+02234
Theta; U+00398 Θ
ThickSpace; U+0205F U+0200A   
ThinSpace; U+02009
Tilde; U+0223C
TildeEqual; U+02243
TildeFullEqual; U+02245
TildeTilde; U+02248
Topf; U+1D54B 𝕋
TripleDot; U+020DB ◌⃛
Tscr; U+1D4AF 𝒯
Tstrok; U+00166 Ŧ
Uacute; U+000DA Ú
Uarr; U+0219F
Uarrocir; U+02949
Ubrcy; U+0040E Ў
Ubreve; U+0016C Ŭ
Ucirc; U+000DB Û
Ucy; U+00423 У
Udblac; U+00170 Ű
Ufr; U+1D518 𝔘
Ugrave; U+000D9 Ù
Umacr; U+0016A Ū
UnderBar; U+0005F _
UnderBrace; U+023DF
UnderBracket; U+023B5
UnderParenthesis; U+023DD
Union; U+022C3
UnionPlus; U+0228E
Uogon; U+00172 Ų
Uopf; U+1D54C 𝕌
UpArrow; U+02191
UpArrowBar; U+02912
UpArrowDownArrow; U+021C5
UpDownArrow; U+02195
UpEquilibrium; U+0296E
UpTee; U+022A5
UpTeeArrow; U+021A5
Uparrow; U+021D1
Updownarrow; U+021D5
UpperLeftArrow; U+02196
UpperRightArrow; U+02197
Upsi; U+003D2 ϒ
Upsilon; U+003A5 Υ
Uring; U+0016E Ů
Uscr; U+1D4B0 𝒰
Utilde; U+00168 Ũ
Uuml; U+000DC Ü
VDash; U+022AB
Vbar; U+02AEB
Vcy; U+00412 В
Vdash; U+022A9
Vdashl; U+02AE6
Vee; U+022C1
Verbar; U+02016
Vert; U+02016
VerticalBar; U+02223
VerticalLine; U+0007C |
VerticalSeparator; U+02758
VerticalTilde; U+02240
VeryThinSpace; U+0200A
Vfr; U+1D519 𝔙
Vopf; U+1D54D 𝕍
Vscr; U+1D4B1 𝒱
Vvdash; U+022AA
Wcirc; U+00174 Ŵ
Wedge; U+022C0
Wfr; U+1D51A 𝔚
Wopf; U+1D54E 𝕎
Wscr; U+1D4B2 𝒲
Xfr; U+1D51B 𝔛
Xi; U+0039E Ξ
Xopf; U+1D54F 𝕏
Xscr; U+1D4B3 𝒳
YAcy; U+0042F Я
YIcy; U+00407 Ї
YUcy; U+0042E Ю
Yacute; U+000DD Ý
Ycirc; U+00176 Ŷ
Ycy; U+0042B Ы
Yfr; U+1D51C 𝔜
Yopf; U+1D550 𝕐
Yscr; U+1D4B4 𝒴
Yuml; U+00178 Ÿ
ZHcy; U+00416 Ж
Zacute; U+00179 Ź
Zcaron; U+0017D Ž
Zcy; U+00417 З
Zdot; U+0017B Ż
ZeroWidthSpace; U+0200B
Zeta; U+00396 Ζ
Zfr; U+02128
Zopf; U+02124
Zscr; U+1D4B5 𝒵
aacute; U+000E1 á
abreve; U+00103 ă
ac; U+0223E
acE; U+0223E U+00333 ∾̳
acd; U+0223F
acirc; U+000E2 â
acute; U+000B4 ´
acy; U+00430 а
aelig; U+000E6 æ
af; U+02061
afr; U+1D51E 𝔞
agrave; U+000E0 à
alefsym; U+02135
aleph; U+02135
alpha; U+003B1 α
amacr; U+00101 ā
amalg; U+02A3F ⨿
amp; U+00026 &
and; U+02227
andand; U+02A55
andd; U+02A5C
andslope; U+02A58
andv; U+02A5A
ang; U+02220
ange; U+029A4
angle; U+02220
angmsd; U+02221
angmsdaa; U+029A8
angmsdab; U+029A9
angmsdac; U+029AA
angmsdad; U+029AB
angmsdae; U+029AC
angmsdaf; U+029AD
angmsdag; U+029AE
angmsdah; U+029AF
angrt; U+0221F
angrtvb; U+022BE
angrtvbd; U+0299D
angsph; U+02222
angst; U+000C5 Å
angzarr; U+0237C
aogon; U+00105 ą
aopf; U+1D552 𝕒
ap; U+02248
apE; U+02A70
apacir; U+02A6F
ape; U+0224A
apid; U+0224B
apos; U+00027 '
approx; U+02248
approxeq; U+0224A
aring; U+000E5 å
ascr; U+1D4B6 𝒶
ast; U+0002A *
asymp; U+02248
asympeq; U+0224D
atilde; U+000E3 ã
auml; U+000E4 ä
awconint; U+02233
awint; U+02A11
bNot; U+02AED
backcong; U+0224C
backepsilon; U+003F6 ϶
backprime; U+02035
backsim; U+0223D
backsimeq; U+022CD
barvee; U+022BD
barwed; U+02305
barwedge; U+02305
bbrk; U+023B5
bbrktbrk; U+023B6
bcong; U+0224C
bcy; U+00431 б
bdquo; U+0201E
becaus; U+02235
because; U+02235
bemptyv; U+029B0
bepsi; U+003F6 ϶
bernou; U+0212C
beta; U+003B2 β
beth; U+02136
between; U+0226C
bfr; U+1D51F 𝔟
bigcap; U+022C2
bigcirc; U+025EF
bigcup; U+022C3
bigodot; U+02A00
bigoplus; U+02A01
bigotimes; U+02A02
bigsqcup; U+02A06
bigstar; U+02605
bigtriangledown; U+025BD
bigtriangleup; U+025B3
biguplus; U+02A04
bigvee; U+022C1
bigwedge; U+022C0
bkarow; U+0290D
blacklozenge; U+029EB
blacksquare; U+025AA
blacktriangle; U+025B4
blacktriangledown; U+025BE
blacktriangleleft; U+025C2
blacktriangleright; U+025B8
blank; U+02423
blk12; U+02592
blk14; U+02591
blk34; U+02593
block; U+02588
bne; U+0003D U+020E5 =⃥
bnequiv; U+02261 U+020E5 ≡⃥
bnot; U+02310
bopf; U+1D553 𝕓
bot; U+022A5
bottom; U+022A5
bowtie; U+022C8
boxDL; U+02557
boxDR; U+02554
boxDl; U+02556
boxDr; U+02553
boxH; U+02550
boxHD; U+02566
boxHU; U+02569
boxHd; U+02564
boxHu; U+02567
boxUL; U+0255D
boxUR; U+0255A
boxUl; U+0255C
boxUr; U+02559
boxV; U+02551
boxVH; U+0256C
boxVL; U+02563
boxVR; U+02560
boxVh; U+0256B
boxVl; U+02562
boxVr; U+0255F
boxbox; U+029C9
boxdL; U+02555
boxdR; U+02552
boxdl; U+02510
boxdr; U+0250C
boxh; U+02500
boxhD; U+02565
boxhU; U+02568
boxhd; U+0252C
boxhu; U+02534
boxminus; U+0229F
boxplus; U+0229E
boxtimes; U+022A0
boxuL; U+0255B
boxuR; U+02558
boxul; U+02518
boxur; U+02514
boxv; U+02502
boxvH; U+0256A
boxvL; U+02561
boxvR; U+0255E
boxvh; U+0253C
boxvl; U+02524
boxvr; U+0251C
bprime; U+02035
breve; U+002D8 ˘
brvbar; U+000A6 ¦
bscr; U+1D4B7 𝒷
bsemi; U+0204F
bsim; U+0223D
bsime; U+022CD
bsol; U+0005C \
bsolb; U+029C5
bsolhsub; U+027C8
bull; U+02022
bullet; U+02022
bump; U+0224E
bumpE; U+02AAE
bumpe; U+0224F
bumpeq; U+0224F
cacute; U+00107 ć
cap; U+02229
capand; U+02A44
capbrcup; U+02A49
capcap; U+02A4B
capcup; U+02A47
capdot; U+02A40
caps; U+02229 U+0FE00 ∩︀
caret; U+02041
caron; U+002C7 ˇ
ccaps; U+02A4D
ccaron; U+0010D č
ccedil; U+000E7 ç
ccirc; U+00109 ĉ
ccups; U+02A4C
ccupssm; U+02A50
cdot; U+0010B ċ
cedil; U+000B8 ¸
cemptyv; U+029B2
cent; U+000A2 ¢
centerdot; U+000B7 ·
cfr; U+1D520 𝔠
chcy; U+00447 ч
check; U+02713
checkmark; U+02713
chi; U+003C7 χ
cir; U+025CB
cirE; U+029C3
circ; U+002C6 ˆ
circeq; U+02257
circlearrowleft; U+021BA
circlearrowright; U+021BB
circledR; U+000AE ®
circledS; U+024C8
circledast; U+0229B
circledcirc; U+0229A
circleddash; U+0229D
cire; U+02257
cirfnint; U+02A10
cirmid; U+02AEF
cirscir; U+029C2
clubs; U+02663
clubsuit; U+02663
colon; U+0003A :
colone; U+02254
coloneq; U+02254
comma; U+0002C ,
commat; U+00040 @
comp; U+02201
compfn; U+02218
complement; U+02201
complexes; U+02102
cong; U+02245
congdot; U+02A6D
conint; U+0222E
copf; U+1D554 𝕔
coprod; U+02210
copy; U+000A9 ©
copysr; U+02117
crarr; U+021B5
cross; U+02717
cscr; U+1D4B8 𝒸
csub; U+02ACF
csube; U+02AD1
csup; U+02AD0
csupe; U+02AD2
ctdot; U+022EF
cudarrl; U+02938
cudarrr; U+02935
cuepr; U+022DE
cuesc; U+022DF
cularr; U+021B6
cularrp; U+0293D
cup; U+0222A
cupbrcap; U+02A48
cupcap; U+02A46
cupcup; U+02A4A
cupdot; U+0228D
cupor; U+02A45
cups; U+0222A U+0FE00 ∪︀
curarr; U+021B7
curarrm; U+0293C
curlyeqprec; U+022DE
curlyeqsucc; U+022DF
curlyvee; U+022CE
curlywedge; U+022CF
curren; U+000A4 ¤
curvearrowleft; U+021B6
curvearrowright; U+021B7
cuvee; U+022CE
cuwed; U+022CF
cwconint; U+02232
cwint; U+02231
cylcty; U+0232D
dArr; U+021D3
dHar; U+02965
dagger; U+02020
daleth; U+02138
darr; U+02193
dash; U+02010
dashv; U+022A3
dbkarow; U+0290F
dblac; U+002DD ˝
dcaron; U+0010F ď
dcy; U+00434 д
dd; U+02146
ddagger; U+02021
ddarr; U+021CA
ddotseq; U+02A77
deg; U+000B0 °
delta; U+003B4 δ
demptyv; U+029B1
dfisht; U+0297F ⥿
dfr; U+1D521 𝔡
dharl; U+021C3
dharr; U+021C2
diam; U+022C4
diamond; U+022C4
diamondsuit; U+02666
diams; U+02666
die; U+000A8 ¨
digamma; U+003DD ϝ
disin; U+022F2
div; U+000F7 ÷
divide; U+000F7 ÷
divideontimes; U+022C7
divonx; U+022C7
djcy; U+00452 ђ
dlcorn; U+0231E
dlcrop; U+0230D
dollar; U+00024 $
dopf; U+1D555 𝕕
dot; U+002D9 ˙
doteq; U+02250
doteqdot; U+02251
dotminus; U+02238
dotplus; U+02214
dotsquare; U+022A1
doublebarwedge; U+02306
downarrow; U+02193
downdownarrows; U+021CA
downharpoonleft; U+021C3
downharpoonright; U+021C2
drbkarow; U+02910
drcorn; U+0231F
drcrop; U+0230C
dscr; U+1D4B9 𝒹
dscy; U+00455 ѕ
dsol; U+029F6
dstrok; U+00111 đ
dtdot; U+022F1
dtri; U+025BF
dtrif; U+025BE
duarr; U+021F5
duhar; U+0296F
dwangle; U+029A6
dzcy; U+0045F џ
dzigrarr; U+027FF
eDDot; U+02A77
eDot; U+02251
eacute; U+000E9 é
easter; U+02A6E
ecaron; U+0011B ě
ecir; U+02256
ecirc; U+000EA ê
ecolon; U+02255
ecy; U+0044D э
edot; U+00117 ė
ee; U+02147
efDot; U+02252
efr; U+1D522 𝔢
eg; U+02A9A
egrave; U+000E8 è
egs; U+02A96
egsdot; U+02A98
el; U+02A99
elinters; U+023E7
ell; U+02113
els; U+02A95
elsdot; U+02A97
emacr; U+00113 ē
empty; U+02205
emptyset; U+02205
emptyv; U+02205
emsp; U+02003
emsp13; U+02004
emsp14; U+02005
eng; U+0014B ŋ
ensp; U+02002
eogon; U+00119 ę
eopf; U+1D556 𝕖
epar; U+022D5
eparsl; U+029E3
eplus; U+02A71
epsi; U+003B5 ε
epsilon; U+003B5 ε
epsiv; U+003F5 ϵ
eqcirc; U+02256
eqcolon; U+02255
eqsim; U+02242
eqslantgtr; U+02A96
eqslantless; U+02A95
equals; U+0003D =
equest; U+0225F
equiv; U+02261
equivDD; U+02A78
eqvparsl; U+029E5
erDot; U+02253
erarr; U+02971
escr; U+0212F
esdot; U+02250
esim; U+02242
eta; U+003B7 η
eth; U+000F0 ð
euml; U+000EB ë
euro; U+020AC
excl; U+00021 !
exist; U+02203
expectation; U+02130
exponentiale; U+02147
fallingdotseq; U+02252
fcy; U+00444 ф
female; U+02640
ffilig; U+0FB03
fflig; U+0FB00
ffllig; U+0FB04
ffr; U+1D523 𝔣
filig; U+0FB01
fjlig; U+00066 U+0006A fj
flat; U+0266D
fllig; U+0FB02
fltns; U+025B1
fnof; U+00192 ƒ
fopf; U+1D557 𝕗
forall; U+02200
fork; U+022D4
forkv; U+02AD9
fpartint; U+02A0D
frac12; U+000BD ½
frac13; U+02153
frac14; U+000BC ¼
frac15; U+02155
frac16; U+02159
frac18; U+0215B
frac23; U+02154
frac25; U+02156
frac34; U+000BE ¾
frac35; U+02157
frac38; U+0215C
frac45; U+02158
frac56; U+0215A
frac58; U+0215D
frac78; U+0215E
frasl; U+02044
frown; U+02322
fscr; U+1D4BB 𝒻
gE; U+02267
gEl; U+02A8C
gacute; U+001F5 ǵ
gamma; U+003B3 γ
gammad; U+003DD ϝ
gap; U+02A86
gbreve; U+0011F ğ
gcirc; U+0011D ĝ
gcy; U+00433 г
gdot; U+00121 ġ
ge; U+02265
gel; U+022DB
geq; U+02265
geqq; U+02267
geqslant; U+02A7E
ges; U+02A7E
gescc; U+02AA9
gesdot; U+02A80
gesdoto; U+02A82
gesdotol; U+02A84
gesl; U+022DB U+0FE00 ⋛︀
gesles; U+02A94
gfr; U+1D524 𝔤
gg; U+0226B
ggg; U+022D9
gimel; U+02137
gjcy; U+00453 ѓ
gl; U+02277
glE; U+02A92
gla; U+02AA5
glj; U+02AA4
gnE; U+02269
gnap; U+02A8A
gnapprox; U+02A8A
gne; U+02A88
gneq; U+02A88
gneqq; U+02269
gnsim; U+022E7
gopf; U+1D558 𝕘
grave; U+00060 `
gscr; U+0210A
gsim; U+02273
gsime; U+02A8E
gsiml; U+02A90
gt; U+0003E >
gtcc; U+02AA7
gtcir; U+02A7A
gtdot; U+022D7
gtlPar; U+02995
gtquest; U+02A7C
gtrapprox; U+02A86
gtrarr; U+02978
gtrdot; U+022D7
gtreqless; U+022DB
gtreqqless; U+02A8C
gtrless; U+02277
gtrsim; U+02273
gvertneqq; U+02269 U+0FE00 ≩︀
gvnE; U+02269 U+0FE00 ≩︀
hArr; U+021D4
hairsp; U+0200A
half; U+000BD ½
hamilt; U+0210B
hardcy; U+0044A ъ
harr; U+02194
harrcir; U+02948
harrw; U+021AD
hbar; U+0210F
hcirc; U+00125 ĥ
hearts; U+02665
heartsuit; U+02665
hellip; U+02026
hercon; U+022B9
hfr; U+1D525 𝔥
hksearow; U+02925
hkswarow; U+02926
hoarr; U+021FF
homtht; U+0223B
hookleftarrow; U+021A9
hookrightarrow; U+021AA
hopf; U+1D559 𝕙
horbar; U+02015
hscr; U+1D4BD 𝒽
hslash; U+0210F
hstrok; U+00127 ħ
hybull; U+02043
hyphen; U+02010
iacute; U+000ED í
ic; U+02063
icirc; U+000EE î
icy; U+00438 и
iecy; U+00435 е
iexcl; U+000A1 ¡
iff; U+021D4
ifr; U+1D526 𝔦
igrave; U+000EC ì
ii; U+02148
iiiint; U+02A0C
iiint; U+0222D
iinfin; U+029DC
iiota; U+02129
ijlig; U+00133 ij
imacr; U+0012B ī
image; U+02111
imagline; U+02110
imagpart; U+02111
imath; U+00131 ı
imof; U+022B7
imped; U+001B5 Ƶ
in; U+02208
incare; U+02105
infin; U+0221E
infintie; U+029DD
inodot; U+00131 ı
int; U+0222B
intcal; U+022BA
integers; U+02124
intercal; U+022BA
intlarhk; U+02A17
intprod; U+02A3C
iocy; U+00451 ё
iogon; U+0012F į
iopf; U+1D55A 𝕚
iota; U+003B9 ι
iprod; U+02A3C
iquest; U+000BF ¿
iscr; U+1D4BE 𝒾
isin; U+02208
isinE; U+022F9
isindot; U+022F5
isins; U+022F4
isinsv; U+022F3
isinv; U+02208
it; U+02062
itilde; U+00129 ĩ
iukcy; U+00456 і
iuml; U+000EF ï
jcirc; U+00135 ĵ
jcy; U+00439 й
jfr; U+1D527 𝔧
jmath; U+00237 ȷ
jopf; U+1D55B 𝕛
jscr; U+1D4BF 𝒿
jsercy; U+00458 ј
jukcy; U+00454 є
kappa; U+003BA κ
kappav; U+003F0 ϰ
kcedil; U+00137 ķ
kcy; U+0043A к
kfr; U+1D528 𝔨
kgreen; U+00138 ĸ
khcy; U+00445 х
kjcy; U+0045C ќ
kopf; U+1D55C 𝕜
kscr; U+1D4C0 𝓀
lAarr; U+021DA
lArr; U+021D0
lAtail; U+0291B
lBarr; U+0290E
lE; U+02266
lEg; U+02A8B
lHar; U+02962
lacute; U+0013A ĺ
laemptyv; U+029B4
lagran; U+02112
lambda; U+003BB λ
lang; U+027E8
langd; U+02991
langle; U+027E8
lap; U+02A85
laquo; U+000AB «
larr; U+02190
larrb; U+021E4
larrbfs; U+0291F
larrfs; U+0291D
larrhk; U+021A9
larrlp; U+021AB
larrpl; U+02939
larrsim; U+02973
larrtl; U+021A2
lat; U+02AAB
latail; U+02919
late; U+02AAD
lates; U+02AAD U+0FE00 ⪭︀
lbarr; U+0290C
lbbrk; U+02772
lbrace; U+0007B {
lbrack; U+0005B [
lbrke; U+0298B
lbrksld; U+0298F
lbrkslu; U+0298D
lcaron; U+0013E ľ
lcedil; U+0013C ļ
lceil; U+02308
lcub; U+0007B {
lcy; U+0043B л
ldca; U+02936
ldquo; U+0201C
ldquor; U+0201E
ldrdhar; U+02967
ldrushar; U+0294B
ldsh; U+021B2
le; U+02264
leftarrow; U+02190
leftarrowtail; U+021A2
leftharpoondown; U+021BD
leftharpoonup; U+021BC
leftleftarrows; U+021C7
leftrightarrow; U+02194
leftrightarrows; U+021C6
leftrightharpoons; U+021CB
leftrightsquigarrow; U+021AD
leftthreetimes; U+022CB
leg; U+022DA
leq; U+02264
leqq; U+02266
leqslant; U+02A7D
les; U+02A7D
lescc; U+02AA8
lesdot; U+02A7F ⩿
lesdoto; U+02A81
lesdotor; U+02A83
lesg; U+022DA U+0FE00 ⋚︀
lesges; U+02A93
lessapprox; U+02A85
lessdot; U+022D6
lesseqgtr; U+022DA
lesseqqgtr; U+02A8B
lessgtr; U+02276
lesssim; U+02272
lfisht; U+0297C
lfloor; U+0230A
lfr; U+1D529 𝔩
lg; U+02276
lgE; U+02A91
lhard; U+021BD
lharu; U+021BC
lharul; U+0296A
lhblk; U+02584
ljcy; U+00459 љ
ll; U+0226A
llarr; U+021C7
llcorner; U+0231E
llhard; U+0296B
lltri; U+025FA
lmidot; U+00140 ŀ
lmoust; U+023B0
lmoustache; U+023B0
lnE; U+02268
lnap; U+02A89
lnapprox; U+02A89
lne; U+02A87
lneq; U+02A87
lneqq; U+02268
lnsim; U+022E6
loang; U+027EC
loarr; U+021FD
lobrk; U+027E6
longleftarrow; U+027F5
longleftrightarrow; U+027F7
longmapsto; U+027FC
longrightarrow; U+027F6
looparrowleft; U+021AB
looparrowright; U+021AC
lopar; U+02985
lopf; U+1D55D 𝕝
loplus; U+02A2D
lotimes; U+02A34
lowast; U+02217
lowbar; U+0005F _
loz; U+025CA
lozenge; U+025CA
lozf; U+029EB
lpar; U+00028 (
lparlt; U+02993
lrarr; U+021C6
lrcorner; U+0231F
lrhar; U+021CB
lrhard; U+0296D
lrm; U+0200E
lrtri; U+022BF
lsaquo; U+02039
lscr; U+1D4C1 𝓁
lsh; U+021B0
lsim; U+02272
lsime; U+02A8D
lsimg; U+02A8F
lsqb; U+0005B [
lsquo; U+02018
lsquor; U+0201A
lstrok; U+00142 ł
lt; U+0003C <
ltcc; U+02AA6
ltcir; U+02A79
ltdot; U+022D6
lthree; U+022CB
ltimes; U+022C9
ltlarr; U+02976
ltquest; U+02A7B
ltrPar; U+02996
ltri; U+025C3
ltrie; U+022B4
ltrif; U+025C2
lurdshar; U+0294A
luruhar; U+02966
lvertneqq; U+02268 U+0FE00 ≨︀
lvnE; U+02268 U+0FE00 ≨︀
mDDot; U+0223A
macr; U+000AF ¯
male; U+02642
malt; U+02720
maltese; U+02720
map; U+021A6
mapsto; U+021A6
mapstodown; U+021A7
mapstoleft; U+021A4
mapstoup; U+021A5
marker; U+025AE
mcomma; U+02A29
mcy; U+0043C м
mdash; U+02014
measuredangle; U+02221
mfr; U+1D52A 𝔪
mho; U+02127
micro; U+000B5 µ
mid; U+02223
midast; U+0002A *
midcir; U+02AF0
middot; U+000B7 ·
minus; U+02212
minusb; U+0229F
minusd; U+02238
minusdu; U+02A2A
mlcp; U+02ADB
mldr; U+02026
mnplus; U+02213
models; U+022A7
mopf; U+1D55E 𝕞
mp; U+02213
mscr; U+1D4C2 𝓂
mstpos; U+0223E
mu; U+003BC μ
multimap; U+022B8
mumap; U+022B8
nGg; U+022D9 U+00338 ⋙̸
nGt; U+0226B U+020D2 ≫⃒
nGtv; U+0226B U+00338 ≫̸
nLeftarrow; U+021CD
nLeftrightarrow; U+021CE
nLl; U+022D8 U+00338 ⋘̸
nLt; U+0226A U+020D2 ≪⃒
nLtv; U+0226A U+00338 ≪̸
nRightarrow; U+021CF
nVDash; U+022AF
nVdash; U+022AE
nabla; U+02207
nacute; U+00144 ń
nang; U+02220 U+020D2 ∠⃒
nap; U+02249
napE; U+02A70 U+00338 ⩰̸
napid; U+0224B U+00338 ≋̸
napos; U+00149 ʼn
napprox; U+02249
natur; U+0266E
natural; U+0266E
naturals; U+02115
nbsp; U+000A0  
nbump; U+0224E U+00338 ≎̸
nbumpe; U+0224F U+00338 ≏̸
ncap; U+02A43
ncaron; U+00148 ň
ncedil; U+00146 ņ
ncong; U+02247
ncongdot; U+02A6D U+00338 ⩭̸
ncup; U+02A42
ncy; U+0043D н
ndash; U+02013
ne; U+02260
neArr; U+021D7
nearhk; U+02924
nearr; U+02197
nearrow; U+02197
nedot; U+02250 U+00338 ≐̸
nequiv; U+02262
nesear; U+02928
nesim; U+02242 U+00338 ≂̸
nexist; U+02204
nexists; U+02204
nfr; U+1D52B 𝔫
ngE; U+02267 U+00338 ≧̸
nge; U+02271
ngeq; U+02271
ngeqq; U+02267 U+00338 ≧̸
ngeqslant; U+02A7E U+00338 ⩾̸
nges; U+02A7E U+00338 ⩾̸
ngsim; U+02275
ngt; U+0226F
ngtr; U+0226F
nhArr; U+021CE
nharr; U+021AE
nhpar; U+02AF2
ni; U+0220B
nis; U+022FC
nisd; U+022FA
niv; U+0220B
njcy; U+0045A њ
nlArr; U+021CD
nlE; U+02266 U+00338 ≦̸
nlarr; U+0219A
nldr; U+02025
nle; U+02270
nleftarrow; U+0219A
nleftrightarrow; U+021AE
nleq; U+02270
nleqq; U+02266 U+00338 ≦̸
nleqslant; U+02A7D U+00338 ⩽̸
nles; U+02A7D U+00338 ⩽̸
nless; U+0226E
nlsim; U+02274
nlt; U+0226E
nltri; U+022EA
nltrie; U+022EC
nmid; U+02224
nopf; U+1D55F 𝕟
not; U+000AC ¬
notin; U+02209
notinE; U+022F9 U+00338 ⋹̸
notindot; U+022F5 U+00338 ⋵̸
notinva; U+02209
notinvb; U+022F7
notinvc; U+022F6
notni; U+0220C
notniva; U+0220C
notnivb; U+022FE
notnivc; U+022FD
npar; U+02226
nparallel; U+02226
nparsl; U+02AFD U+020E5 ⫽⃥
npart; U+02202 U+00338 ∂̸
npolint; U+02A14
npr; U+02280
nprcue; U+022E0
npre; U+02AAF U+00338 ⪯̸
nprec; U+02280
npreceq; U+02AAF U+00338 ⪯̸
nrArr; U+021CF
nrarr; U+0219B
nrarrc; U+02933 U+00338 ⤳̸
nrarrw; U+0219D U+00338 ↝̸
nrightarrow; U+0219B
nrtri; U+022EB
nrtrie; U+022ED
nsc; U+02281
nsccue; U+022E1
nsce; U+02AB0 U+00338 ⪰̸
nscr; U+1D4C3 𝓃
nshortmid; U+02224
nshortparallel; U+02226
nsim; U+02241
nsime; U+02244
nsimeq; U+02244
nsmid; U+02224
nspar; U+02226
nsqsube; U+022E2
nsqsupe; U+022E3
nsub; U+02284
nsubE; U+02AC5 U+00338 ⫅̸
nsube; U+02288
nsubset; U+02282 U+020D2 ⊂⃒
nsubseteq; U+02288
nsubseteqq; U+02AC5 U+00338 ⫅̸
nsucc; U+02281
nsucceq; U+02AB0 U+00338 ⪰̸
nsup; U+02285
nsupE; U+02AC6 U+00338 ⫆̸
nsupe; U+02289
nsupset; U+02283 U+020D2 ⊃⃒
nsupseteq; U+02289
nsupseteqq; U+02AC6 U+00338 ⫆̸
ntgl; U+02279
ntilde; U+000F1 ñ
ntlg; U+02278
ntriangleleft; U+022EA
ntrianglelefteq; U+022EC
ntriangleright; U+022EB
ntrianglerighteq; U+022ED
nu; U+003BD ν
num; U+00023 #
numero; U+02116
numsp; U+02007
nvDash; U+022AD
nvHarr; U+02904
nvap; U+0224D U+020D2 ≍⃒
nvdash; U+022AC
nvge; U+02265 U+020D2 ≥⃒
nvgt; U+0003E U+020D2 >⃒
nvinfin; U+029DE
nvlArr; U+02902
nvle; U+02264 U+020D2 ≤⃒
nvlt; U+0003C U+020D2 <⃒
nvltrie; U+022B4 U+020D2 ⊴⃒
nvrArr; U+02903
nvrtrie; U+022B5 U+020D2 ⊵⃒
nvsim; U+0223C U+020D2 ∼⃒
nwArr; U+021D6
nwarhk; U+02923
nwarr; U+02196
nwarrow; U+02196
nwnear; U+02927
oS; U+024C8
oacute; U+000F3 ó
oast; U+0229B
ocir; U+0229A
ocirc; U+000F4 ô
ocy; U+0043E о
odash; U+0229D
odblac; U+00151 ő
odiv; U+02A38
odot; U+02299
odsold; U+029BC
oelig; U+00153 œ
ofcir; U+029BF ⦿
ofr; U+1D52C 𝔬
ogon; U+002DB ˛
ograve; U+000F2 ò
ogt; U+029C1
ohbar; U+029B5
ohm; U+003A9 Ω
oint; U+0222E
olarr; U+021BA
olcir; U+029BE
olcross; U+029BB
oline; U+0203E
olt; U+029C0
omacr; U+0014D ō
omega; U+003C9 ω
omicron; U+003BF ο
omid; U+029B6
ominus; U+02296
oopf; U+1D560 𝕠
opar; U+029B7
operp; U+029B9
oplus; U+02295
or; U+02228
orarr; U+021BB
ord; U+02A5D
order; U+02134
orderof; U+02134
ordf; U+000AA ª
ordm; U+000BA º
origof; U+022B6
oror; U+02A56
orslope; U+02A57
orv; U+02A5B
oscr; U+02134
oslash; U+000F8 ø
osol; U+02298
otilde; U+000F5 õ
otimes; U+02297
otimesas; U+02A36
ouml; U+000F6 ö
ovbar; U+0233D
par; U+02225
para; U+000B6
parallel; U+02225
parsim; U+02AF3
parsl; U+02AFD
part; U+02202
pcy; U+0043F п
percnt; U+00025 %
period; U+0002E .
permil; U+02030
perp; U+022A5
pertenk; U+02031
pfr; U+1D52D 𝔭
phi; U+003C6 φ
phiv; U+003D5 ϕ
phmmat; U+02133
phone; U+0260E
pi; U+003C0 π
pitchfork; U+022D4
piv; U+003D6 ϖ
planck; U+0210F
planckh; U+0210E
plankv; U+0210F
plus; U+0002B +
plusacir; U+02A23
plusb; U+0229E
pluscir; U+02A22
plusdo; U+02214
plusdu; U+02A25
pluse; U+02A72
plusmn; U+000B1 ±
plussim; U+02A26
plustwo; U+02A27
pm; U+000B1 ±
pointint; U+02A15
popf; U+1D561 𝕡
pound; U+000A3 £
pr; U+0227A
prE; U+02AB3
prap; U+02AB7
prcue; U+0227C
pre; U+02AAF
prec; U+0227A
precapprox; U+02AB7
preccurlyeq; U+0227C
preceq; U+02AAF
precnapprox; U+02AB9
precneqq; U+02AB5
precnsim; U+022E8
precsim; U+0227E
prime; U+02032
primes; U+02119
prnE; U+02AB5
prnap; U+02AB9
prnsim; U+022E8
prod; U+0220F
profalar; U+0232E
profline; U+02312
profsurf; U+02313
prop; U+0221D
propto; U+0221D
prsim; U+0227E
prurel; U+022B0
pscr; U+1D4C5 𝓅
psi; U+003C8 ψ
puncsp; U+02008
qfr; U+1D52E 𝔮
qint; U+02A0C
qopf; U+1D562 𝕢
qprime; U+02057
qscr; U+1D4C6 𝓆
quaternions; U+0210D
quatint; U+02A16
quest; U+0003F ?
questeq; U+0225F
quot; U+00022 "
rAarr; U+021DB
rArr; U+021D2
rAtail; U+0291C
rBarr; U+0290F
rHar; U+02964
race; U+0223D U+00331 ∽̱
racute; U+00155 ŕ
radic; U+0221A
raemptyv; U+029B3
rang; U+027E9
rangd; U+02992
range; U+029A5
rangle; U+027E9
raquo; U+000BB »
rarr; U+02192
rarrap; U+02975
rarrb; U+021E5
rarrbfs; U+02920
rarrc; U+02933
rarrfs; U+0291E
rarrhk; U+021AA
rarrlp; U+021AC
rarrpl; U+02945
rarrsim; U+02974
rarrtl; U+021A3
rarrw; U+0219D
ratail; U+0291A
ratio; U+02236
rationals; U+0211A
rbarr; U+0290D
rbbrk; U+02773
rbrace; U+0007D }
rbrack; U+0005D ]
rbrke; U+0298C
rbrksld; U+0298E
rbrkslu; U+02990
rcaron; U+00159 ř
rcedil; U+00157 ŗ
rceil; U+02309
rcub; U+0007D }
rcy; U+00440 р
rdca; U+02937
rdldhar; U+02969
rdquo; U+0201D
rdquor; U+0201D
rdsh; U+021B3
real; U+0211C
realine; U+0211B
realpart; U+0211C
reals; U+0211D
rect; U+025AD
reg; U+000AE ®
rfisht; U+0297D
rfloor; U+0230B
rfr; U+1D52F 𝔯
rhard; U+021C1
rharu; U+021C0
rharul; U+0296C
rho; U+003C1 ρ
rhov; U+003F1 ϱ
rightarrow; U+02192
rightarrowtail; U+021A3
rightharpoondown; U+021C1
rightharpoonup; U+021C0
rightleftarrows; U+021C4
rightleftharpoons; U+021CC
rightrightarrows; U+021C9
rightsquigarrow; U+0219D
rightthreetimes; U+022CC
ring; U+002DA ˚
risingdotseq; U+02253
rlarr; U+021C4
rlhar; U+021CC
rlm; U+0200F
rmoust; U+023B1
rmoustache; U+023B1
rnmid; U+02AEE
roang; U+027ED
roarr; U+021FE
robrk; U+027E7
ropar; U+02986
ropf; U+1D563 𝕣
roplus; U+02A2E
rotimes; U+02A35
rpar; U+00029 )
rpargt; U+02994
rppolint; U+02A12
rrarr; U+021C9
rsaquo; U+0203A
rscr; U+1D4C7 𝓇
rsh; U+021B1
rsqb; U+0005D ]
rsquo; U+02019
rsquor; U+02019
rthree; U+022CC
rtimes; U+022CA
rtri; U+025B9
rtrie; U+022B5
rtrif; U+025B8
rtriltri; U+029CE
ruluhar; U+02968
rx; U+0211E
sacute; U+0015B ś
sbquo; U+0201A
sc; U+0227B
scE; U+02AB4
scap; U+02AB8
scaron; U+00161 š
sccue; U+0227D
sce; U+02AB0
scedil; U+0015F ş
scirc; U+0015D ŝ
scnE; U+02AB6
scnap; U+02ABA
scnsim; U+022E9
scpolint; U+02A13
scsim; U+0227F
scy; U+00441 с
sdot; U+022C5
sdotb; U+022A1
sdote; U+02A66
seArr; U+021D8
searhk; U+02925
searr; U+02198
searrow; U+02198
sect; U+000A7 §
semi; U+0003B ;
seswar; U+02929
setminus; U+02216
setmn; U+02216
sext; U+02736
sfr; U+1D530 𝔰
sfrown; U+02322
sharp; U+0266F
shchcy; U+00449 щ
shcy; U+00448 ш
shortmid; U+02223
shortparallel; U+02225
shy; U+000AD ­
sigma; U+003C3 σ
sigmaf; U+003C2 ς
sigmav; U+003C2 ς
sim; U+0223C
simdot; U+02A6A
sime; U+02243
simeq; U+02243
simg; U+02A9E
simgE; U+02AA0
siml; U+02A9D
simlE; U+02A9F
simne; U+02246
simplus; U+02A24
simrarr; U+02972
slarr; U+02190
smallsetminus; U+02216
smashp; U+02A33
smeparsl; U+029E4
smid; U+02223
smile; U+02323
smt; U+02AAA
smte; U+02AAC
smtes; U+02AAC U+0FE00 ⪬︀
softcy; U+0044C ь
sol; U+0002F /
solb; U+029C4
solbar; U+0233F
sopf; U+1D564 𝕤
spades; U+02660
spadesuit; U+02660
spar; U+02225
sqcap; U+02293
sqcaps; U+02293 U+0FE00 ⊓︀
sqcup; U+02294
sqcups; U+02294 U+0FE00 ⊔︀
sqsub; U+0228F
sqsube; U+02291
sqsubset; U+0228F
sqsubseteq; U+02291
sqsup; U+02290
sqsupe; U+02292
sqsupset; U+02290
sqsupseteq; U+02292
squ; U+025A1
square; U+025A1
squarf; U+025AA
squf; U+025AA
srarr; U+02192
sscr; U+1D4C8 𝓈
ssetmn; U+02216
ssmile; U+02323
sstarf; U+022C6
star; U+02606
starf; U+02605
straightepsilon; U+003F5 ϵ
straightphi; U+003D5 ϕ
strns; U+000AF ¯
sub; U+02282
subE; U+02AC5
subdot; U+02ABD
sube; U+02286
subedot; U+02AC3
submult; U+02AC1
subnE; U+02ACB
subne; U+0228A
subplus; U+02ABF ⪿
subrarr; U+02979
subset; U+02282
subseteq; U+02286
subseteqq; U+02AC5
subsetneq; U+0228A
subsetneqq; U+02ACB
subsim; U+02AC7
subsub; U+02AD5
subsup; U+02AD3
succ; U+0227B
succapprox; U+02AB8
succcurlyeq; U+0227D
succeq; U+02AB0
succnapprox; U+02ABA
succneqq; U+02AB6
succnsim; U+022E9
succsim; U+0227F
sum; U+02211
sung; U+0266A
sup; U+02283
sup1; U+000B9 ¹
sup2; U+000B2 ²
sup3; U+000B3 ³
supE; U+02AC6
supdot; U+02ABE
supdsub; U+02AD8
supe; U+02287
supedot; U+02AC4
suphsol; U+027C9
suphsub; U+02AD7
suplarr; U+0297B
supmult; U+02AC2
supnE; U+02ACC
supne; U+0228B
supplus; U+02AC0
supset; U+02283
supseteq; U+02287
supseteqq; U+02AC6
supsetneq; U+0228B
supsetneqq; U+02ACC
supsim; U+02AC8
supsub; U+02AD4
supsup; U+02AD6
swArr; U+021D9
swarhk; U+02926
swarr; U+02199
swarrow; U+02199
swnwar; U+0292A
szlig; U+000DF ß
target; U+02316
tau; U+003C4 τ
tbrk; U+023B4
tcaron; U+00165 ť
tcedil; U+00163 ţ
tcy; U+00442 т
tdot; U+020DB ◌⃛
telrec; U+02315
tfr; U+1D531 𝔱
there4; U+02234
therefore; U+02234
theta; U+003B8 θ
thetasym; U+003D1 ϑ
thetav; U+003D1 ϑ
thickapprox; U+02248
thicksim; U+0223C
thinsp; U+02009
thkap; U+02248
thksim; U+0223C
thorn; U+000FE þ
tilde; U+002DC ˜
times; U+000D7 ×
timesb; U+022A0
timesbar; U+02A31
timesd; U+02A30
tint; U+0222D
toea; U+02928
top; U+022A4
topbot; U+02336
topcir; U+02AF1
topf; U+1D565 𝕥
topfork; U+02ADA
tosa; U+02929
tprime; U+02034
trade; U+02122
triangle; U+025B5
triangledown; U+025BF
triangleleft; U+025C3
trianglelefteq; U+022B4
triangleq; U+0225C
triangleright; U+025B9
trianglerighteq; U+022B5
tridot; U+025EC
trie; U+0225C
triminus; U+02A3A
triplus; U+02A39
trisb; U+029CD
tritime; U+02A3B
trpezium; U+023E2
tscr; U+1D4C9 𝓉
tscy; U+00446 ц
tshcy; U+0045B ћ
tstrok; U+00167 ŧ
twixt; U+0226C
twoheadleftarrow; U+0219E
twoheadrightarrow; U+021A0
uArr; U+021D1
uHar; U+02963
uacute; U+000FA ú
uarr; U+02191
ubrcy; U+0045E ў
ubreve; U+0016D ŭ
ucirc; U+000FB û
ucy; U+00443 у
udarr; U+021C5
udblac; U+00171 ű
udhar; U+0296E
ufisht; U+0297E
ufr; U+1D532 𝔲
ugrave; U+000F9 ù
uharl; U+021BF
uharr; U+021BE
uhblk; U+02580
ulcorn; U+0231C
ulcorner; U+0231C
ulcrop; U+0230F
ultri; U+025F8
umacr; U+0016B ū
uml; U+000A8 ¨
uogon; U+00173 ų
uopf; U+1D566 𝕦
uparrow; U+02191
updownarrow; U+02195
upharpoonleft; U+021BF
upharpoonright; U+021BE
uplus; U+0228E
upsi; U+003C5 υ
upsih; U+003D2 ϒ
upsilon; U+003C5 υ
upuparrows; U+021C8
urcorn; U+0231D
urcorner; U+0231D
urcrop; U+0230E
uring; U+0016F ů
urtri; U+025F9
uscr; U+1D4CA 𝓊
utdot; U+022F0
utilde; U+00169 ũ
utri; U+025B5
utrif; U+025B4
uuarr; U+021C8
uuml; U+000FC ü
uwangle; U+029A7
vArr; U+021D5
vBar; U+02AE8
vBarv; U+02AE9
vDash; U+022A8
vangrt; U+0299C
varepsilon; U+003F5 ϵ
varkappa; U+003F0 ϰ
varnothing; U+02205
varphi; U+003D5 ϕ
varpi; U+003D6 ϖ
varpropto; U+0221D
varr; U+02195
varrho; U+003F1 ϱ
varsigma; U+003C2 ς
varsubsetneq; U+0228A U+0FE00 ⊊︀
varsubsetneqq; U+02ACB U+0FE00 ⫋︀
varsupsetneq; U+0228B U+0FE00 ⊋︀
varsupsetneqq; U+02ACC U+0FE00 ⫌︀
vartheta; U+003D1 ϑ
vartriangleleft; U+022B2
vartriangleright; U+022B3
vcy; U+00432 в
vdash; U+022A2
vee; U+02228
veebar; U+022BB
veeeq; U+0225A
vellip; U+022EE
verbar; U+0007C |
vert; U+0007C |
vfr; U+1D533 𝔳
vltri; U+022B2
vnsub; U+02282 U+020D2 ⊂⃒
vnsup; U+02283 U+020D2 ⊃⃒
vopf; U+1D567 𝕧
vprop; U+0221D
vrtri; U+022B3
vscr; U+1D4CB 𝓋
vsubnE; U+02ACB U+0FE00 ⫋︀
vsubne; U+0228A U+0FE00 ⊊︀
vsupnE; U+02ACC U+0FE00 ⫌︀
vsupne; U+0228B U+0FE00 ⊋︀
vzigzag; U+0299A
wcirc; U+00175 ŵ
wedbar; U+02A5F
wedge; U+02227
wedgeq; U+02259
weierp; U+02118
wfr; U+1D534 𝔴
wopf; U+1D568 𝕨
wp; U+02118
wr; U+02240
wreath; U+02240
wscr; U+1D4CC 𝓌
xcap; U+022C2
xcirc; U+025EF
xcup; U+022C3
xdtri; U+025BD
xfr; U+1D535 𝔵
xhArr; U+027FA
xharr; U+027F7
xi; U+003BE ξ
xlArr; U+027F8
xlarr; U+027F5
xmap; U+027FC
xnis; U+022FB
xodot; U+02A00
xopf; U+1D569 𝕩
xoplus; U+02A01
xotime; U+02A02
xrArr; U+027F9
xrarr; U+027F6
xscr; U+1D4CD 𝓍
xsqcup; U+02A06
xuplus; U+02A04
xutri; U+025B3
xvee; U+022C1
xwedge; U+022C0
yacute; U+000FD ý
yacy; U+0044F я
ycirc; U+00177 ŷ
ycy; U+0044B ы
yen; U+000A5 ¥
yfr; U+1D536 𝔶
yicy; U+00457 ї
yopf; U+1D56A 𝕪
yscr; U+1D4CE 𝓎
yucy; U+0044E ю
yuml; U+000FF ÿ
zacute; U+0017A ź
zcaron; U+0017E ž
zcy; U+00437 з
zdot; U+0017C ż
zeetrf; U+02128
zeta; U+003B6 ζ
zfr; U+1D537 𝔷
zhcy; U+00436 ж
zigrarr; U+021DD
zopf; U+1D56B 𝕫
zscr; U+1D4CF 𝓏
zwj; U+0200D
zwnj; U+0200C
AElig U+000C6 Æ
AMP U+00026 &
Aacute U+000C1 Á
Acirc U+000C2 Â
Agrave U+000C0 À
Aring U+000C5 Å
Atilde U+000C3 Ã
Auml U+000C4 Ä
COPY U+000A9 ©
Ccedil U+000C7 Ç
ETH U+000D0 Ð
Eacute U+000C9 É
Ecirc U+000CA Ê
Egrave U+000C8 È
Euml U+000CB Ë
GT U+0003E >
Iacute U+000CD Í
Icirc U+000CE Î
Igrave U+000CC Ì
Iuml U+000CF Ï
LT U+0003C <
Ntilde U+000D1 Ñ
Oacute U+000D3 Ó
Ocirc U+000D4 Ô
Ograve U+000D2 Ò
Oslash U+000D8 Ø
Otilde U+000D5 Õ
Ouml U+000D6 Ö
QUOT U+00022 "
REG U+000AE ®
THORN U+000DE Þ
Uacute U+000DA Ú
Ucirc U+000DB Û
Ugrave U+000D9 Ù
Uuml U+000DC Ü
Yacute U+000DD Ý
aacute U+000E1 á
acirc U+000E2 â
acute U+000B4 ´
aelig U+000E6 æ
agrave U+000E0 à
amp U+00026 &
aring U+000E5 å
atilde U+000E3 ã
auml U+000E4 ä
brvbar U+000A6 ¦
ccedil U+000E7 ç
cedil U+000B8 ¸
cent U+000A2 ¢
copy U+000A9 ©
curren U+000A4 ¤
deg U+000B0 °
divide U+000F7 ÷
eacute U+000E9 é
ecirc U+000EA ê
egrave U+000E8 è
eth U+000F0 ð
euml U+000EB ë
frac12 U+000BD ½
frac14 U+000BC ¼
frac34 U+000BE ¾
gt U+0003E >
iacute U+000ED í
icirc U+000EE î
iexcl U+000A1 ¡
igrave U+000EC ì
iquest U+000BF ¿
iuml U+000EF ï
laquo U+000AB «
lt U+0003C <
macr U+000AF ¯
micro U+000B5 µ
middot U+000B7 ·
nbsp U+000A0  
not U+000AC ¬
ntilde U+000F1 ñ
oacute U+000F3 ó
ocirc U+000F4 ô
ograve U+000F2 ò
ordf U+000AA ª
ordm U+000BA º
oslash U+000F8 ø
otilde U+000F5 õ
ouml U+000F6 ö
para U+000B6
plusmn U+000B1 ±
pound U+000A3 £
quot U+00022 "
raquo U+000BB »
reg U+000AE ®
sect U+000A7 §
shy U+000AD ­
sup1 U+000B9 ¹
sup2 U+000B2 ²
sup3 U+000B3 ³
szlig U+000DF ß
thorn U+000FE þ
times U+000D7 ×
uacute U+000FA ú
ucirc U+000FB û
ugrave U+000F9 ù
uml U+000A8 ¨
uuml U+000FC ü
yacute U+000FD ý
yen U+000A5 ¥
yuml U+000FF ÿ
"), set the escape flag to false. - - In any case, emit the input character as a character token. Stay - in the data state. - - EOF - Emit an end-of-file token. - - Anything else - Emit the input character as a character token. Stay in the data - state. - - 8.2.4.2 Character reference data state - - (This cannot happen if the content model flag is set to the CDATA - state.) - - Attempt to consume a character reference, with no additional allowed - character. - - If nothing is returned, emit a U+0026 AMPERSAND character token. - - Otherwise, emit the character token that was returned. - - Finally, switch to the data state. - - 8.2.4.3 Tag open state - - The behavior of this state depends on the content model flag. - - If the content model flag is set to the RCDATA or CDATA states - Consume the next input character. If it is a U+002F SOLIDUS (/) - character, switch to the close tag open state. Otherwise, emit a - U+003C LESS-THAN SIGN character token and reconsume the current - input character in the data state. - - If the content model flag is set to the PCDATA state - Consume the next input character: - - U+0021 EXCLAMATION MARK (!) - Switch to the markup declaration open state. - - U+002F SOLIDUS (/) - Switch to the close tag open state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL - LETTER Z - Create a new start tag token, set its tag name to the - lowercase version of the input character (add 0x0020 to - the character's code point), then switch to the tag name - state. (Don't emit the token yet; further details will be - filled in before it is emitted.) - - U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new start tag token, set its tag name to the - input character, then switch to the tag name state. (Don't - emit the token yet; further details will be filled in - before it is emitted.) - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Emit a U+003C LESS-THAN SIGN character token - and a U+003E GREATER-THAN SIGN character token. Switch to - the data state. - - U+003F QUESTION MARK (?) - Parse error. Switch to the bogus comment state. - - Anything else - Parse error. Emit a U+003C LESS-THAN SIGN character token - and reconsume the current input character in the data - state. - - 8.2.4.4 Close tag open state - - If the content model flag is set to the RCDATA or CDATA states but no - start tag token has ever been emitted by this instance of the tokeniser - (fragment case), or, if the content model flag is set to the RCDATA or - CDATA states and the next few characters do not match the tag name of - the last start tag token emitted (compared in an ASCII case-insensitive - manner), or if they do but they are not immediately followed by one of - the following characters: - * U+0009 CHARACTER TABULATION - * U+000A LINE FEED (LF) - * U+000C FORM FEED (FF) - * U+0020 SPACE - * U+003E GREATER-THAN SIGN (>) - * U+002F SOLIDUS (/) - * EOF - - ...then emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS - character token, and switch to the data state to process the next input - character. - - Otherwise, if the content model flag is set to the PCDATA state, or if - the next few characters do match that tag name, consume the next input - character: - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Create a new end tag token, set its tag name to the lowercase - version of the input character (add 0x0020 to the character's - code point), then switch to the tag name state. (Don't emit the - token yet; further details will be filled in before it is - emitted.) - - U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new end tag token, set its tag name to the input - character, then switch to the tag name state. (Don't emit the - token yet; further details will be filled in before it is - emitted.) - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Switch to the data state. - - EOF - Parse error. Emit a U+003C LESS-THAN SIGN character token and a - U+002F SOLIDUS character token. Reconsume the EOF character in - the data state. - - Anything else - Parse error. Switch to the bogus comment state. - - 8.2.4.5 Tag name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input character (add - 0x0020 to the character's code point) to the current tag token's - tag name. Stay in the tag name state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Append the current input character to the current tag token's - tag name. Stay in the tag name state. - - 8.2.4.6 Before attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current input - character (add 0x0020 to the character's code point), and its - value to the empty string. Switch to the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. 
Reconsume the EOF - character in the data state. - - Anything else - Start a new attribute in the current tag token. Set that - attribute's name to the current input character, and its value - to the empty string. Switch to the attribute name state. - - 8.2.4.7 Attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003D EQUALS SIGN (=) - Switch to the before attribute value state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input character (add - 0x0020 to the character's code point) to the current attribute's - name. Stay in the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Append the current input character to the current attribute's - name. Stay in the attribute name state. - - When the user agent leaves the attribute name state (and before - emitting the tag token, if appropriate), the complete attribute's name - must be compared to the other attributes on the same token; if there is - already an attribute on the token with the exact same name, then this - is a parse error and the new attribute must be dropped, along with the - value that gets associated with it (if any). - - 8.2.4.8 After attribute name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. 
- - U+003D EQUALS SIGN (=) - Switch to the before attribute value state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current input - character (add 0x0020 to the character's code point), and its - value to the empty string. Switch to the attribute name state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Start a new attribute in the current tag token. Set that - attribute's name to the current input character, and its value - to the empty string. Switch to the attribute name state. - - 8.2.4.9 Before attribute value state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute value state. - - U+0022 QUOTATION MARK (") - Switch to the attribute value (double-quoted) state. - - U+0026 AMPERSAND (&) - Switch to the attribute value (unquoted) state and reconsume - this input character. - - U+0027 APOSTROPHE (') - Switch to the attribute value (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Emit the current tag token. Switch to the data - state. - - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Switch to the attribute value (unquoted) state. 
- - 8.2.4.10 Attribute value (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after attribute value (quoted) state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - the additional allowed character being U+0022 QUOTATION MARK - ("). - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (double-quoted) state. - - 8.2.4.11 Attribute value (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after attribute value (quoted) state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - the additional allowed character being U+0027 APOSTROPHE ('). - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (single-quoted) state. - - 8.2.4.12 Attribute value (unquoted) state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+0026 AMPERSAND (&) - Switch to the character reference in attribute value state, with - no additional allowed character. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry below. - - EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. - - Anything else - Append the current input character to the current attribute's - value. Stay in the attribute value (unquoted) state. 
- - 8.2.4.13 Character reference in attribute value state - - Attempt to consume a character reference. - - If nothing is returned, append a U+0026 AMPERSAND character to the - current attribute's value. - - Otherwise, append the returned character token to the current - attribute's value. - - Finally, switch back to the attribute value state that you were in when - you were switched into this state. - - 8.2.4.14 After attribute value (quoted) state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. - - U+002F SOLIDUS (/) - Switch to the self-closing start tag state. - - U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Parse error. Reconsume the character in the before attribute - name state. - - 8.2.4.15 Self-closing start tag state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Set the self-closing flag of the current tag token. Emit the - current tag token. Switch to the data state. - - EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. - - Anything else - Parse error. Reconsume the character in the before attribute - name state. - - 8.2.4.16 Bogus comment state - - (This can only happen if the content model flag is set to the PCDATA - state.) - - Consume every character up to and including the first U+003E - GREATER-THAN SIGN character (>) or the end of the file (EOF), whichever - comes first. Emit a comment token whose data is the concatenation of - all the characters starting from and including the character that - caused the state machine to switch into the bogus comment state, up to - and including the character immediately before the last consumed - character (i.e.
up to the character just before the U+003E or EOF - character). (If the comment was started by the end of the file (EOF), - the token is empty.) - - Switch to the data state. - - If the end of the file was reached, reconsume the EOF character. - - 8.2.4.17 Markup declaration open state - - (This can only happen if the content model flag is set to the PCDATA - state.) - - If the next two characters are both U+002D HYPHEN-MINUS (-) characters, - consume those two characters, create a comment token whose data is the - empty string, and switch to the comment start state. - - Otherwise, if the next seven characters are an ASCII case-insensitive - match for the word "DOCTYPE", then consume those characters and switch - to the DOCTYPE state. - - Otherwise, if the insertion mode is "in foreign content" and the - current node is not an element in the HTML namespace and the next seven - characters are an ASCII case-sensitive match for the string "[CDATA[" - (the five uppercase letters "CDATA" with a U+005B LEFT SQUARE BRACKET - character before and after), then consume those characters and switch - to the CDATA section state (which is unrelated to the content model - flag's CDATA state). - - Otherwise, this is a parse error. Switch to the bogus comment state. - The next character that is consumed, if any, is the first character - that will be in the comment. - - 8.2.4.18 Comment start state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment start dash state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Emit the comment token. Switch to the data state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append the input character to the comment token's data. Switch - to the comment state. - - 8.2.4.19 Comment start dash state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end state - - U+003E GREATER-THAN SIGN (>) - Parse error. 
Emit the comment token. Switch to the data state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment - state. - - 8.2.4.20 Comment state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end dash state - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append the input character to the comment token's data. Stay in - the comment state. - - 8.2.4.21 Comment end dash state - - Consume the next input character: - - U+002D HYPHEN-MINUS (-) - Switch to the comment end state - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment - state. - - 8.2.4.22 Comment end state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Emit the comment token. Switch to the data state. - - U+002D HYPHEN-MINUS (-) - Parse error. Append a U+002D HYPHEN-MINUS (-) character to the - comment token's data. Stay in the comment end state. - - EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. - - Anything else - Parse error. Append two U+002D HYPHEN-MINUS (-) characters and - the input character to the comment token's data. Switch to the - comment state. - - 8.2.4.23 DOCTYPE state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before DOCTYPE name state. - - Anything else - Parse error. Reconsume the current character in the before - DOCTYPE name state. 
- - 8.2.4.24 Before DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE name state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Create a new DOCTYPE token. Set its force-quirks - flag to on. Emit the token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Create a new DOCTYPE token. Set the token's name to the - lowercase version of the input character (add 0x0020 to the - character's code point). Switch to the DOCTYPE name state. - - EOF - Parse error. Create a new DOCTYPE token. Set its force-quirks - flag to on. Emit the token. Reconsume the EOF character in the - data state. - - Anything else - Create a new DOCTYPE token. Set the token's name to the current - input character. Switch to the DOCTYPE name state. - - 8.2.4.25 DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after DOCTYPE name state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the input character (add 0x0020 - to the character's code point) to the current DOCTYPE token's - name. Stay in the DOCTYPE name state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's name. Stay in the DOCTYPE name state. - - 8.2.4.26 After DOCTYPE name state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE name state. 
- - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - If the six characters starting from the current input character - are an ASCII case-insensitive match for the word "PUBLIC", then - consume those characters and switch to the before DOCTYPE public - identifier state. - - Otherwise, if the six characters starting from the current input - character are an ASCII case-insensitive match for the word - "SYSTEM", then consume those characters and switch to the before - DOCTYPE system identifier state. - - Otherwise, this is a parse error. Set the DOCTYPE token's - force-quirks flag to on. Switch to the bogus DOCTYPE state. - - 8.2.4.27 Before DOCTYPE public identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE public identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's public identifier to the empty string - (not missing), then switch to the DOCTYPE public identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's public identifier to the empty string - (not missing), then switch to the DOCTYPE public identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state.
- - 8.2.4.28 DOCTYPE public identifier (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE public identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's public identifier. Stay in the DOCTYPE public identifier - (double-quoted) state. - - 8.2.4.29 DOCTYPE public identifier (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after DOCTYPE public identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's public identifier. Stay in the DOCTYPE public identifier - (single-quoted) state. - - 8.2.4.30 After DOCTYPE public identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE public identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. 
- - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state. - - 8.2.4.31 Before DOCTYPE system identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE system identifier state. - - U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (double-quoted) state. - - U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the empty string - (not missing), then switch to the DOCTYPE system identifier - (single-quoted) state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Switch to the bogus DOCTYPE state. - - 8.2.4.32 DOCTYPE system identifier (double-quoted) state - - Consume the next input character: - - U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's system identifier. Stay in the DOCTYPE system identifier - (double-quoted) state. 
- - 8.2.4.33 DOCTYPE system identifier (single-quoted) state - - Consume the next input character: - - U+0027 APOSTROPHE (') - Switch to the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Append the current input character to the current DOCTYPE - token's system identifier. Stay in the DOCTYPE system identifier - (single-quoted) state. - - 8.2.4.34 After DOCTYPE system identifier state - - Consume the next input character: - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE system identifier state. - - U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. - - EOF - Parse error. Set the DOCTYPE token's force-quirks flag to on. - Emit that DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Parse error. Switch to the bogus DOCTYPE state. (This does not - set the DOCTYPE token's force-quirks flag to on.) - - 8.2.4.35 Bogus DOCTYPE state - - Consume the next input character: - - U+003E GREATER-THAN SIGN (>) - Emit the DOCTYPE token. Switch to the data state. - - EOF - Emit the DOCTYPE token. Reconsume the EOF character in the data - state. - - Anything else - Stay in the bogus DOCTYPE state. - - 8.2.4.36 CDATA section state - - (This can only happen if the content model flag is set to the PCDATA - state, and is unrelated to the content model flag's CDATA state.) - - Consume every character up to the next occurrence of the three - character sequence U+005D RIGHT SQUARE BRACKET U+005D RIGHT SQUARE - BRACKET U+003E GREATER-THAN SIGN (]]>), or the end of the file (EOF), - whichever comes first. 
Emit a series of character tokens consisting of - all the characters consumed except the matching three character - sequence at the end (if one was found before the end of the file). - - Switch to the data state. - - If the end of the file was reached, reconsume the EOF character. - - 8.2.4.37 Tokenizing character references - - This section defines how to consume a character reference. This - definition is used when parsing character references in text and in - attributes. - - The behavior depends on the identity of the next character (the one - immediately after the U+0026 AMPERSAND character): - - U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - U+003C LESS-THAN SIGN - U+0026 AMPERSAND - EOF - The additional allowed character, if there is one - Not a character reference. No characters are consumed, and - nothing is returned. (This is not an error, either.) - - U+0023 NUMBER SIGN (#) - Consume the U+0023 NUMBER SIGN. - - The behavior further depends on the character after the U+0023 - NUMBER SIGN: - - U+0078 LATIN SMALL LETTER X - U+0058 LATIN CAPITAL LETTER X - Consume the X. - - Follow the steps below, but using the range of characters - U+0030 DIGIT ZERO through to U+0039 DIGIT NINE, U+0061 - LATIN SMALL LETTER A through to U+0066 LATIN SMALL LETTER - F, and U+0041 LATIN CAPITAL LETTER A, through to U+0046 - LATIN CAPITAL LETTER F (in other words, 0-9, A-F, a-f). - - When it comes to interpreting the number, interpret it as - a hexadecimal number. - - Anything else - Follow the steps below, but using the range of characters - U+0030 DIGIT ZERO through to U+0039 DIGIT NINE (i.e. just - 0-9). - - When it comes to interpreting the number, interpret it as - a decimal number. - - Consume as many characters as match the range of characters - given above. - - If no characters match the range, then don't consume any - characters (and unconsume the U+0023 NUMBER SIGN character and, - if appropriate, the X character). 
This is a parse error; nothing - is returned. - - Otherwise, if the next character is a U+003B SEMICOLON, consume - that too. If it isn't, there is a parse error. - - If one or more characters match the range, then take them all - and interpret the string of characters as a number (either - hexadecimal or decimal as appropriate). - - If that number is one of the numbers in the first column of the - following table, then this is a parse error. Find the row with - that number in the first column, and return a character token - for the Unicode character given in the second column of that - row. - - Number Unicode character - 0x0D U+000A LINE FEED (LF) - 0x80 U+20AC EURO SIGN ('€') - 0x81 U+FFFD REPLACEMENT CHARACTER - 0x82 U+201A SINGLE LOW-9 QUOTATION MARK ('‚') - 0x83 U+0192 LATIN SMALL LETTER F WITH HOOK ('ƒ') - 0x84 U+201E DOUBLE LOW-9 QUOTATION MARK ('„') - 0x85 U+2026 HORIZONTAL ELLIPSIS ('…') - 0x86 U+2020 DAGGER ('†') - 0x87 U+2021 DOUBLE DAGGER ('‡') - 0x88 U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ') - 0x89 U+2030 PER MILLE SIGN ('‰') - 0x8A U+0160 LATIN CAPITAL LETTER S WITH CARON ('Š') - 0x8B U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹') - 0x8C U+0152 LATIN CAPITAL LIGATURE OE ('Œ') - 0x8D U+FFFD REPLACEMENT CHARACTER - 0x8E U+017D LATIN CAPITAL LETTER Z WITH CARON ('Ž') - 0x8F U+FFFD REPLACEMENT CHARACTER - 0x90 U+FFFD REPLACEMENT CHARACTER - 0x91 U+2018 LEFT SINGLE QUOTATION MARK ('‘') - 0x92 U+2019 RIGHT SINGLE QUOTATION MARK ('’') - 0x93 U+201C LEFT DOUBLE QUOTATION MARK ('“') - 0x94 U+201D RIGHT DOUBLE QUOTATION MARK ('”') - 0x95 U+2022 BULLET ('•') - 0x96 U+2013 EN DASH ('–') - 0x97 U+2014 EM DASH ('—') - 0x98 U+02DC SMALL TILDE ('˜') - 0x99 U+2122 TRADE MARK SIGN ('™') - 0x9A U+0161 LATIN SMALL LETTER S WITH CARON ('š') - 0x9B U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›') - 0x9C U+0153 LATIN SMALL LIGATURE OE ('œ') - 0x9D U+FFFD REPLACEMENT CHARACTER - 0x9E U+017E LATIN SMALL LETTER Z WITH CARON ('ž') - 0x9F U+0178 LATIN CAPITAL 
LETTER Y WITH DIAERESIS ('Ÿ') - - Otherwise, if the number is in the range 0x0000 to 0x0008, - 0x000E to 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to - 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, - 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, - 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, - 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, or 0x10FFFF, or is higher than 0x10FFFF, then this is - a parse error; return a character token for the U+FFFD - REPLACEMENT CHARACTER character instead. - - Otherwise, return a character token for the Unicode character - whose code point is that number. - - Anything else - Consume the maximum number of characters possible, with the - consumed characters matching one of the identifiers in the first - column of the named character references table (in a - case-sensitive manner). - - If no match can be made, then this is a parse error. No - characters are consumed, and nothing is returned. - - If the last character matched is not a U+003B SEMICOLON (;), - there is a parse error. - - If the character reference is being consumed as part of an - attribute, and the last character matched is not a U+003B - SEMICOLON (;), and the next character is in the range U+0030 - DIGIT ZERO to U+0039 DIGIT NINE, U+0041 LATIN CAPITAL LETTER A - to U+005A LATIN CAPITAL LETTER Z, or U+0061 LATIN SMALL LETTER A - to U+007A LATIN SMALL LETTER Z, then, for historical reasons, - all the characters that were matched after the U+0026 AMPERSAND - (&) must be unconsumed, and nothing is returned. - - Otherwise, return a character token for the character - corresponding to the character reference name (as given by the - second column of the named character references table). - - If the markup contains I'm &notit; I tell you, the character - reference is parsed as "not", as in, I'm ¬it; I tell you. 
But if - the markup was I'm &notin; I tell you, the character reference - would be parsed as "notin;", resulting in I'm ∉ I tell you. diff --git a/doc/tree-construction.txt b/doc/tree-construction.txt deleted file mode 100644 index 0febf147..00000000 --- a/doc/tree-construction.txt +++ /dev/null @@ -1,2201 +0,0 @@ - #8.2.4 Tokenization Table of contents 8.4 Serializing HTML fragments - - WHATWG - -HTML 5 - -Draft Recommendation — 13 January 2009 - - ← 8.2.4 Tokenization – Table of contents – 8.4 Serializing HTML - fragments → - - 8.2.5 Tree construction - - The input to the tree construction stage is a sequence of tokens from - the tokenization stage. The tree construction stage is associated with - a DOM Document object when a parser is created. The "output" of this - stage consists of dynamically modifying or extending that document's - DOM tree. - - This specification does not define when an interactive user agent has - to render the Document so that it is available to the user, or when it - has to begin accepting user input. - - As each token is emitted from the tokeniser, the user agent must - process the token according to the rules given in the section - corresponding to the current insertion mode. - - When the steps below require the UA to insert a character into a node, - if that node has a child immediately before where the character is to - be inserted, and that child is a Text node, and that Text node was the - last node that the parser inserted into the document, then the - character must be appended to that Text node; otherwise, a new Text - node whose data is just that character must be inserted in the - appropriate place. - - DOM mutation events must not fire for changes caused by the UA parsing - the document. (Conceptually, the parser is not mutating the DOM, it is - constructing it.) This includes the parsing of any content inserted - using document.write() and document.writeln() calls. 
[DOM3EVENTS] - - Not all of the tag names mentioned below are conformant tag names in - this specification; many are included to handle legacy content. They - still form part of the algorithm that implementations are required to - implement to claim conformance. - - The algorithm described below places no limit on the depth of the DOM - tree generated, or on the length of tag names, attribute names, - attribute values, text nodes, etc. While implementors are encouraged to - avoid arbitrary limits, it is recognized that practical concerns will - likely force user agents to impose nesting depths. - - 8.2.5.1 Creating and inserting elements - - When the steps below require the UA to create an element for a token in - a particular namespace, the UA must create a node implementing the - interface appropriate for the element type corresponding to the tag - name of the token in the given namespace (as given in the specification - that defines that element, e.g. for an a element in the HTML namespace, - this specification defines it to be the HTMLAnchorElement interface), - with the tag name being the name of that element, with the node being - in the given namespace, and with the attributes on the node being those - given in the given token. - - The interface appropriate for an element in the HTML namespace that is - not defined in this specification is HTMLElement. The interface - appropriate for an element in another namespace that is not defined by - that namespace's specification is Element. - - When a resettable element is created in this manner, its reset - algorithm must be invoked once the attributes are set. (This - initializes the element's value and checkedness based on the element's - attributes.) 
- __________________________________________________________________ - - When the steps below require the UA to insert an HTML element for a - token, the UA must first create an element for the token in the HTML - namespace, and then append this node to the current node, and push it - onto the stack of open elements so that it is the new current node. - - The steps below may also require that the UA insert an HTML element in - a particular place, in which case the UA must follow the same steps - except that it must insert or append the new node in the location - specified instead of appending it to the current node. (This happens in - particular during the parsing of tables with invalid content.) - - If an element created by the insert an HTML element algorithm is a - form-associated element, and the form element pointer is not null, and - the newly created element doesn't have a form attribute, the user agent - must associate the newly created element with the form element pointed - to by the form element pointer before inserting it wherever it is to be - inserted. - __________________________________________________________________ - - When the steps below require the UA to insert a foreign element for a - token, the UA must first create an element for the token in the given - namespace, and then append this node to the current node, and push it - onto the stack of open elements so that it is the new current node. If - the newly created element has an xmlns attribute in the XMLNS namespace - whose value is not exactly the same as the element's namespace, that is - a parse error. - - When the steps below require the user agent to adjust MathML attributes - for a token, then, if the token has an attribute named definitionurl, - change its name to definitionURL (note the case difference). 
- - When the steps below require the user agent to adjust foreign - attributes for a token, then, if any of the attributes on the token - match the strings given in the first column of the following table, let - the attribute be a namespaced attribute, with the prefix being the - string given in the corresponding cell in the second column, the local - name being the string given in the corresponding cell in the third - column, and the namespace being the namespace given in the - corresponding cell in the fourth column. (This fixes the use of - namespaced attributes, in particular xml:lang.) - - Attribute name Prefix Local name Namespace - xlink:actuate xlink actuate XLink namespace - xlink:arcrole xlink arcrole XLink namespace - xlink:href xlink href XLink namespace - xlink:role xlink role XLink namespace - xlink:show xlink show XLink namespace - xlink:title xlink title XLink namespace - xlink:type xlink type XLink namespace - xml:base xml base XML namespace - xml:lang xml lang XML namespace - xml:space xml space XML namespace - xmlns (none) xmlns XMLNS namespace - xmlns:xlink xmlns xlink XMLNS namespace - __________________________________________________________________ - - The generic CDATA element parsing algorithm and the generic RCDATA - element parsing algorithm consist of the following steps. These - algorithms are always invoked in response to a start tag token. - 1. Insert an HTML element for the token. - 2. If the algorithm that was invoked is the generic CDATA element - parsing algorithm, switch the tokeniser's content model flag to the - CDATA state; otherwise the algorithm invoked was the generic RCDATA - element parsing algorithm, switch the tokeniser's content model - flag to the RCDATA state. - 3. Let the original insertion mode be the current insertion mode. - 4. Then, switch the insertion mode to "in CDATA/RCDATA". 
- - 8.2.5.2 Closing elements that have implied end tags - - When the steps below require the UA to generate implied end tags, then, - while the current node is a dd element, a dt element, an li element, an - option element, an optgroup element, a p element, an rp element, or an - rt element, the UA must pop the current node off the stack of open - elements. - - If a step requires the UA to generate implied end tags but lists an - element to exclude from the process, then the UA must perform the above - steps as if that element was not in the above list. - - 8.2.5.3 Foster parenting - - Foster parenting happens when content is misnested in tables. - - When a node node is to be foster parented, the node node must be - inserted into the foster parent element, and the current table must be - marked as tainted. (Once the current table has been tainted, whitespace - characters are inserted into the foster parent element instead of the - current node.) - - The foster parent element is the parent element of the last table - element in the stack of open elements, if there is a table element and - it has such a parent element. If there is no table element in the stack - of open elements (fragment case), then the foster parent element is the - first element in the stack of open elements (the html element). - Otherwise, if there is a table element in the stack of open elements, - but the last table element in the stack of open elements has no parent, - or its parent node is not an element, then the foster parent element is - the element before the last table element in the stack of open - elements. - - If the foster parent element is the parent element of the last table - element in the stack of open elements, then node must be inserted - immediately before the last table element in the stack of open elements - in the foster parent element; otherwise, node must be appended to the - foster parent element. 
- - 8.2.5.4 The "initial" insertion mode - - When the insertion mode is "initial", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - If the DOCTYPE token's name is not a case-sensitive match for - the string "html", or if the token's public identifier is - neither missing nor a case-sensitive match for the string - "XSLT-compat", or if the token's system identifier is not - missing, then there is a parse error (this is the DOCTYPE parse - error). Conformance checkers may, instead of reporting this - error, switch to a conformance checking mode for another - language (e.g. based on the DOCTYPE token a conformance checker - could recognize that the document is an HTML4-era document, and - defer to an HTML4 conformance checker.) - - Append a DocumentType node to the Document node, with the name - attribute set to the name given in the DOCTYPE token; the - publicId attribute set to the public identifier given in the - DOCTYPE token, or the empty string if the public identifier was - missing; the systemId attribute set to the system identifier - given in the DOCTYPE token, or the empty string if the system - identifier was missing; and the other attributes specific to - DocumentType objects set to null and empty lists as appropriate. - Associate the DocumentType node with the Document object so that - it is returned as the value of the doctype attribute of the - Document object. - - Then, if the DOCTYPE token matches one of the conditions in the - following list, then set the document to quirks mode: - - + The force-quirks flag is set to on. - + The name is set to anything other than "HTML". 
- + The public identifier starts with: "+//Silmaril//dtd html Pro - v0r11 19970101//" - + The public identifier starts with: "-//AdvaSoft Ltd//DTD HTML - 3.0 asWedit + extensions//" - + The public identifier starts with: "-//AS//DTD HTML 3.0 - asWedit + extensions//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0 - Strict//" - + The public identifier starts with: "-//IETF//DTD HTML 2.0//" - + The public identifier starts with: "-//IETF//DTD HTML 2.1E//" - + The public identifier starts with: "-//IETF//DTD HTML 3.0//" - + The public identifier starts with: "-//IETF//DTD HTML 3.2 - Final//" - + The public identifier starts with: "-//IETF//DTD HTML 3.2//" - + The public identifier starts with: "-//IETF//DTD HTML 3//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 0//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 1//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 2//" - + The public identifier starts with: "-//IETF//DTD HTML Level - 3//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 0//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 1//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 2//" - + The public identifier starts with: "-//IETF//DTD HTML Strict - Level 3//" - + The public identifier starts with: "-//IETF//DTD HTML - Strict//" - + The public identifier starts with: "-//IETF//DTD HTML//" - + The public identifier starts with: "-//Metrius//DTD Metrius - Presentational//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 2.0 HTML Strict//" - + The public identifier starts with: 
"-//Microsoft//DTD Internet - Explorer 2.0 HTML//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 2.0 Tables//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 HTML Strict//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 HTML//" - + The public identifier starts with: "-//Microsoft//DTD Internet - Explorer 3.0 Tables//" - + The public identifier starts with: "-//Netscape Comm. - Corp.//DTD HTML//" - + The public identifier starts with: "-//Netscape Comm. - Corp.//DTD Strict HTML//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML 2.0//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML Extended 1.0//" - + The public identifier starts with: "-//O'Reilly and - Associates//DTD HTML Extended Relaxed 1.0//" - + The public identifier starts with: "-//SoftQuad Software//DTD - HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//" - + The public identifier starts with: "-//SoftQuad//DTD HoTMetaL - PRO 4.0::19971010::extensions to HTML 4.0//" - + The public identifier starts with: "-//Spyglass//DTD HTML 2.0 - Extended//" - + The public identifier starts with: "-//SQ//DTD HTML 2.0 - HoTMetaL + extensions//" - + The public identifier starts with: "-//Sun Microsystems - Corp.//DTD HotJava HTML//" - + The public identifier starts with: "-//Sun Microsystems - Corp.//DTD HotJava Strict HTML//" - + The public identifier starts with: "-//W3C//DTD HTML 3 - 1995-03-24//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2 - Draft//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2 - Final//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2//" - + The public identifier starts with: "-//W3C//DTD HTML 3.2S - Draft//" - + The public identifier starts with: "-//W3C//DTD HTML 4.0 - Frameset//" - + The public identifier starts with: "-//W3C//DTD HTML 4.0 - Transitional//" - + The public identifier 
starts with: "-//W3C//DTD HTML - Experimental 19960712//" - + The public identifier starts with: "-//W3C//DTD HTML - Experimental 970421//" - + The public identifier starts with: "-//W3C//DTD W3 HTML//" - + The public identifier starts with: "-//W3O//DTD W3 HTML 3.0//" - + The public identifier is set to: "-//W3O//DTD W3 HTML Strict - 3.0//EN//" - + The public identifier starts with: "-//WebTechs//DTD Mozilla - HTML 2.0//" - + The public identifier starts with: "-//WebTechs//DTD Mozilla - HTML//" - + The public identifier is set to: "-/W3C/DTD HTML 4.0 - Transitional/EN" - + The public identifier is set to: "HTML" - + The system identifier is set to: - "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" - + The system identifier is missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Frameset//" - + The system identifier is missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Transitional//" - - Otherwise, if the DOCTYPE token matches one of the conditions in - the following list, then set the document to limited quirks - mode: - - + The public identifier starts with: "-//W3C//DTD XHTML 1.0 - Frameset//" - + The public identifier starts with: "-//W3C//DTD XHTML 1.0 - Transitional//" - + The system identifier is not missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Frameset//" - + The system identifier is not missing and the public identifier - starts with: "-//W3C//DTD HTML 4.01 Transitional//" - - The name, system identifier, and public identifier strings must - be compared to the values given in the lists above in an ASCII - case-insensitive manner. A system identifier whose value is the - empty string is not considered missing for the purposes of the - conditions above. - - Then, switch the insertion mode to "before html". - - Anything else - Parse error. - - Set the document to quirks mode. - - Switch the insertion mode to "before html", then reprocess the - current token. 
- - 8.2.5.5 The "before html" insertion mode - - When the insertion mode is "before html", tokens must be handled as - follows: - - A DOCTYPE token - Parse error. Ignore the token. - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A start tag whose tag name is "html" - Create an element for the token in the HTML namespace. Append it - to the Document object. Put this element in the stack of open - elements. - - If the token has an attribute "manifest", then resolve the value - of that attribute to an absolute URL, and if that is successful, - run the application cache selection algorithm with the resulting - absolute URL. Otherwise, if there is no such attribute or - resolving it fails, run the application cache selection - algorithm with no manifest. The algorithm must be passed the - Document object. - - Switch the insertion mode to "before head". - - Anything else - Create an HTMLElement node with the tag name html, in the HTML - namespace. Append it to the Document object. Put this element in - the stack of open elements. - - Run the application cache selection algorithm with no manifest, - passing it the Document object. - - Switch the insertion mode to "before head", then reprocess the - current token. - - Should probably make end tags be ignored, so that "</head><!-- --><html>" puts the comment before the root node (or should we?) - - The root element can end up being removed from the Document object, - e.g. by scripts; nothing in particular happens in such cases, content - continues being appended to the nodes as described in the next section. 
- - 8.2.5.6 The "before head" insertion mode - - When the insertion mode is "before head", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Ignore the token. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "head" - Insert an HTML element for the token. - - Set the head element pointer to the newly created head element. - - Switch the insertion mode to "in head". - - An end tag whose tag name is one of: "head", "br" - Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. - - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. - - This will result in an empty head element being generated, with - the current token being reprocessed in the "after head" - insertion mode. - - 8.2.5.7 The "in head" insertion mode - - When the insertion mode is "in head", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. 
- - A start tag whose tag name is one of: "base", "command", "eventsource", - "link" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "meta" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - If the element has a charset attribute, and its value is a - supported encoding, and the confidence is currently tentative, - then change the encoding to the encoding given by the value of - the charset attribute. - - Otherwise, if the element has a content attribute, and applying - the algorithm for extracting an encoding from a Content-Type to - its value returns a supported encoding encoding, and the - confidence is currently tentative, then change the encoding to - the encoding encoding. - - A start tag whose tag name is "title" - Follow the generic RCDATA element parsing algorithm. - - A start tag whose tag name is "noscript", if the scripting flag is - enabled - - A start tag whose tag name is one of: "noframes", "style" - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "noscript", if the scripting flag is - disabled - Insert an HTML element for the token. - - Switch the insertion mode to "in head noscript". - - A start tag whose tag name is "script" - - 1. Create an element for the token in the HTML namespace. - 2. Mark the element as being "parser-inserted". - This ensures that, if the script is external, any - document.write() calls in the script will execute in-line, - instead of blowing the document away, as would happen in most - other cases. It also prevents the script from executing until - the end tag is seen. - 3. If the parser was originally created for the HTML fragment - parsing algorithm, then mark the script element as "already - executed". 
(fragment case) - 4. Append the new element to the current node. - 5. Switch the tokeniser's content model flag to the CDATA state. - 6. Let the original insertion mode be the current insertion mode. - 7. Switch the insertion mode to "in CDATA/RCDATA". - - An end tag whose tag name is "head" - Pop the current node (which will be the head element) off the - stack of open elements. - - Switch the insertion mode to "after head". - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is "head" - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if an end tag token with the tag name "head" had been - seen, and reprocess the current token. - - In certain UAs, some elements don't trigger the "in body" mode - straight away, but instead get put into the head. Do we want to - copy that? - - 8.2.5.8 The "in head noscript" insertion mode - - When the insertion mode is "in head noscript", tokens must be handled - as follows: - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end tag whose tag name is "noscript" - Pop the current node (which will be a noscript element) from the - stack of open elements; the new current node will be a head - element. - - Switch the insertion mode to "in head". - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A comment token - A start tag whose tag name is one of: "link", "meta", "noframes", - "style" - Process the token using the rules for the "in head" insertion - mode. - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is one of: "head", "noscript" - Any other end tag - Parse error. Ignore the token. - - Anything else - Parse error.
Act as if an end tag with the tag name "noscript" - had been seen and reprocess the current token. - - 8.2.5.9 The "after head" insertion mode - - When the insertion mode is "after head", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "body" - Insert an HTML element for the token. - - Switch the insertion mode to "in body". - - A start tag whose tag name is "frameset" - Insert an HTML element for the token. - - Switch the insertion mode to "in frameset". - - A start tag token whose tag name is one of: "base", "link", "meta", - "noframes", "script", "style", "title" - Parse error. - - Push the node pointed to by the head element pointer onto the - stack of open elements. - - Process the token using the rules for the "in head" insertion - mode. - - Remove the node pointed to by the head element pointer from the - stack of open elements. - - An end tag whose tag name is "br" - Act as described in the "anything else" entry below. - - A start tag whose tag name is "head" - Any other end tag - Parse error. Ignore the token. - - Anything else - Act as if a start tag token with the tag name "body" and no - attributes had been seen, and then reprocess the current token. - - 8.2.5.10 The "in body" insertion mode - - When the insertion mode is "in body", tokens must be handled as - follows: - - A character token - Reconstruct the active formatting elements, if any. - - Insert the token's character into the current node.
- - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Parse error. For each attribute on the token, check to see if - the attribute is already present on the top element of the stack - of open elements. If it is not, add the attribute and its - corresponding value to that element. - - A start tag token whose tag name is one of: "base", "command", - "eventsource", "link", "meta", "noframes", "script", "style", - "title" - Process the token using the rules for the "in head" insertion - mode. - - A start tag whose tag name is "body" - Parse error. - - If the second element on the stack of open elements is not a - body element, or, if the stack of open elements has only one - node on it, then ignore the token. (fragment case) - - Otherwise, for each attribute on the token, check to see if the - attribute is already present on the body element (the second - element) on the stack of open elements. If it is not, add the - attribute and its corresponding value to that element. - - An end-of-file token - If there is a node in the stack of open elements that is not - either a dd element, a dt element, an li element, a p element, a - tbody element, a td element, a tfoot element, a th element, a - thead element, a tr element, the body element, or the html - element, then this is a parse error. - - Stop parsing. - - An end tag whose tag name is "body" - If the stack of open elements does not have a body element in - scope, this is a parse error; ignore the token. - - Otherwise, if there is a node in the stack of open elements that - is not either a dd element, a dt element, an li element, a p - element, a tbody element, a td element, a tfoot element, a th - element, a thead element, a tr element, the body element, or the - html element, then this is a parse error. 
- - Switch the insertion mode to "after body". - - An end tag whose tag name is "html" - Act as if an end tag with tag name "body" had been seen, then, - if that token wasn't ignored, reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - A start tag whose tag name is one of: "address", "article", "aside", - "blockquote", "center", "datagrid", "details", "dialog", "dir", - "div", "dl", "fieldset", "figure", "footer", "header", "menu", - "nav", "ol", "p", "section", "ul" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", - "h6" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - If the current node is an element whose tag name is one of "h1", - "h2", "h3", "h4", "h5", or "h6", then this is a parse error; pop - the current node off the stack of open elements. - - Insert an HTML element for the token. - - A start tag whose tag name is one of: "pre", "listing" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - If the next token is a U+000A LINE FEED (LF) character token, - then ignore that token and move on to the next one. (Newlines at - the start of pre blocks are ignored as an authoring - convenience.) - - A start tag whose tag name is "form" - If the form element pointer is not null, then this is a parse - error; ignore the token. - - Otherwise: - - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token, and set the form element - pointer to point to the element created. 
- - A start tag whose tag name is "li" - Run the following algorithm: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node is an li element, then act as if an end tag with the - tag name "li" had been seen, then jump to the last step. - 3. If node is not in the formatting category, and is not in the - phrasing category, and is not an address, div, or p element, - then jump to the last step. - 4. Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. - 5. This is the last step. - If the stack of open elements has a p element in scope, then - act as if an end tag with the tag name "p" had been seen. - Finally, insert an HTML element for the token. - - A start tag whose tag name is one of: "dd", "dt" - Run the following algorithm: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node is a dd or dt element, then act as if an end tag with - the same tag name as node had been seen, then jump to the last - step. - 3. If node is not in the formatting category, and is not in the - phrasing category, and is not an address, div, or p element, - then jump to the last step. - 4. Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. - 5. This is the last step. - If the stack of open elements has a p element in scope, then - act as if an end tag with the tag name "p" had been seen. - Finally, insert an HTML element for the token. - - A start tag whose tag name is "plaintext" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - Switch the content model flag to the PLAINTEXT state. 
- - Once a start tag with the tag name "plaintext" has been seen, - that will be the last token ever seen other than character - tokens (and the end-of-file token), because there is no way to - switch the content model flag out of the PLAINTEXT state. - - An end tag whose tag name is one of: "address", "article", "aside", - "blockquote", "center", "datagrid", "details", "dialog", "dir", - "div", "dl", "fieldset", "figure", "footer", "header", - "listing", "menu", "nav", "ol", "pre", "section", "ul" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is "form" - Let node be the element that the form element pointer is set to. - - Set the form element pointer to null. - - If node is null or the stack of open elements does not have node - in scope, then this is a parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not node, then this is a parse error. - 3. Remove node from the stack of open elements. - - An end tag whose tag name is "p" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; act as if a start tag with the tag name p had been - seen, then reprocess the current token. - - Otherwise, run these steps: - - 1. Generate implied end tags, except for elements with the same - tag name as the token. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. 
Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is one of: "dd", "dt", "li" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags, except for elements with the same - tag name as the token. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - - An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" - If the stack of open elements does not have an element in scope - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", - then this is a parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6" - has been popped from the stack. - - An end tag whose tag name is "sarcasm" - Take a deep breath, then act as described in the "any other end - tag" entry below. 
- - A start tag whose tag name is "a" - If the list of active formatting elements contains an element - whose tag name is "a" between the end of the list and the last - marker on the list (or the start of the list if there is no - marker on the list), then this is a parse error; act as if an - end tag with the tag name "a" had been seen, then remove that - element from the list of active formatting elements and the - stack of open elements if the end tag didn't already remove it - (it might not have if the element is not in table scope). - - In the non-conforming stream - <a href="a">a<table><a href="b">b</table>x, the first a element - would be closed upon seeing the second one, and the "x" - character would be inside a link to "b", not to "a". This is - despite the fact that the outer a element is not in table scope - (meaning that a regular end tag at the start of the table - wouldn't close the outer a element). - - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - A start tag whose tag name is one of: "b", "big", "em", "font", "i", - "s", "small", "strike", "strong", "tt", "u" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - A start tag whose tag name is "nobr" - Reconstruct the active formatting elements, if any. - - If the stack of open elements has a nobr element in scope, then - this is a parse error; act as if an end tag with the tag name - "nobr" had been seen, then once again reconstruct the active - formatting elements, if any. - - Insert an HTML element for the token. Add that element to the - list of active formatting elements. - - An end tag whose tag name is one of: "a", "b", "big", "em", "font", - "i", "nobr", "s", "small", "strike", "strong", "tt", "u" - Follow these steps: - - 1. Let the formatting element be the last element in the list of - active formatting elements that: - o is between the end of the list and the last scope marker - in the list, if any, or the start of the list otherwise, - and - o has the same tag name as the token. - If there is no such node, or, if that node is also in the - stack of open elements but the element is not in scope, then - this is a parse error; ignore the token, and abort these - steps. - Otherwise, if there is such a node, but that node is not in - the stack of open elements, then this is a parse error; remove - the element from the list, and abort these steps.
- Otherwise, there is a formatting element and that element is - in the stack and is in scope. If the element is not the - current node, this is a parse error. In any case, proceed with - the algorithm as written in the following steps. - 2. Let the furthest block be the topmost node in the stack of - open elements that is lower in the stack than the formatting - element, and is not an element in the phrasing or formatting - categories. There might not be one. - 3. If there is no furthest block, then the UA must skip the - subsequent steps and instead just pop all the nodes from the - bottom of the stack of open elements, from the current node up - to and including the formatting element, and remove the - formatting element from the list of active formatting - elements. - 4. Let the common ancestor be the element immediately above the - formatting element in the stack of open elements. - 5. If the furthest block has a parent node, then remove the - furthest block from its parent node. - 6. Let a bookmark note the position of the formatting element in - the list of active formatting elements relative to the - elements on either side of it in the list. - 7. Let node and last node be the furthest block. Follow these - steps: - 1. Let node be the element immediately above node in the - stack of open elements. - 2. If node is not in the list of active formatting elements, - then remove node from the stack of open elements and then - go back to step 1. - 3. Otherwise, if node is the formatting element, then go to - the next step in the overall algorithm. - 4. Otherwise, if last node is the furthest block, then move - the aforementioned bookmark to be immediately after the - node in the list of active formatting elements. - 5. 
If node has any children, perform a shallow clone of - node, replace the entry for node in the list of active - formatting elements with an entry for the clone, replace - the entry for node in the stack of open elements with an - entry for the clone, and let node be the clone. - 6. Insert last node into node, first removing it from its - previous parent node if any. - 7. Let last node be node. - 8. Return to step 1 of this inner set of steps. - 8. If the common ancestor node is a table, tbody, tfoot, thead, - or tr element, then, foster parent whatever last node ended up - being in the previous step. - Otherwise, append whatever last node ended up being in the - previous step to the common ancestor node, first removing it - from its previous parent node if any. - 9. Perform a shallow clone of the formatting element. - 10. Take all of the child nodes of the furthest block and append - them to the clone created in the last step. - 11. Append that clone to the furthest block. - 12. Remove the formatting element from the list of active - formatting elements, and insert the clone into the list of - active formatting elements at the position of the - aforementioned bookmark. - 13. Remove the formatting element from the stack of open elements, - and insert the clone into the stack of open elements - immediately below the position of the furthest block in that - stack. - 14. Jump back to step 1 in this series of steps. - - The way these steps are defined, only elements in the formatting - category ever get cloned by this algorithm. - - Because of the way this algorithm causes elements to change - parents, it has been dubbed the "adoption agency algorithm" (in - contrast with other possible algorithms for dealing with - misnested content, which included the "incest algorithm", the - "secret affair algorithm", and the "Heisenberg algorithm").
- - A start tag whose tag name is "button" - If the stack of open elements has a button element in scope, - then this is a parse error; act as if an end tag with the tag - name "button" had been seen, then reprocess the token. - - Otherwise: - - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - Insert a marker at the end of the list of active formatting - elements. - - A start tag token whose tag name is one of: "applet", "marquee", - "object" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - Insert a marker at the end of the list of active formatting - elements. - - An end tag token whose tag name is one of: "applet", "button", - "marquee", "object" - If the stack of open elements does not have an element in scope - with the same tag name as that of the token, then this is a - parse error; ignore the token. - - Otherwise, run these steps: - - 1. Generate implied end tags. - 2. If the current node is not an element with the same tag name - as that of the token, then this is a parse error. - 3. Pop elements from the stack of open elements until an element - with the same tag name as the token has been popped from the - stack. - 4. Clear the list of active formatting elements up to the last - marker. - - A start tag whose tag name is "xmp" - Reconstruct the active formatting elements, if any. - - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "table" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. - - Switch the insertion mode to "in table". - - A start tag whose tag name is one of: "area", "basefont", "bgsound", - "br", "embed", "img", "input", "spacer", "wbr" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. 
Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is one of: "param", "source" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "hr" - If the stack of open elements has a p element in scope, then act - as if an end tag with the tag name "p" had been seen. - - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "image" - Parse error. Change the token's tag name to "img" and reprocess - it. (Don't ask.) - - A start tag whose tag name is "isindex" - Parse error. - - If the form element pointer is not null, then ignore the token. - - Otherwise: - - Acknowledge the token's self-closing flag, if it is set. - - Act as if a start tag token with the tag name "form" had been - seen. - - If the token has an attribute called "action", set the action - attribute on the resulting form element to the value of the - "action" attribute of the token. - - Act as if a start tag token with the tag name "hr" had been - seen. - - Act as if a start tag token with the tag name "p" had been seen. - - Act as if a start tag token with the tag name "label" had been - seen. - - Act as if a stream of character tokens had been seen (see below - for what they should say). - - Act as if a start tag token with the tag name "input" had been - seen, with all the attributes from the "isindex" token except - "name", "action", and "prompt". Set the name attribute of the - resulting input element to the value "isindex". - - Act as if a stream of character tokens had been seen (see below - for what they should say). - - Act as if an end tag token with the tag name "label" had been - seen. 
- - Act as if an end tag token with the tag name "p" had been seen. - - Act as if a start tag token with the tag name "hr" had been - seen. - - Act as if an end tag token with the tag name "form" had been - seen. - - If the token has an attribute with the name "prompt", then the - first stream of characters must be the same string as given in - that attribute, and the second stream of characters must be - empty. Otherwise, the two streams of character tokens together - should, together with the input element, express the equivalent - of "This is a searchable index. Insert your search keywords - here: (input field)" in the user's preferred language. - - A start tag whose tag name is "textarea" - - 1. Insert an HTML element for the token. - 2. If the next token is a U+000A LINE FEED (LF) character token, - then ignore that token and move on to the next one. (Newlines - at the start of textarea elements are ignored as an authoring - convenience.) - 3. Switch the tokeniser's content model flag to the RCDATA state. - 4. Let the original insertion mode be the current insertion mode. - 5. Switch the insertion mode to "in CDATA/RCDATA". - - A start tag whose tag name is one of: "iframe", "noembed" - A start tag whose tag name is "noscript", if the scripting flag is - enabled - Follow the generic CDATA element parsing algorithm. - - A start tag whose tag name is "select" - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - If the insertion mode is one of "in table", "in caption", "in - column group", "in table body", "in row", or "in cell", then - switch the insertion mode to "in select in table". Otherwise, - switch the insertion mode to "in select". - - A start tag whose tag name is one of: "optgroup", "option" - If the stack of open elements has an option element in scope, - then act as if an end tag with the tag name "option" had been - seen. - - Reconstruct the active formatting elements, if any.
- - Insert an HTML element for the token. - - A start tag whose tag name is one of: "rp", "rt" - If the stack of open elements has a ruby element in scope, then - generate implied end tags. If the current node is not then a - ruby element, this is a parse error; pop all the nodes from the - current node up to the node immediately before the bottommost - ruby element on the stack of open elements. - - Insert an HTML element for the token. - - An end tag whose tag name is "br" - Parse error. Act as if a start tag token with the tag name "br" - had been seen. Ignore the end tag token. - - A start tag whose tag name is "math" - Reconstruct the active formatting elements, if any. - - Adjust MathML attributes for the token. (This fixes the case of - MathML attributes that are not all lowercase.) - - Adjust foreign attributes for the token. (This fixes the use of - namespaced attributes, in particular XLink.) - - Insert a foreign element for the token, in the MathML namespace. - - If the token has its self-closing flag set, pop the current node - off the stack of open elements and acknowledge the token's - self-closing flag. - - Otherwise, let the secondary insertion mode be the current - insertion mode, and then switch the insertion mode to "in - foreign content". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "frame", "frameset", "head", "tbody", "td", "tfoot", "th", - "thead", "tr" - Parse error. Ignore the token. - - Any other start tag - Reconstruct the active formatting elements, if any. - - Insert an HTML element for the token. - - This element will be a phrasing element. - - Any other end tag - Run the following steps: - - 1. Initialize node to be the current node (the bottommost node of - the stack). - 2. If node has the same tag name as the end tag token, then: - 1. Generate implied end tags. - 2. If the tag name of the end tag token does not match the - tag name of the current node, this is a parse error. - 3. 
Pop all the nodes from the current node up to node, - including node, then stop these steps. - 3. Otherwise, if node is in neither the formatting category nor - the phrasing category, then this is a parse error; ignore the - token, and abort these steps. - 4. Set node to the previous entry in the stack of open elements. - 5. Return to step 2. - - 8.2.5.11 The "in CDATA/RCDATA" insertion mode - - When the insertion mode is "in CDATA/RCDATA", tokens must be handled as - follows: - - A character token - Insert the token's character into the current node. - - An end-of-file token - Parse error. - - If the current node is a script element, mark the script element - as "already executed". - - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode and - reprocess the current token. - - An end tag whose tag name is "script" - Let script be the current node (which will be a script element). - - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode. - - Let the old insertion point have the same value as the current - insertion point. Let the insertion point be just before the next - input character. - - Increment the parser's script nesting level by one. - - Run the script. This might cause some script to execute, which - might cause new characters to be inserted into the tokeniser, - and might cause the tokeniser to output more tokens, resulting - in a reentrant invocation of the parser. - - Decrement the parser's script nesting level by one. If the - parser's script nesting level is zero, then set the parser pause - flag to false. - - Let the insertion point have the value of the old insertion - point. (In other words, restore the insertion point to the value - it had before the previous paragraph. This value might be the - "undefined" value.) 
- - At this stage, if there is a pending external script, then: - - If the tree construction stage is being called reentrantly, say - from a call to document.write(): - Set the parser pause flag to true, and abort the - processing of any nested invocations of the tokeniser, - yielding control back to the caller. (Tokenization will - resume when the caller returns to the "outer" tree - construction stage.) - - Otherwise: - Follow these steps: - - 1. Let the script be the pending external script. There is - no longer a pending external script. - 2. Pause until the script has completed loading. - 3. Let the insertion point be just before the next input - character. - 4. Execute the script. - 5. Let the insertion point be undefined again. - 6. If there is once again a pending external script, then - repeat these steps from step 1. - - Any other end tag - Pop the current node off the stack of open elements. - - Switch the insertion mode to the original insertion mode. - - 8.2.5.12 The "in table" insertion mode - - When the insertion mode is "in table", tokens must be handled as - follows: - - A character token that is one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - If the current table is tainted, then act as described in the - "anything else" entry below. - - Otherwise, insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "caption" - Clear the stack back to a table context. (See below.) - - Insert a marker at the end of the list of active formatting - elements. - - Insert an HTML element for the token, then switch the insertion - mode to "in caption". - - A start tag whose tag name is "colgroup" - Clear the stack back to a table context. (See below.)
- - Insert an HTML element for the token, then switch the insertion - mode to "in column group". - - A start tag whose tag name is "col" - Act as if a start tag token with the tag name "colgroup" had - been seen, then reprocess the current token. - - A start tag whose tag name is one of: "tbody", "tfoot", "thead" - Clear the stack back to a table context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in table body". - - A start tag whose tag name is one of: "td", "th", "tr" - Act as if a start tag token with the tag name "tbody" had been - seen, then reprocess the current token. - - A start tag whose tag name is "table" - Parse error. Act as if an end tag token with the tag name - "table" had been seen, then, if that token wasn't ignored, - reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - An end tag whose tag name is "table" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Pop elements from this stack until a table element has been - popped from the stack. - - Reset the insertion mode appropriately. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" - Parse error. Ignore the token. - - A start tag whose tag name is one of: "style", "script" - If the current table is tainted then act as described in the - "anything else" entry below. - - Otherwise, process the token using the rules for the "in head" - insertion mode. - - A start tag whose tag name is "input" - If the token does not have an attribute with the name "type", or - if it does, but that attribute's value is not an ASCII - case-insensitive match for the string "hidden", or, if the - current table is tainted, then: act as described in the - "anything else" entry below. 
- - Otherwise: - - Parse error. - - Insert an HTML element for the token. - - Pop that input element off the stack of open elements. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. - - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Process the token using the rules for the "in body" - insertion mode, except that if the current node is a table, - tbody, tfoot, thead, or tr element, then, whenever a node would - be inserted into the current node, it must instead be foster - parented. - - When the steps above require the UA to clear the stack back to a table - context, it means that the UA must, while the current node is not a - table element or an html element, pop elements from the stack of open - elements. - - The current node being an html element after this process is a fragment - case. - - 8.2.5.13 The "in caption" insertion mode - - When the insertion mode is "in caption", tokens must be handled as - follows: - - An end tag whose tag name is "caption" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Generate implied end tags. - - Now, if the current node is not a caption element, then this is - a parse error. - - Pop elements from this stack until a caption element has been - popped from the stack. - - Clear the list of active formatting elements up to the last - marker. - - Switch the insertion mode to "in table". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" - - An end tag whose tag name is "table" - Parse error. Act as if an end tag with the tag name "caption" - had been seen, then, if that token wasn't ignored, reprocess the - current token. - - The fake end tag token here can only be ignored in the fragment - case. 
- - An end tag whose tag name is one of: "body", "col", "colgroup", "html", - "tbody", "td", "tfoot", "th", "thead", "tr" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in body" insertion - mode. - - 8.2.5.14 The "in column group" insertion mode - - When the insertion mode is "in column group", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "col" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - An end tag whose tag name is "colgroup" - If the current node is the root html element, then this is a - parse error; ignore the token. (fragment case) - - Otherwise, pop the current node (which will be a colgroup - element) from the stack of open elements. Switch the insertion - mode to "in table". - - An end tag whose tag name is "col" - Parse error. Ignore the token. - - An end-of-file token - If the current node is the root html element, then stop parsing. - (fragment case) - - Otherwise, act as described in the "anything else" entry below. - - Anything else - Act as if an end tag with the tag name "colgroup" had been seen, - and then, if that token wasn't ignored, reprocess the current - token. - - The fake end tag token here can only be ignored in the fragment - case. 
- - 8.2.5.15 The "in table body" insertion mode - - When the insertion mode is "in table body", tokens must be handled as - follows: - - A start tag whose tag name is "tr" - Clear the stack back to a table body context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in row". - - A start tag whose tag name is one of: "th", "td" - Parse error. Act as if a start tag with the tag name "tr" had - been seen, then reprocess the current token. - - An end tag whose tag name is one of: "tbody", "tfoot", "thead" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. - - Otherwise: - - Clear the stack back to a table body context. (See below.) - - Pop the current node from the stack of open elements. Switch the - insertion mode to "in table". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead" - - An end tag whose tag name is "table" - If the stack of open elements does not have a tbody, thead, or - tfoot element in table scope, this is a parse error. Ignore the - token. (fragment case) - - Otherwise: - - Clear the stack back to a table body context. (See below.) - - Act as if an end tag with the same tag name as the current node - ("tbody", "tfoot", or "thead") had been seen, then reprocess the - current token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th", "tr" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in table" insertion - mode. - - When the steps above require the UA to clear the stack back to a table - body context, it means that the UA must, while the current node is not - a tbody, tfoot, thead, or html element, pop elements from the stack of - open elements. - - The current node being an html element after this process is a fragment - case. 
- - 8.2.5.16 The "in row" insertion mode - - When the insertion mode is "in row", tokens must be handled as follows: - - A start tag whose tag name is one of: "th", "td" - Clear the stack back to a table row context. (See below.) - - Insert an HTML element for the token, then switch the insertion - mode to "in cell". - - Insert a marker at the end of the list of active formatting - elements. - - An end tag whose tag name is "tr" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Clear the stack back to a table row context. (See below.) - - Pop the current node (which will be a tr element) from the stack - of open elements. Switch the insertion mode to "in table body". - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", "tr" - - An end tag whose tag name is "table" - Act as if an end tag with the tag name "tr" had been seen, then, - if that token wasn't ignored, reprocess the current token. - - The fake end tag token here can only be ignored in the fragment - case. - - An end tag whose tag name is one of: "tbody", "tfoot", "thead" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. - - Otherwise, act as if an end tag with the tag name "tr" had been - seen, then reprocess the current token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th" - Parse error. Ignore the token. - - Anything else - Process the token using the rules for the "in table" insertion - mode. - - When the steps above require the UA to clear the stack back to a table - row context, it means that the UA must, while the current node is not a - tr element or an html element, pop elements from the stack of open - elements. 
- - The current node being an html element after this process is a fragment - case. - - 8.2.5.17 The "in cell" insertion mode - - When the insertion mode is "in cell", tokens must be handled as - follows: - - An end tag whose tag name is one of: "td", "th" - If the stack of open elements does not have an element in table - scope with the same tag name as that of the token, then this is - a parse error and the token must be ignored. - - Otherwise: - - Generate implied end tags. - - Now, if the current node is not an element with the same tag - name as the token, then this is a parse error. - - Pop elements from this stack until an element with the same tag - name as the token has been popped from the stack. - - Clear the list of active formatting elements up to the last - marker. - - Switch the insertion mode to "in row". (The current node will be - a tr element at this point.) - - A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" - If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (fragment case) - - Otherwise, close the cell (see below) and reprocess the current - token. - - An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html" - Parse error. Ignore the token. - - An end tag whose tag name is one of: "table", "tbody", "tfoot", - "thead", "tr" - If the stack of open elements does not have an element in table - scope with the same tag name as that of the token (which can - only happen for "tbody", "tfoot" and "thead", or, in the - fragment case), then this is a parse error and the token must be - ignored. - - Otherwise, close the cell (see below) and reprocess the current - token. - - Anything else - Process the token using the rules for the "in body" insertion - mode. - - Where the steps above say to close the cell, they mean to run the - following algorithm: - 1. 
If the stack of open elements has a td element in table scope, then - act as if an end tag token with the tag name "td" had been seen. - 2. Otherwise, the stack of open elements will have a th element in - table scope; act as if an end tag token with the tag name "th" had - been seen. - - The stack of open elements cannot have both a td and a th element in - table scope at the same time, nor can it have neither when the - insertion mode is "in cell". - - 8.2.5.18 The "in select" insertion mode - - When the insertion mode is "in select", tokens must be handled as - follows: - - A character token - Insert the token's character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "option" - If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. - - Insert an HTML element for the token. - - A start tag whose tag name is "optgroup" - If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. - - If the current node is an optgroup element, act as if an end tag - with the tag name "optgroup" had been seen. - - Insert an HTML element for the token. - - An end tag whose tag name is "optgroup" - First, if the current node is an option element, and the node - immediately before it in the stack of open elements is an - optgroup element, then act as if an end tag with the tag name - "option" had been seen. - - If the current node is an optgroup element, then pop that node - from the stack of open elements. Otherwise, this is a parse - error; ignore the token. 
- - An end tag whose tag name is "option" - If the current node is an option element, then pop that node - from the stack of open elements. Otherwise, this is a parse - error; ignore the token. - - An end tag whose tag name is "select" - If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse - error. Ignore the token. (fragment case) - - Otherwise: - - Pop elements from the stack of open elements until a select - element has been popped from the stack. - - Reset the insertion mode appropriately. - - A start tag whose tag name is "select" - Parse error. Act as if the token had been an end tag with the - tag name "select" instead. - - A start tag whose tag name is one of: "input", "textarea" - Parse error. Act as if an end tag with the tag name "select" had - been seen, and reprocess the token. - - A start tag token whose tag name is "script" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. - - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Ignore the token. - - 8.2.5.19 The "in select in table" insertion mode - - When the insertion mode is "in select in table", tokens must be handled - as follows: - - A start tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" - Parse error. Act as if an end tag with the tag name "select" had - been seen, and reprocess the token. - - An end tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" - Parse error. - - If the stack of open elements has an element in table scope with - the same tag name as that of the token, then act as if an end - tag with the tag name "select" had been seen, and reprocess the - token. Otherwise, ignore the token. 
- - Anything else - Process the token using the rules for the "in select" insertion - mode. - - 8.2.5.20 The "in foreign content" insertion mode - - When the insertion mode is "in foreign content", tokens must be handled - as follows: - - A character token - Insert the token's character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mi element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mo element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mn element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an ms element in the MathML namespace. - - A start tag whose tag name is neither "mglyph" nor "malignmark", if the - current node is an mtext element in the MathML namespace. - - A start tag, if the current node is an element in the HTML namespace. - An end tag - Process the token using the rules for the secondary insertion - mode. - - If, after doing so, the insertion mode is still "in foreign - content", but there is no element in scope that has a namespace - other than the HTML namespace, switch the insertion mode to the - secondary insertion mode. 
- - A start tag whose tag name is one of: "b", "big", "blockquote", "body", - "br", "center", "code", "dd", "div", "dl", "dt", "em", "embed", - "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img", - "li", "listing", "menu", "meta", "nobr", "ol", "p", "pre", - "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", - "table", "tt", "u", "ul", "var" - - A start tag whose tag name is "font", if the token has any attributes - named "color", "face", or "size" - - An end-of-file token - Parse error. - - Pop elements from the stack of open elements until the current - node is in the HTML namespace. - - Switch the insertion mode to the secondary insertion mode, and - reprocess the token. - - Any other start tag - If the current node is an element in the MathML namespace, - adjust MathML attributes for the token. (This fixes the case of - MathML attributes that are not all lowercase.) - - Adjust foreign attributes for the token. (This fixes the use of - namespaced attributes, in particular XLink in SVG.) - - Insert a foreign element for the token, in the same namespace as - the current node. - - If the token has its self-closing flag set, pop the current node - off the stack of open elements and acknowledge the token's - self-closing flag. - - 8.2.5.21 The "after body" insertion mode - - When the insertion mode is "after body", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Process the token using the rules for the "in body" insertion - mode. - - A comment token - Append a Comment node to the first element in the stack of open - elements (the html element), with the data attribute set to the - data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. 
- - An end tag whose tag name is "html" - If the parser was originally created as part of the HTML - fragment parsing algorithm, this is a parse error; ignore the - token. (fragment case) - - Otherwise, switch the insertion mode to "after after body". - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Switch the insertion mode to "in body" and - reprocess the token. - - 8.2.5.22 The "in frameset" insertion mode - - When the insertion mode is "in frameset", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - A start tag whose tag name is "frameset" - Insert an HTML element for the token. - - An end tag whose tag name is "frameset" - If the current node is the root html element, then this is a - parse error; ignore the token. (fragment case) - - Otherwise, pop the current node from the stack of open elements. - - If the parser was not originally created as part of the HTML - fragment parsing algorithm (fragment case), and the current node - is no longer a frameset element, then switch the insertion mode - to "after frameset". - - A start tag whose tag name is "frame" - Insert an HTML element for the token. Immediately pop the - current node off the stack of open elements. - - Acknowledge the token's self-closing flag, if it is set. - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - If the current node is not the root html element, then this is a - parse error. 
- - It can only be the current node in the fragment case. - - Stop parsing. - - Anything else - Parse error. Ignore the token. - - 8.2.5.23 The "after frameset" insertion mode - - When the insertion mode is "after frameset", tokens must be handled as - follows: - - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - Insert the character into the current node. - - A comment token - Append a Comment node to the current node with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - Parse error. Ignore the token. - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end tag whose tag name is "html" - Switch the insertion mode to "after after frameset". - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Ignore the token. - - This doesn't handle UAs that don't support frames, or that do support - frames but want to show the NOFRAMES content. Supporting the former is - easy; supporting the latter is harder. - - 8.2.5.24 The "after after body" insertion mode - - When the insertion mode is "after after body", tokens must be handled - as follows: - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end-of-file token - Stop parsing. - - Anything else - Parse error. Switch the insertion mode to "in body" and - reprocess the token. 
- - 8.2.5.25 The "after after frameset" insertion mode - - When the insertion mode is "after after frameset", tokens must be - handled as follows: - - A comment token - Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. - - A DOCTYPE token - A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE - - A start tag whose tag name is "html" - Process the token using the rules for the "in body" insertion - mode. - - An end-of-file token - Stop parsing. - - A start tag whose tag name is "noframes" - Process the token using the rules for the "in head" insertion - mode. - - Anything else - Parse error. Ignore the token. - - 8.2.6 The end - - Once the user agent stops parsing the document, the user agent must - follow the steps in this section. - - First, the current document readiness must be set to "interactive". - - Then, the rules for when a script completes loading start applying - (script execution is no longer managed by the parser). - - If any of the scripts in the list of scripts that will execute as soon - as possible have completed loading, or if the list of scripts that will - execute asynchronously is not empty and the first script in that list - has completed loading, then the user agent must act as if those scripts - just completed loading, following the rules given for that in the - script element definition. - - Then, if the list of scripts that will execute when the document has - finished parsing is not empty, and the first item in this list has - already completed loading, then the user agent must act as if that - script just finished loading. - - By this point, there will be no scripts that have loaded but have not - yet been executed. - - The user agent must then fire a simple event called DOMContentLoaded at - the Document. 
- - Once everything that delays the load event has completed, the user - agent must set the current document readiness to "complete", and then - fire a load event at the body element. - - delaying the load event for things like image loads allows for intranet - port scans (even without javascript!). Should we really encode that - into the spec? - - 8.2.7 Coercing an HTML DOM into an infoset - - When an application uses an HTML parser in conjunction with an XML - pipeline, it is possible that the constructed DOM is not compatible - with the XML tool chain in certain subtle ways. For example, an XML - toolchain might not be able to represent attributes with the name - xmlns, since they conflict with the Namespaces in XML syntax. There is - also some data that the HTML parser generates that isn't included in - the DOM itself. This section specifies some rules for handling these - issues. - - If the XML API being used doesn't support DOCTYPEs, the tool may drop - DOCTYPEs altogether. - - If the XML API doesn't support attributes in no namespace that are - named "xmlns", attributes whose names start with "xmlns:", or - attributes in the XMLNS namespace, then the tool may drop such - attributes. - - The tool may annotate the output with any namespace declarations - required for proper operation. - - If the XML API being used restricts the allowable characters in the - local names of elements and attributes, then the tool may map all - element and attribute local names that the API wouldn't support to a - set of names that are allowed, by replacing any character that isn't - supported with the uppercase letter U and the five digits of the - character's Unicode codepoint when expressed in hexadecimal, using - digits 0-9 and capital letters A-F as the symbols, in increasing - numeric order. 
- - For example, the element name foo start tag will be closed - by a end tag, and never by a end tag, even if - the user agent is using the rules above to then generate an actual - element in the DOM with the name aU0003AU0003A for that start tag. - - 8.3 Namespaces - - The HTML namespace is: http://www.w3.org/1999/xhtml - - The MathML namespace is: http://www.w3.org/1998/Math/MathML - - The SVG namespace is: http://www.w3.org/2000/svg - - The XLink namespace is: http://www.w3.org/1999/xlink - - The XML namespace is: http://www.w3.org/XML/1998/namespace - - The XMLNS namespace is: http://www.w3.org/2000/xmlns/ diff --git a/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml b/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml deleted file mode 100644 index 1eab09c2..00000000 --- a/gwt-src/nu/validator/htmlparser/HtmlParser.gwt.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java b/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java deleted file mode 100644 index 2eaa6764..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/BrowserTreeBuilder.java +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2008-2017 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.gwt; - -import java.util.LinkedList; - -import nu.validator.htmlparser.common.DocumentMode; -import nu.validator.htmlparser.impl.CoalescingTreeBuilder; -import nu.validator.htmlparser.impl.HtmlAttributes; - -import org.xml.sax.SAXException; - -import com.google.gwt.core.client.JavaScriptException; -import com.google.gwt.core.client.JavaScriptObject; - -class BrowserTreeBuilder extends CoalescingTreeBuilder { - - private JavaScriptObject document; - - private JavaScriptObject script; - - private JavaScriptObject placeholder; - - private boolean readyToRun; - - private final LinkedList scriptStack = new LinkedList(); - - private class ScriptHolder { - private final JavaScriptObject script; - - private final JavaScriptObject placeholder; - - /** - * @param script - * @param placeholder - */ - public ScriptHolder(JavaScriptObject script, - JavaScriptObject placeholder) { - this.script = script; - this.placeholder = placeholder; - } - - /** - * Returns the script. - * - * @return the script - */ - public JavaScriptObject getScript() { - return script; - } - - /** - * Returns the placeholder. 
- * - * @return the placeholder - */ - public JavaScriptObject getPlaceholder() { - return placeholder; - } - } - - protected BrowserTreeBuilder(JavaScriptObject document) { - super(); - this.document = document; - installExplorerCreateElementNS(document); - } - - private static native boolean installExplorerCreateElementNS( - JavaScriptObject doc) /*-{ - if (!doc.createElementNS) { - doc.createElementNS = function (uri, local) { - if ("http://www.w3.org/1999/xhtml" == uri) { - return doc.createElement(local); - } else if ("http://www.w3.org/1998/Math/MathML" == uri) { - if (!doc.mathplayerinitialized) { - var obj = document.createElement("object"); - obj.setAttribute("id", "mathplayer"); - obj.setAttribute("classid", "clsid:32F66A20-7614-11D4-BD11-00104BD3F987"); - document.getElementsByTagName("head")[0].appendChild(obj); - document.namespaces.add("m", "http://www.w3.org/1998/Math/MathML", "#mathplayer"); - doc.mathplayerinitialized = true; - } - return doc.createElement("m:" + local); - } else if ("http://www.w3.org/2000/svg" == uri) { - if (!doc.renesisinitialized) { - var obj = document.createElement("object"); - obj.setAttribute("id", "renesis"); - obj.setAttribute("classid", "clsid:AC159093-1683-4BA2-9DCF-0C350141D7F2"); - document.getElementsByTagName("head")[0].appendChild(obj); - document.namespaces.add("s", "http://www.w3.org/2000/svg", "#renesis"); - doc.renesisinitialized = true; - } - return doc.createElement("s:" + local); - } else { - // throw - } - } - } - }-*/; - - private static native boolean hasAttributeNS(JavaScriptObject element, - String uri, String localName) /*-{ - return element.hasAttributeNS(uri, localName); - }-*/; - - private static native void setAttributeNS(JavaScriptObject element, - String uri, String localName, String value) /*-{ - element.setAttributeNS(uri, localName, value); - }-*/; - - @Override protected void addAttributesToElement(JavaScriptObject element, - HtmlAttributes attributes) throws SAXException { - try { - for 
(int i = 0; i < attributes.getLength(); i++) { - String localName = attributes.getLocalNameNoBoundsCheck(i); - String uri = attributes.getURINoBoundsCheck(i); - if (!hasAttributeNS(element, uri, localName)) { - setAttributeNS(element, uri, localName, - attributes.getValueNoBoundsCheck(i)); - } - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native void appendChild(JavaScriptObject parent, - JavaScriptObject child) /*-{ - parent.appendChild(child); - }-*/; - - private static native JavaScriptObject createTextNode(JavaScriptObject doc, - String text) /*-{ - return doc.createTextNode(text); - }-*/; - - private static native JavaScriptObject getLastChild(JavaScriptObject node) /*-{ - return node.lastChild; - }-*/; - - private static native void extendTextNode(JavaScriptObject node, String text) /*-{ - node.data += text; - }-*/; - - @Override protected void appendCharacters(JavaScriptObject parent, - String text) throws SAXException { - try { - if (parent == placeholder) { - appendChild(script, createTextNode(document, text)); - - } - JavaScriptObject lastChild = getLastChild(parent); - if (lastChild != null && getNodeType(lastChild) == 3) { - extendTextNode(lastChild, text); - return; - } - appendChild(parent, createTextNode(document, text)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native boolean hasChildNodes(JavaScriptObject element) /*-{ - return element.hasChildNodes(); - }-*/; - - private static native JavaScriptObject getFirstChild( - JavaScriptObject element) /*-{ - return element.firstChild; - }-*/; - - @Override protected void appendChildrenToNewParent( - JavaScriptObject oldParent, JavaScriptObject newParent) - throws SAXException { - try { - while (hasChildNodes(oldParent)) { - appendChild(newParent, getFirstChild(oldParent)); - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native JavaScriptObject createComment(JavaScriptObject doc, - String text) /*-{ - return 
doc.createComment(text); - }-*/; - - @Override protected void appendComment(JavaScriptObject parent, - String comment) throws SAXException { - try { - if (parent == placeholder) { - appendChild(script, createComment(document, comment)); - } - appendChild(parent, createComment(document, comment)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected void appendCommentToDocument(String comment) - throws SAXException { - try { - appendChild(document, createComment(document, comment)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native JavaScriptObject createElementNS( - JavaScriptObject doc, String ns, String local) /*-{ - return doc.createElementNS(ns, local); - }-*/; - - @Override protected JavaScriptObject createElement(String ns, String name, - HtmlAttributes attributes) throws SAXException { - try { - JavaScriptObject rv = createElementNS(document, ns, name); - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - - if ("script" == name) { - if (placeholder != null) { - scriptStack.addLast(new ScriptHolder(script, placeholder)); - } - script = rv; - placeholder = createElementNS(document, - "http://n.validator.nu/placeholder/", "script"); - rv = placeholder; - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - } - - return rv; - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - @Override protected JavaScriptObject createHtmlElementSetAsRoot( - HtmlAttributes attributes) throws SAXException { - try { - JavaScriptObject rv = createElementNS(document, - "http://www.w3.org/1999/xhtml", "html"); - for (int i = 0; i < attributes.getLength(); i++) { - setAttributeNS(rv, 
attributes.getURINoBoundsCheck(i), - attributes.getLocalNameNoBoundsCheck(i), - attributes.getValueNoBoundsCheck(i)); - } - appendChild(document, rv); - return rv; - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - private static native JavaScriptObject getParentNode( - JavaScriptObject element) /*-{ - return element.parentNode; - }-*/; - - @Override protected void appendElement(JavaScriptObject child, - JavaScriptObject newParent) throws SAXException { - try { - if (newParent == placeholder) { - appendChild(script, cloneNodeDeep(child)); - } - appendChild(newParent, child); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected boolean hasChildren(JavaScriptObject element) - throws SAXException { - try { - return hasChildNodes(element); - } catch (JavaScriptException e) { - fatal(e); - throw new RuntimeException("Unreachable"); - } - } - - private static native void insertBeforeNative(JavaScriptObject parent, - JavaScriptObject child, JavaScriptObject sibling) /*-{ - parent.insertBefore(child, sibling); - }-*/; - - private static native int getNodeType(JavaScriptObject node) /*-{ - return node.nodeType; - }-*/; - - private static native JavaScriptObject cloneNodeDeep(JavaScriptObject node) /*-{ - return node.cloneNode(true); - }-*/; - - /** - * Returns the document. 
- * - * @return the document - */ - JavaScriptObject getDocument() { - JavaScriptObject rv = document; - document = null; - return rv; - } - - private static native JavaScriptObject createDocumentFragment( - JavaScriptObject doc) /*-{ - return doc.createDocumentFragment(); - }-*/; - - JavaScriptObject getDocumentFragment() { - JavaScriptObject rv = createDocumentFragment(document); - JavaScriptObject rootElt = getFirstChild(document); - while (hasChildNodes(rootElt)) { - appendChild(rv, getFirstChild(rootElt)); - } - document = null; - return rv; - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#createJavaScriptObject(String, - * java.lang.String, org.xml.sax.Attributes, java.lang.Object) - */ - @Override protected JavaScriptObject createElement(String ns, String name, - HtmlAttributes attributes, JavaScriptObject form) - throws SAXException { - try { - JavaScriptObject rv = createElement(ns, name, attributes); - // rv.setUserData("nu.validator.form-pointer", form, null); - return rv; - } catch (JavaScriptException e) { - fatal(e); - return null; - } - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#start() - */ - @Override protected void start(boolean fragment) throws SAXException { - script = null; - placeholder = null; - readyToRun = false; - } - - protected void documentMode(DocumentMode mode, String publicIdentifier, - String systemIdentifier) - throws SAXException { - // document.setUserData("nu.validator.document-mode", mode, null); - } - - /** - * @see nu.validator.htmlparser.impl.TreeBuilder#elementPopped(java.lang.String, - * java.lang.String, java.lang.Object) - */ - @Override protected void elementPopped(String ns, String name, - JavaScriptObject node) throws SAXException { - if (node == placeholder) { - readyToRun = true; - requestSuspension(); - } - } - - private static native void replace(JavaScriptObject oldNode, - JavaScriptObject newNode) /*-{ - oldNode.parentNode.replaceChild(newNode, oldNode); - }-*/; - - private static 
native JavaScriptObject getPreviousSibling(JavaScriptObject node) /*-{ - return node.previousSibling; - }-*/; - - void maybeRunScript() { - if (readyToRun) { - readyToRun = false; - replace(placeholder, script); - if (scriptStack.isEmpty()) { - script = null; - placeholder = null; - } else { - ScriptHolder scriptHolder = scriptStack.removeLast(); - script = scriptHolder.getScript(); - placeholder = scriptHolder.getPlaceholder(); - } - } - } - - @Override protected void insertFosterParentedCharacters(String text, - JavaScriptObject table, JavaScriptObject stackParent) - throws SAXException { - try { - JavaScriptObject parent = getParentNode(table); - if (parent != null) { // always an element if not null - JavaScriptObject previousSibling = getPreviousSibling(table); - if (previousSibling != null - && getNodeType(previousSibling) == 3) { - extendTextNode(previousSibling, text); - return; - } - insertBeforeNative(parent, createTextNode(document, text), table); - return; - } - JavaScriptObject lastChild = getLastChild(stackParent); - if (lastChild != null && getNodeType(lastChild) == 3) { - extendTextNode(lastChild, text); - return; - } - appendChild(stackParent, createTextNode(document, text)); - } catch (JavaScriptException e) { - fatal(e); - } - } - - @Override protected void insertFosterParentedChild(JavaScriptObject child, - JavaScriptObject table, JavaScriptObject stackParent) - throws SAXException { - JavaScriptObject parent = getParentNode(table); - try { - if (parent != null && getNodeType(parent) == 1) { - insertBeforeNative(parent, child, table); - } else { - appendChild(stackParent, child); - } - } catch (JavaScriptException e) { - fatal(e); - } - } - - private static native void removeChild(JavaScriptObject parent, - JavaScriptObject child) /*-{ - parent.removeChild(child); - }-*/; - - @Override protected void detachFromParent(JavaScriptObject element) - throws SAXException { - try { - JavaScriptObject parent = getParentNode(element); - if (parent != 
null) { - removeChild(parent, element); - } - } catch (JavaScriptException e) { - fatal(e); - } - } -} diff --git a/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java b/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java deleted file mode 100644 index 1d71cdfd..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/HtmlParser.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007-2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import java.util.LinkedList; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.impl.ErrorReportingTokenizer; -import nu.validator.htmlparser.impl.Tokenizer; -import nu.validator.htmlparser.impl.UTF16Buffer; - -import org.xml.sax.ErrorHandler; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -import com.google.gwt.core.client.JavaScriptObject; -import com.google.gwt.user.client.Timer; - -/** - * This class implements an HTML5 parser that exposes data through the DOM - * interface. - * - *

By default, when using the constructor without arguments, the - * this parser treats XML 1.0-incompatible infosets as fatal errors. - * This corresponds to - * FATAL as the general XML violation policy. To make the parser - * support non-conforming HTML fully per the HTML 5 spec while on the other - * hand potentially violating the DOM API contract, set the general XML - * violation policy to ALLOW. This does not work with a standard - * DOM implementation. Handling all input without fatal errors and without - * violating the DOM API contract is possible by setting - * the general XML violation policy to ALTER_INFOSET. This - * makes the parser non-conforming but is probably the most useful - * setting for most applications. - * - *

The doctype is not represented in the tree. - * - *

The document mode is represented as user data DocumentMode - * object with the key nu.validator.document-mode on the document - * node. - * - *

The form pointer is also stored as user data with the key - * nu.validator.form-pointer. - * - * @version $Id: HtmlDocumentBuilder.java 255 2008-05-29 08:57:38Z hsivonen $ - * @author hsivonen - */ -public class HtmlParser { - - private static final int CHUNK_SIZE = 512; - - private final Tokenizer tokenizer; - - private final BrowserTreeBuilder domTreeBuilder; - - private final StringBuilder documentWriteBuffer = new StringBuilder(); - - private ErrorHandler errorHandler; - - private UTF16Buffer stream; - - private int streamLength; - - private boolean lastWasCR; - - private boolean ending; - - private ParseEndListener parseEndListener; - - private final LinkedList bufferStack = new LinkedList(); - - /** - * Instantiates the parser - * - * @param implementation - * the DOM implementation - * @param xmlPolicy the policy - */ - public HtmlParser(JavaScriptObject document) { - this.domTreeBuilder = new BrowserTreeBuilder(document); - this.tokenizer = new ErrorReportingTokenizer(domTreeBuilder); - this.domTreeBuilder.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setCommentPolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setContentNonXmlCharPolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setNamePolicy(XmlViolationPolicy.ALTER_INFOSET); - this.tokenizer.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); - } - - /** - * Parses a document from a SAX InputSource. 
- * @param is the source - * @return the doc - * @see javax.xml.parsers.DocumentBuilder#parse(org.xml.sax.InputSource) - */ - public void parse(String source, ParseEndListener callback) throws SAXException { - parseEndListener = callback; - domTreeBuilder.setFragmentContext(null); - tokenize(source, null); - } - - /** - * @param is - * @throws SAXException - * @throws IOException - * @throws MalformedURLException - */ - private void tokenize(String source, String context) throws SAXException { - lastWasCR = false; - ending = false; - documentWriteBuffer.setLength(0); - streamLength = source.length(); - stream = new UTF16Buffer(source.toCharArray(), 0, - (streamLength < CHUNK_SIZE ? streamLength : CHUNK_SIZE)); - bufferStack.clear(); - push(stream); - domTreeBuilder.setFragmentContext(context == null ? null : context.intern()); - tokenizer.start(); - pump(); - } - - private void pump() throws SAXException { - if (ending) { - tokenizer.end(); - domTreeBuilder.getDocument(); // drops the internal reference - parseEndListener.parseComplete(); - // Don't schedule timeout - return; - } - - int docWriteLen = documentWriteBuffer.length(); - if (docWriteLen > 0) { - char[] newBuf = new char[docWriteLen]; - documentWriteBuffer.getChars(0, docWriteLen, newBuf, 0); - push(new UTF16Buffer(newBuf, 0, docWriteLen)); - documentWriteBuffer.setLength(0); - } - - for (;;) { - UTF16Buffer buffer = peek(); - if (!buffer.hasMore()) { - if (buffer == stream) { - if (buffer.getEnd() == streamLength) { - // Stop parsing - tokenizer.eof(); - ending = true; - break; - } else { - int newEnd = buffer.getStart() + CHUNK_SIZE; - buffer.setEnd(newEnd < streamLength ? 
newEnd - : streamLength); - continue; - } - } else { - pop(); - continue; - } - } - // now we have a non-empty buffer - buffer.adjust(lastWasCR); - lastWasCR = false; - if (buffer.hasMore()) { - lastWasCR = tokenizer.tokenizeBuffer(buffer); - domTreeBuilder.maybeRunScript(); - break; - } else { - continue; - } - } - - // schedule - Timer timer = new Timer() { - - @Override public void run() { - try { - pump(); - } catch (SAXException e) { - ending = true; - if (errorHandler != null) { - try { - errorHandler.fatalError(new SAXParseException( - e.getMessage(), null, null, -1, -1, e)); - } catch (SAXException e1) { - } - } - } - } - - }; - timer.schedule(1); - } - - private void push(UTF16Buffer buffer) { - bufferStack.addLast(buffer); - } - - private UTF16Buffer peek() { - return bufferStack.getLast(); - } - - private void pop() { - bufferStack.removeLast(); - } - - public void documentWrite(String text) throws SAXException { - UTF16Buffer buffer = new UTF16Buffer(text.toCharArray(), 0, text.length()); - while (buffer.hasMore()) { - buffer.adjust(lastWasCR); - lastWasCR = false; - if (buffer.hasMore()) { - lastWasCR = tokenizer.tokenizeBuffer(buffer); - domTreeBuilder.maybeRunScript(); - } - } - } - - /** - * @see javax.xml.parsers.DocumentBuilder#setErrorHandler(org.xml.sax.ErrorHandler) - */ - public void setErrorHandler(ErrorHandler errorHandler) { - this.errorHandler = errorHandler; - domTreeBuilder.setErrorHandler(errorHandler); - tokenizer.setErrorHandler(errorHandler); - } - - /** - * Sets whether comment nodes appear in the tree. - * @param ignoreComments true to ignore comments - * @see nu.validator.htmlparser.impl.TreeBuilder#setIgnoringComments(boolean) - */ - public void setIgnoringComments(boolean ignoreComments) { - domTreeBuilder.setIgnoringComments(ignoreComments); - } - - /** - * Sets whether the parser considers scripting to be enabled for noscript treatment. 
- * @param scriptingEnabled true to enable - * @see nu.validator.htmlparser.impl.TreeBuilder#setScriptingEnabled(boolean) - */ - public void setScriptingEnabled(boolean scriptingEnabled) { - domTreeBuilder.setScriptingEnabled(scriptingEnabled); - } - -} diff --git a/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java b/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java deleted file mode 100644 index 255a02d1..00000000 --- a/gwt-src/nu/validator/htmlparser/gwt/HtmlParserModule.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import org.xml.sax.SAXException; - -import com.google.gwt.core.client.EntryPoint; -import com.google.gwt.core.client.JavaScriptObject; - -public class HtmlParserModule implements EntryPoint { - - private static native void zapChildren(JavaScriptObject node) /*-{ - while (node.hasChildNodes()) { - node.removeChild(node.lastChild); - } - }-*/; - - private static native void installDocWrite(JavaScriptObject doc, HtmlParser parser) /*-{ - doc.write = function() { - if (arguments.length == 0) { - return; - } - var text = arguments[0]; - for (var i = 1; i < arguments.length; i++) { - text += arguments[i]; - } - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); - } - doc.writeln = function() { - if (arguments.length == 0) { - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)("\n"); - return; - } - var text = arguments[0]; - for (var i = 1; i < arguments.length; i++) { - text += arguments[i]; - } - text += "\n"; - parser.@nu.validator.htmlparser.gwt.HtmlParser::documentWrite(Ljava/lang/String;)(text); - } - }-*/; - - @SuppressWarnings("unused") - private static void parseHtmlDocument(String source, JavaScriptObject document, JavaScriptObject readyCallback, JavaScriptObject errorHandler) throws SAXException { - if (readyCallback == null) { - readyCallback = JavaScriptObject.createFunction(); - } - zapChildren(document); - HtmlParser parser = new HtmlParser(document); - parser.setScriptingEnabled(true); - // XXX error handler - - installDocWrite(document, parser); - - parser.parse(source, new ParseEndListener(readyCallback)); - } - - private static native void exportEntryPoints() /*-{ - $wnd.parseHtmlDocument = @nu.validator.htmlparser.gwt.HtmlParserModule::parseHtmlDocument(Ljava/lang/String;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;Lcom/google/gwt/core/client/JavaScriptObject;); - }-*/; - - - public void 
onModuleLoad() { - exportEntryPoints(); - } - -} diff --git a/gwt-src/nu/validator/htmlparser/public/HtmlParser.html b/gwt-src/nu/validator/htmlparser/public/HtmlParser.html deleted file mode 100644 index 4d9cde81..00000000 --- a/gwt-src/nu/validator/htmlparser/public/HtmlParser.html +++ /dev/null @@ -1,225 +0,0 @@ - - - - Live DOM Viewer - - - - - - -

Live DOM Viewer

-

Markup to test (, upload, download, hide):

-

-

DOM view (hide, refresh):

-
    -

    Rendered view: (hide):

    -

    -

    innerHTML view: (show, refresh):

    - -

    Log: (hide):

    -
    Script not loaded.
    - -

    This script puts a function w(s) into the - global scope of the test page, where s is a string to - output to the log. Also, five files are accessible in the current - directory for test purposes: image (a GIF image), - flash (a Flash file), script (a JS file), - style (a CSS file), and document (an HTML - file).

    - - \ No newline at end of file diff --git a/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt b/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt deleted file mode 100644 index bd2f4fcf..00000000 --- a/gwt-src/nu/validator/htmlparser/public/LICENSE.Live-DOM-viewer.txt +++ /dev/null @@ -1,25 +0,0 @@ -From: -http://software.hixie.ch/utilities/js/live-dom-viewer/LICENSE -regarding the upstream of HtmlParser.html: - -The MIT License - -Copyright (c) 2000, 2006, 2008 Ian Hickson and various contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/gwt-src/nu/validator/htmlparser/public/blank.html b/gwt-src/nu/validator/htmlparser/public/blank.html deleted file mode 100644 index a8756c9f..00000000 --- a/gwt-src/nu/validator/htmlparser/public/blank.html +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/htmlparser/pom.xml b/htmlparser/pom.xml new file mode 100644 index 00000000..3922ae75 --- /dev/null +++ b/htmlparser/pom.xml @@ -0,0 +1,134 @@ + + + 4.0.0 + + + nu.validator.htmlparser + parent + 2.0 + + + htmlparser + + htmlparser + + + + nu.validator.htmlparser + saxtree + + + com.sdicons.jsontools + jsontools-core + 1.7 + test + + + + + + + maven-resources-plugin + + + backup-tokenizer + process-sources + + copy-resources + + + true + ${project.build.directory}/hotspot-backup + + + src/main/java/nu/validator/htmlparser/impl + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    + + restore-tokenizer + process-classes + + copy-resources + + + true + src/main/java/nu/validator/htmlparser/impl + + + ${project.build.directory}/hotspot-backup + +
  • Tokenizer.java
  • +
    +
    +
    +
    +
    +
    +
    + + org.codehaus.mojo + exec-maven-plugin + + + tokenizer-hotspot-workaround-javac + process-sources + + exec + + + javac + + ${project.basedir}/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java + -d + ${project.build.directory}/hotspot-classes + + + + + tokenizer-hotspot-workaround-java + process-sources + + exec + + + java + + -cp + ${project.build.directory}/hotspot-classes + nu.validator.htmlparser.generator.ApplyHotSpotWorkaround + ${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java + ${project.basedir}/src/hotspot/resources/HotSpotWorkaround.txt + + + + + +
    +
    +
    diff --git a/translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java b/htmlparser/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java similarity index 100% rename from translator-src/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java rename to htmlparser/src/hotspot/java/nu/validator/htmlparser/generator/ApplyHotSpotWorkaround.java diff --git a/src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt b/htmlparser/src/hotspot/resources/HotSpotWorkaround.txt similarity index 100% rename from src/nu/validator/htmlparser/impl/HotSpotWorkaround.txt rename to htmlparser/src/hotspot/resources/HotSpotWorkaround.txt diff --git a/htmlparser/src/main/java/module-info.java b/htmlparser/src/main/java/module-info.java new file mode 100644 index 00000000..a9e5f2ef --- /dev/null +++ b/htmlparser/src/main/java/module-info.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides an implementation of the HTML5 parsing algorithm in Java for applications. + * The parser is designed to work as a drop-in replacement for the XML parser in applications + * that already support XHTML 1.x content with an XML parser and use SAX or DOM to interface with the parser. + */ +module nu.validator.htmlparser { + requires transitive java.xml; + requires nu.validator.saxtree; + + exports nu.validator.htmlparser.annotation; + exports nu.validator.htmlparser.common; + exports nu.validator.htmlparser.dom; + exports nu.validator.htmlparser.extra; + exports nu.validator.htmlparser.impl; + exports nu.validator.htmlparser.io; + exports nu.validator.htmlparser.rewindable; + exports nu.validator.htmlparser.sax; +} diff --git a/src/nu/validator/htmlparser/annotation/Auto.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Auto.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Auto.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Auto.java diff --git a/src/nu/validator/htmlparser/annotation/CharacterName.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/CharacterName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/CharacterName.java diff --git a/src/nu/validator/htmlparser/annotation/Const.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Const.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Const.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Const.java diff --git 
a/src/nu/validator/htmlparser/annotation/Creator.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Creator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Creator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Creator.java diff --git a/src/nu/validator/htmlparser/annotation/HtmlCreator.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/HtmlCreator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/HtmlCreator.java diff --git a/src/nu/validator/htmlparser/annotation/IdType.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/IdType.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/IdType.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/IdType.java diff --git a/src/nu/validator/htmlparser/annotation/Inline.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Inline.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Inline.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Inline.java diff --git a/src/nu/validator/htmlparser/annotation/Literal.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Literal.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Literal.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Literal.java diff --git a/src/nu/validator/htmlparser/annotation/Local.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Local.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Local.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Local.java diff --git a/src/nu/validator/htmlparser/annotation/NoLength.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/NoLength.java similarity 
index 100% rename from src/nu/validator/htmlparser/annotation/NoLength.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/NoLength.java diff --git a/src/nu/validator/htmlparser/annotation/NsUri.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/NsUri.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/NsUri.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/NsUri.java diff --git a/src/nu/validator/htmlparser/annotation/Prefix.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Prefix.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Prefix.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Prefix.java diff --git a/src/nu/validator/htmlparser/annotation/QName.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/QName.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/QName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/QName.java diff --git a/src/nu/validator/htmlparser/annotation/SvgCreator.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/SvgCreator.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/SvgCreator.java diff --git a/src/nu/validator/htmlparser/annotation/Unsigned.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Unsigned.java rename to htmlparser/src/main/java/nu/validator/htmlparser/annotation/Unsigned.java diff --git a/src/nu/validator/htmlparser/annotation/Virtual.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/Virtual.java similarity index 100% rename from src/nu/validator/htmlparser/annotation/Virtual.java rename to 
htmlparser/src/main/java/nu/validator/htmlparser/annotation/Virtual.java diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java new file mode 100644 index 00000000..ca70c00b --- /dev/null +++ b/htmlparser/src/main/java/nu/validator/htmlparser/annotation/package-info.java @@ -0,0 +1,27 @@ +/* + Copyright (c) 2008 Mozilla Foundation + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +/** + * This package provides annotations for facilitating automated translation + * of the source code into other programming languages. 
+ */ +package nu.validator.htmlparser.annotation; diff --git a/src/nu/validator/htmlparser/common/ByteReadable.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/ByteReadable.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/common/ByteReadable.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/ByteReadable.java diff --git a/src/nu/validator/htmlparser/common/CharacterHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/CharacterHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/CharacterHandler.java diff --git a/src/nu/validator/htmlparser/common/DocumentMode.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentMode.java similarity index 100% rename from src/nu/validator/htmlparser/common/DocumentMode.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentMode.java diff --git a/src/nu/validator/htmlparser/common/DocumentModeHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/DocumentModeHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/DocumentModeHandler.java diff --git a/src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/EncodingDeclarationHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/EncodingDeclarationHandler.java diff --git a/src/nu/validator/htmlparser/common/Heuristics.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/Heuristics.java similarity index 100% rename from src/nu/validator/htmlparser/common/Heuristics.java rename to 
htmlparser/src/main/java/nu/validator/htmlparser/common/Heuristics.java diff --git a/src/nu/validator/htmlparser/common/Interner.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/Interner.java similarity index 100% rename from src/nu/validator/htmlparser/common/Interner.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/Interner.java diff --git a/src/nu/validator/htmlparser/common/TokenHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/TokenHandler.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/common/TokenHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/TokenHandler.java diff --git a/src/nu/validator/htmlparser/common/TransitionHandler.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java similarity index 100% rename from src/nu/validator/htmlparser/common/TransitionHandler.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/TransitionHandler.java diff --git a/src/nu/validator/htmlparser/common/XmlViolationPolicy.java b/htmlparser/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java similarity index 100% rename from src/nu/validator/htmlparser/common/XmlViolationPolicy.java rename to htmlparser/src/main/java/nu/validator/htmlparser/common/XmlViolationPolicy.java diff --git a/test-src/nu/validator/htmlparser/tools/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java similarity index 86% rename from test-src/nu/validator/htmlparser/tools/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java index a04bf3cd..0d04ee70 100644 --- a/test-src/nu/validator/htmlparser/tools/package.html +++ b/htmlparser/src/main/java/nu/validator/htmlparser/common/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    Demo apps.

    - - \ No newline at end of file + */ + +/** + * This package provides common interfaces and enumerations. + */ +package nu.validator.htmlparser.common; diff --git a/src/nu/validator/htmlparser/dom/DOMTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/dom/DOMTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/DOMTreeBuilder.java diff --git a/src/nu/validator/htmlparser/dom/Dom2Sax.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java similarity index 100% rename from src/nu/validator/htmlparser/dom/Dom2Sax.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/Dom2Sax.java diff --git a/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java similarity index 98% rename from src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java index 1674aa72..cbeccb51 100644 --- a/src/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/dom/HtmlDocumentBuilder.java @@ -375,7 +375,7 @@ public void setTransitionHander(TransitionHandler handler) { /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. - * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -384,7 +384,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. 
* @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -609,7 +609,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. * * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; @@ -650,6 +650,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. * @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -659,6 +660,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } diff --git a/test-src/nu/validator/htmlparser/test/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java similarity index 85% rename from test-src/nu/validator/htmlparser/test/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java index 57809b84..8d874fd0 100644 --- a/test-src/nu/validator/htmlparser/test/package.html +++ b/htmlparser/src/main/java/nu/validator/htmlparser/dom/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    Test drivers.

    - - \ No newline at end of file + */ + +/** + * This package provides an HTML5 parser that exposes the document using the DOM API. + */ +package nu.validator.htmlparser.dom; diff --git a/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java new file mode 100644 index 00000000..44016348 --- /dev/null +++ b/htmlparser/src/main/java/nu/validator/htmlparser/extra/NormalizationChecker.java @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2006, 2007 Henri Sivonen + * Copyright (c) 2007 Mozilla Foundation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +package nu.validator.htmlparser.extra; + +import nu.validator.htmlparser.common.CharacterHandler; + +import java.text.Normalizer; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +/** + * @version $Id$ + * @author hsivonen + */ +public final class NormalizationChecker implements CharacterHandler { + + private ErrorHandler errorHandler; + + private Locator locator; + + /** + * A set of composing characters as per Charmod Norm. + * + * Generated with ICU4J 67.1 using: new UnicodeSet("[[:nfc_qc=maybe:][:^ccc=0:]]").freeze() + */ + private static final Set COMPOSING_CHARACTERS = new HashSet(Arrays.asList( + 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, + 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, + 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, + 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, + 840, 841, 842, 843, 844, 845, 846, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, + 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, + 877, 878, 879, 1155, 1156, 1157, 1158, 1159, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, + 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, + 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, + 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1471, 1473, 1474, 1476, 1477, 1479, 1552, 1553, + 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1611, 1612, 1613, 1614, 1615, 1616, + 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, + 1648, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1759, 1760, 1761, 1762, 1763, 1764, 1767, + 1768, 1770, 
1771, 1772, 1773, 1809, 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, + 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, + 1864, 1865, 1866, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2045, 2070, 2071, + 2072, 2073, 2075, 2076, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2085, 2086, 2087, 2089, + 2090, 2091, 2092, 2093, 2137, 2138, 2139, 2259, 2260, 2261, 2262, 2263, 2264, 2265, 2266, + 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, + 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296, 2297, + 2298, 2299, 2300, 2301, 2302, 2303, 2364, 2381, 2385, 2386, 2387, 2388, 2492, 2494, 2509, + 2519, 2558, 2620, 2637, 2748, 2765, 2876, 2878, 2893, 2902, 2903, 3006, 3021, 3031, 3149, + 3157, 3158, 3260, 3266, 3277, 3285, 3286, 3387, 3388, 3390, 3405, 3415, 3530, 3535, 3551, + 3640, 3641, 3642, 3656, 3657, 3658, 3659, 3768, 3769, 3770, 3784, 3785, 3786, 3787, 3864, + 3865, 3893, 3895, 3897, 3953, 3954, 3956, 3962, 3963, 3964, 3965, 3968, 3970, 3971, 3972, + 3974, 3975, 4038, 4142, 4151, 4153, 4154, 4237, 4449, 4450, 4451, 4452, 4453, 4454, 4455, + 4456, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4520, + 4521, 4522, 4523, 4524, 4525, 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, 4534, 4535, + 4536, 4537, 4538, 4539, 4540, 4541, 4542, 4543, 4544, 4545, 4546, 4957, 4958, 4959, 5908, + 5940, 6098, 6109, 6313, 6457, 6458, 6459, 6679, 6680, 6752, 6773, 6774, 6775, 6776, 6777, + 6778, 6779, 6780, 6783, 6832, 6833, 6834, 6835, 6836, 6837, 6838, 6839, 6840, 6841, 6842, + 6843, 6844, 6845, 6847, 6848, 6964, 6965, 6980, 7019, 7020, 7021, 7022, 7023, 7024, 7025, + 7026, 7027, 7082, 7083, 7142, 7154, 7155, 7223, 7376, 7377, 7378, 7380, 7381, 7382, 7383, + 7384, 7385, 7386, 7387, 7388, 7389, 7390, 7391, 7392, 7394, 7395, 7396, 7397, 7398, 7399, + 7400, 7405, 7412, 7416, 7417, 7616, 7617, 7618, 7619, 7620, 7621, 7622, 7623, 
7624, 7625, + 7626, 7627, 7628, 7629, 7630, 7631, 7632, 7633, 7634, 7635, 7636, 7637, 7638, 7639, 7640, + 7641, 7642, 7643, 7644, 7645, 7646, 7647, 7648, 7649, 7650, 7651, 7652, 7653, 7654, 7655, + 7656, 7657, 7658, 7659, 7660, 7661, 7662, 7663, 7664, 7665, 7666, 7667, 7668, 7669, 7670, + 7671, 7672, 7673, 7675, 7676, 7677, 7678, 7679, 8400, 8401, 8402, 8403, 8404, 8405, 8406, + 8407, 8408, 8409, 8410, 8411, 8412, 8417, 8421, 8422, 8423, 8424, 8425, 8426, 8427, 8428, + 8429, 8430, 8431, 8432, 11503, 11504, 11505, 11647, 11744, 11745, 11746, 11747, 11748, 11749, + 11750, 11751, 11752, 11753, 11754, 11755, 11756, 11757, 11758, 11759, 11760, 11761, 11762, + 11763, 11764, 11765, 11766, 11767, 11768, 11769, 11770, 11771, 11772, 11773, 11774, 11775, + 12330, 12331, 12332, 12333, 12334, 12335, 12441, 12442, 42607, 42612, 42613, 42614, 42615, + 42616, 42617, 42618, 42619, 42620, 42621, 42654, 42655, 42736, 42737, 43014, 43052, 43204, + 43232, 43233, 43234, 43235, 43236, 43237, 43238, 43239, 43240, 43241, 43242, 43243, 43244, + 43245, 43246, 43247, 43248, 43249, 43307, 43308, 43309, 43347, 43443, 43456, 43696, 43698, + 43699, 43700, 43703, 43704, 43710, 43711, 43713, 43766, 44013, 64286, 65056, 65057, 65058, + 65059, 65060, 65061, 65062, 65063, 65064, 65065, 65066, 65067, 65068, 65069, 65070, 65071, + 66045, 66272, 66422, 66423, 66424, 66425, 66426, 68109, 68111, 68152, 68153, 68154, 68159, + 68325, 68326, 68900, 68901, 68902, 68903, 69291, 69292, 69446, 69447, 69448, 69449, 69450, + 69451, 69452, 69453, 69454, 69455, 69456, 69702, 69759, 69817, 69818, 69888, 69889, 69890, + 69927, 69939, 69940, 70003, 70080, 70090, 70197, 70198, 70377, 70378, 70459, 70460, 70462, + 70477, 70487, 70502, 70503, 70504, 70505, 70506, 70507, 70508, 70512, 70513, 70514, 70515, + 70516, 70722, 70726, 70750, 70832, 70842, 70845, 70850, 70851, 71087, 71103, 71104, 71231, + 71350, 71351, 71467, 71737, 71738, 71984, 71997, 71998, 72003, 72160, 72244, 72263, 72345, + 72767, 73026, 73028, 73029, 73111, 
92912, 92913, 92914, 92915, 92916, 92976, 92977, 92978, + 92979, 92980, 92981, 92982, 94192, 94193, 113822, 119141, 119142, 119143, 119144, 119145, + 119149, 119150, 119151, 119152, 119153, 119154, 119163, 119164, 119165, 119166, 119167, + 119168, 119169, 119170, 119173, 119174, 119175, 119176, 119177, 119178, 119179, 119210, + 119211, 119212, 119213, 119362, 119363, 119364, 122880, 122881, 122882, 122883, 122884, + 122885, 122886, 122888, 122889, 122890, 122891, 122892, 122893, 122894, 122895, 122896, + 122897, 122898, 122899, 122900, 122901, 122902, 122903, 122904, 122907, 122908, 122909, + 122910, 122911, 122912, 122913, 122915, 122916, 122918, 122919, 122920, 122921, 122922, + 123184, 123185, 123186, 123187, 123188, 123189, 123190, 123628, 123629, 123630, 123631, + 125136, 125137, 125138, 125139, 125140, 125141, 125142, 125252, 125253, 125254, 125255, + 125256, 125257, 125258 + )); + + // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908 + + /** + * A buffer for holding sequences overlap the SAX buffer boundary. + */ + private char[] buf = new char[128]; + + /** + * A holder for the original buffer (for the memory leak prevention + * mechanism). + */ + private char[] bufHolder = null; + + /** + * The current used length of the buffer, i.e. the index of the first slot + * that does not hold current data. + */ + private int pos; + + /** + * Indicates whether the checker the next call to characters() + * is the first call in a run. + */ + private boolean atStartOfRun; + + /** + * Indicates whether the current run has already caused an error. + */ + private boolean alreadyComplainedAboutThisRun; + + /** + * Emit an error. The locator is used. 
+ * + * @param message the error message + * @throws SAXException if something goes wrong + */ + public void err(String message) throws SAXException { + if (errorHandler != null) { + SAXParseException spe = new SAXParseException(message, locator); + errorHandler.error(spe); + } + } + + /** + * Returns true if the argument is a composing BMP character + * or a surrogate and false otherwise. + * + * @param c a UTF-16 code unit + * @return true if the argument is a composing BMP character + * or a surrogate and false otherwise + */ + private static boolean isComposingCharOrSurrogate(char c) { + if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) { + return true; + } + return isComposingChar(c); + } + + /** + * Returns true if the argument is a composing character + * and false otherwise. + * + * @param c a Unicode code point + * @return true if the argument is a composing character + * false otherwise + */ + private static boolean isComposingChar(int c) { + return COMPOSING_CHARACTERS.contains(c); + } + + /** + * Constructor with locator. 
+ * + * @param locator + */ + public NormalizationChecker(Locator locator) { + super(); + start(); + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#start() + */ + public void start() { + atStartOfRun = true; + alreadyComplainedAboutThisRun = false; + pos = 0; + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int) + */ + public void characters(char[] ch, int start, int length) + throws SAXException { + if (alreadyComplainedAboutThisRun) { + return; + } + if (atStartOfRun) { + char c = ch[start]; + if (pos == 1) { + // there's a single high surrogate in buf + if (isComposingChar(getCodePoint(buf[0], c))) { + err("Text run starts with a composing character."); + } + atStartOfRun = false; + } else { + if (length == 1 && Character.isHighSurrogate(c)) { + buf[0] = c; + pos = 1; + return; + } else { + if (Character.isHighSurrogate(c)) { + if (isComposingChar(getCodePoint(c, ch[start + 1]))) { + err("Text run starts with a composing character."); + } + } else { + if (isComposingCharOrSurrogate(c)) { + err("Text run starts with a composing character."); + } + } + atStartOfRun = false; + } + } + } + int i = start; + int stop = start + length; + if (pos > 0) { + // there's stuff in buf + while (i < stop && isComposingCharOrSurrogate(ch[i])) { + i++; + } + appendToBuf(ch, start, i); + if (i == stop) { + return; + } else { + if (!Normalizer.isNormalized(new String(buf, 0, pos), Normalizer.Form.NFC)) { + errAboutTextRun(); + } + pos = 0; + } + } + if (i < stop) { + start = i; + i = stop - 1; + while (i > start && isComposingCharOrSurrogate(ch[i])) { + i--; + } + if (i > start) { + if (!Normalizer.isNormalized(new String(ch, start, i), Normalizer.Form.NFC)) { + errAboutTextRun(); + } + } + appendToBuf(ch, i, stop); + } + } + + private static int getCodePoint(char lead, char trail) { + if (Character.isSurrogatePair(lead, trail)) { + return Character.toCodePoint(lead, trail); + } + throw new 
IllegalArgumentException("Illegal surrogate characters"); + } + + /** + * Emits an error stating that the current text run or the source + * text is not in NFC. + * + * @throws SAXException if the ErrorHandler throws + */ + private void errAboutTextRun() throws SAXException { + err("Source text is not in Unicode Normalization Form C."); + alreadyComplainedAboutThisRun = true; + } + + /** + * Appends a slice of an UTF-16 code unit array to the internal + * buffer. + * + * @param ch the array from which to copy + * @param start the index of the first element that is copied + * @param end the index of the first element that is not copied + */ + private void appendToBuf(char[] ch, int start, int end) { + if (start == end) { + return; + } + int neededBufLen = pos + (end - start); + if (neededBufLen > buf.length) { + char[] newBuf = new char[neededBufLen]; + System.arraycopy(buf, 0, newBuf, 0, pos); + if (bufHolder == null) { + bufHolder = buf; // keep the original around + } + buf = newBuf; + } + System.arraycopy(ch, start, buf, pos, end - start); + pos += (end - start); + } + + /** + * @see nu.validator.htmlparser.common.CharacterHandler#end() + */ + public void end() throws SAXException { + if (!alreadyComplainedAboutThisRun + && !Normalizer.isNormalized(new String(buf, 0, pos), Normalizer.Form.NFC)) { + errAboutTextRun(); + } + if (bufHolder != null) { + // restore the original small buffer to avoid leaking + // memory if this checker is recycled + buf = bufHolder; + bufHolder = null; + } + } + + public void setErrorHandler(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + } + +} diff --git a/src/nu/validator/htmlparser/impl/AttributeName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java similarity index 99% rename from src/nu/validator/htmlparser/impl/AttributeName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java index ca748713..1f8ce5e4 100644 --- 
a/src/nu/validator/htmlparser/impl/AttributeName.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/AttributeName.java @@ -22,11 +22,6 @@ package nu.validator.htmlparser.impl; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; - import nu.validator.htmlparser.annotation.Inline; import nu.validator.htmlparser.annotation.Local; import nu.validator.htmlparser.annotation.NoLength; diff --git a/src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/CoalescingTreeBuilder.java diff --git a/src/nu/validator/htmlparser/impl/ElementName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ElementName.java similarity index 100% rename from src/nu/validator/htmlparser/impl/ElementName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/ElementName.java diff --git a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java similarity index 99% rename from src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index 19fbe7a6..4c41e33a 100644 --- a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -128,7 +128,7 @@ public void setContentNonXmlCharPolicy( /** * Sets the errorProfile. 
* - * @param errorProfile + * @param errorProfileMap */ public void setErrorProfile(HashMap errorProfileMap) { this.errorProfileMap = errorProfileMap; @@ -225,6 +225,7 @@ public boolean isNextCharOnNewLine() { cstart = 0x7fffffff; } + @SuppressWarnings("fallthrough") @Override protected char checkChar(@NoLength char[] buf, int pos) throws SAXException { linePrev = line; @@ -564,6 +565,7 @@ private boolean isAstralPrivateUse(int c) { } } + @SuppressWarnings("fallthrough") @Override protected char errNcrNonCharacter(char ch) throws SAXException { switch (contentNonXmlCharPolicy) { case FATAL: @@ -592,6 +594,7 @@ private boolean isAstralPrivateUse(int c) { err("Character reference expands to a surrogate."); } + @SuppressWarnings("fallthrough") @Override protected char errNcrControlChar(char ch) throws SAXException { switch (contentNonXmlCharPolicy) { case FATAL: diff --git a/src/nu/validator/htmlparser/impl/HtmlAttributes.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java similarity index 99% rename from src/nu/validator/htmlparser/impl/HtmlAttributes.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java index 7f747c3a..5401f233 100644 --- a/src/nu/validator/htmlparser/impl/HtmlAttributes.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/HtmlAttributes.java @@ -31,7 +31,6 @@ import nu.validator.htmlparser.annotation.NsUri; import nu.validator.htmlparser.annotation.Prefix; import nu.validator.htmlparser.annotation.QName; -import nu.validator.htmlparser.common.Interner; import nu.validator.htmlparser.common.XmlViolationPolicy; /** @@ -482,6 +481,7 @@ public boolean equalsAnother(HtmlAttributes other) { return true; } + @SuppressWarnings("fallthrough") void processNonNcNames(TreeBuilder treeBuilder, XmlViolationPolicy namePolicy) throws SAXException { for (int i = 0; i < length; i++) { AttributeName attName = names[i]; diff --git a/src/nu/validator/htmlparser/impl/LocatorImpl.java 
b/htmlparser/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java similarity index 97% rename from src/nu/validator/htmlparser/impl/LocatorImpl.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java index b334aa6a..29bb8c72 100644 --- a/src/nu/validator/htmlparser/impl/LocatorImpl.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/LocatorImpl.java @@ -26,7 +26,7 @@ import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; -public class LocatorImpl implements Locator, Locator2 { +public class LocatorImpl implements Locator2 { private final String systemId; diff --git a/src/nu/validator/htmlparser/impl/MetaScanner.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java old mode 100755 new mode 100644 similarity index 99% rename from src/nu/validator/htmlparser/impl/MetaScanner.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java index a1c8eb80..10c24e40 --- a/src/nu/validator/htmlparser/impl/MetaScanner.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/MetaScanner.java @@ -208,6 +208,7 @@ protected int read() throws IOException { /** * The runs the meta scanning algorithm. 
*/ + @SuppressWarnings("fallthrough") protected final void stateLoop(int state) throws SAXException, IOException { int c = -1; diff --git a/src/nu/validator/htmlparser/impl/NCName.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NCName.java similarity index 100% rename from src/nu/validator/htmlparser/impl/NCName.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NCName.java diff --git a/src/nu/validator/htmlparser/impl/NamedCharacters.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/impl/NamedCharacters.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharacters.java diff --git a/src/nu/validator/htmlparser/impl/NamedCharactersAccel.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java similarity index 100% rename from src/nu/validator/htmlparser/impl/NamedCharactersAccel.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/NamedCharactersAccel.java diff --git a/src/nu/validator/htmlparser/impl/Portability.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Portability.java similarity index 100% rename from src/nu/validator/htmlparser/impl/Portability.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/Portability.java diff --git a/src/nu/validator/htmlparser/impl/PushedLocation.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java similarity index 100% rename from src/nu/validator/htmlparser/impl/PushedLocation.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/PushedLocation.java diff --git a/src/nu/validator/htmlparser/impl/StackNode.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StackNode.java similarity index 100% rename from src/nu/validator/htmlparser/impl/StackNode.java rename to 
htmlparser/src/main/java/nu/validator/htmlparser/impl/StackNode.java diff --git a/src/nu/validator/htmlparser/impl/StateSnapshot.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java similarity index 99% rename from src/nu/validator/htmlparser/impl/StateSnapshot.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java index cba711f7..f6c3359a 100644 --- a/src/nu/validator/htmlparser/impl/StateSnapshot.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/StateSnapshot.java @@ -25,6 +25,7 @@ import nu.validator.htmlparser.annotation.Auto; +@SuppressWarnings("exports") public class StateSnapshot implements TreeBuilderState { private final @Auto StackNode[] stack; diff --git a/src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java similarity index 100% rename from src/nu/validator/htmlparser/impl/TaintableLocatorImpl.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TaintableLocatorImpl.java diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java old mode 100755 new mode 100644 similarity index 99% rename from src/nu/validator/htmlparser/impl/Tokenizer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java index 13fd56b1..db6257e1 --- a/src/nu/validator/htmlparser/impl/Tokenizer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/Tokenizer.java @@ -36,7 +36,6 @@ package nu.validator.htmlparser.impl; import org.xml.sax.ErrorHandler; -import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -67,7 +66,8 @@ * @version $Id$ * @author hsivonen */ -public class Tokenizer implements Locator, Locator2 { +@SuppressWarnings("fallthrough") +public class Tokenizer implements Locator2 { private static final int 
DATA_AND_RCDATA_MASK = ~1; diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java similarity index 99% rename from src/nu/validator/htmlparser/impl/TreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java index 7b78b1b7..784ab921 100644 --- a/src/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilder.java @@ -35,7 +35,6 @@ package nu.validator.htmlparser.impl; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -54,10 +53,10 @@ import nu.validator.htmlparser.annotation.NsUri; import nu.validator.htmlparser.common.DocumentMode; import nu.validator.htmlparser.common.DocumentModeHandler; -import nu.validator.htmlparser.common.Interner; import nu.validator.htmlparser.common.TokenHandler; import nu.validator.htmlparser.common.XmlViolationPolicy; +@SuppressWarnings("exports") public abstract class TreeBuilder implements TokenHandler, TreeBuilderState { @@ -577,7 +576,7 @@ final void warn(String message, Locator locator) throws SAXException { // ]NOCPP] - @SuppressWarnings("unchecked") public final void startTokenization(Tokenizer self) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) public final void startTokenization(Tokenizer self) throws SAXException { tokenizer = self; stackNodes = new StackNode[64]; stack = new StackNode[64]; @@ -844,6 +843,7 @@ public final void comment(@NoLength char[] buf, int start, int length) * @see nu.validator.htmlparser.common.TokenHandler#characters(char[], int, * int) */ + @SuppressWarnings("fallthrough") public final void characters(@Const @NoLength char[] buf, int start, int length) throws SAXException { // Note: Can't attach error messages to EOF in C++ yet @@ -1241,6 +1241,7 @@ public void zeroOriginatingReplacementCharacter() throws SAXException { } } + @SuppressWarnings("fallthrough") public final void 
eof() throws SAXException { flushCharacters(); // Note: Can't attach error messages to EOF in C++ yet @@ -1448,6 +1449,7 @@ public final void endTokenization() throws SAXException { end(); } + @SuppressWarnings("fallthrough") public final void startTag(ElementName elementName, HtmlAttributes attributes, boolean selfClosing) throws SAXException { flushCharacters(); @@ -2898,8 +2900,6 @@ private boolean isSpecialParentInForeign(StackNode stackNode) { * C++ memory note: The return value must be released. * * @return - * @throws SAXException - * @throws StopSniffingException */ public static String extractCharsetFromContent(String attributeValue // CPPONLY: , TreeBuilder tb @@ -3095,6 +3095,7 @@ private void checkMetaCharset(HtmlAttributes attributes) } } + @SuppressWarnings("fallthrough") public final void endTag(ElementName elementName) throws SAXException { flushCharacters(); needToDropLF = false; @@ -4292,7 +4293,7 @@ private void pushTemplateMode(int mode) { templateModeStack[templateModePtr] = mode; } - @SuppressWarnings("unchecked") private void push(StackNode node) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) private void push(StackNode node) throws SAXException { currentPtr++; if (currentPtr == stack.length) { StackNode[] newStack = new StackNode[stack.length + 64]; @@ -4303,7 +4304,7 @@ private void pushTemplateMode(int mode) { elementPushed(node.ns, node.popName, node.node); } - @SuppressWarnings("unchecked") private void silentPush(StackNode node) throws SAXException { + @SuppressWarnings({"rawtypes", "unchecked"}) private void silentPush(StackNode node) throws SAXException { currentPtr++; if (currentPtr == stack.length) { StackNode[] newStack = new StackNode[stack.length + 64]; @@ -4313,7 +4314,7 @@ private void pushTemplateMode(int mode) { stack[currentPtr] = node; } - @SuppressWarnings("unchecked") private void append(StackNode node) { + @SuppressWarnings({"rawtypes", "unchecked"}) private void append(StackNode node) { listPtr++; 
if (listPtr == listOfActiveFormattingElements.length) { StackNode[] newList = new StackNode[listOfActiveFormattingElements.length + 64]; @@ -4787,7 +4788,7 @@ void notifyUnusedStackNode(int idxInStackNodes) { } } - @SuppressWarnings("unchecked") private StackNode getUnusedStackNode() { + @SuppressWarnings({"rawtypes", "unchecked"}) private StackNode getUnusedStackNode() { // Search for an unused stack node. while (stackNodesIdx < numStackNodes) { if (stackNodes[stackNodesIdx].isUnused()) { @@ -5871,7 +5872,7 @@ private boolean charBufferContainsNonWhitespace() { * @return a snapshot. * @throws SAXException */ - @SuppressWarnings("unchecked") public TreeBuilderState newSnapshot() + @SuppressWarnings({"rawtypes", "unchecked"}) public TreeBuilderState newSnapshot() throws SAXException { StackNode[] listCopy = new StackNode[listPtr + 1]; for (int i = 0; i < listCopy.length; i++) { @@ -5965,7 +5966,7 @@ public boolean snapshotMatches(TreeBuilderState snapshot) { return true; } - @SuppressWarnings("unchecked") public void loadState( + @SuppressWarnings({"rawtypes", "unchecked"}) public void loadState( TreeBuilderState snapshot) throws SAXException { // CPPONLY: mCurrentHtmlScriptIsAsyncOrDefer = false; diff --git a/src/nu/validator/htmlparser/impl/TreeBuilderState.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java similarity index 99% rename from src/nu/validator/htmlparser/impl/TreeBuilderState.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java index 5b7a9dcf..42c019fc 100644 --- a/src/nu/validator/htmlparser/impl/TreeBuilderState.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/TreeBuilderState.java @@ -29,6 +29,7 @@ * @version $Id$ * @author hsivonen */ +@SuppressWarnings("exports") public interface TreeBuilderState { /** diff --git a/src/nu/validator/htmlparser/impl/UTF16Buffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java similarity index 100% 
rename from src/nu/validator/htmlparser/impl/UTF16Buffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/UTF16Buffer.java diff --git a/src/nu/validator/htmlparser/common/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java similarity index 83% rename from src/nu/validator/htmlparser/common/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java index 43f141cd..dfa43eec 100644 --- a/src/nu/validator/htmlparser/common/package.html +++ b/htmlparser/src/main/java/nu/validator/htmlparser/impl/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package provides common interfaces and enumerations.

    - - \ No newline at end of file + */ + +/** + * This package contains the bulk of parser internals. + * Only implementors of additional tree builders or token handlers should look here. + */ +package nu.validator.htmlparser.impl; diff --git a/src/nu/validator/htmlparser/io/BomSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/BomSniffer.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/io/BomSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/BomSniffer.java diff --git a/src/nu/validator/htmlparser/io/Confidence.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Confidence.java similarity index 100% rename from src/nu/validator/htmlparser/io/Confidence.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Confidence.java diff --git a/src/nu/validator/htmlparser/io/Driver.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java similarity index 99% rename from src/nu/validator/htmlparser/io/Driver.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java index aa2354c9..0c971eef 100644 --- a/src/nu/validator/htmlparser/io/Driver.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/Driver.java @@ -497,6 +497,7 @@ protected Encoding whineAboutEncodingAndReturnActual(String encoding, } } + @SuppressWarnings("serial") private class ReparseException extends SAXException { } diff --git a/src/nu/validator/htmlparser/io/Encoding.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/Encoding.java similarity index 100% rename from src/nu/validator/htmlparser/io/Encoding.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/Encoding.java diff --git a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java old mode 100755 new mode 100644 similarity index 96% rename from 
src/nu/validator/htmlparser/io/HtmlInputStreamReader.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java index 3de1af2a..b7d7e14b --- a/src/nu/validator/htmlparser/io/HtmlInputStreamReader.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/HtmlInputStreamReader.java @@ -35,19 +35,15 @@ import nu.validator.htmlparser.common.ByteReadable; import nu.validator.htmlparser.common.Heuristics; -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.extra.ChardetSniffer; -import nu.validator.htmlparser.extra.IcuDetectorSniffer; import nu.validator.htmlparser.impl.Tokenizer; import org.xml.sax.ErrorHandler; -import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** - * Be very careful with this class. It is not a general-purpose subclass of of + * Be very careful with this class. It is not a general-purpose subclass of * Reader. Instead, it is the minimal implementation that does * what Tokenizer needs while being an instance of * Reader. @@ -58,8 +54,7 @@ * @version $Id$ * @author hsivonen */ -public final class HtmlInputStreamReader extends Reader implements - ByteReadable, Locator, Locator2 { +public final class HtmlInputStreamReader extends Reader implements ByteReadable, Locator2 { private static final int SNIFFING_LIMIT = 1024; @@ -112,7 +107,9 @@ public final class HtmlInputStreamReader extends Reader implements /** * @param inputStream * @param errorHandler - * @param locator + * @param tokenizer + * @param driver + * @param heuristics * @throws IOException * @throws SAXException */ @@ -136,15 +133,6 @@ public HtmlInputStreamReader(InputStream inputStream, + encoding.getCanonName() + "\u201D used. 
Documents must use UTF-8."); } - if (encoding == null - && (heuristics == Heuristics.CHARDET || heuristics == Heuristics.ALL)) { - encoding = (new ChardetSniffer(byteArray, limit)).sniff(); - } - if (encoding == null - && (heuristics == Heuristics.ICU || heuristics == Heuristics.ALL)) { - position = 0; - encoding = (new IcuDetectorSniffer(this)).sniff(); - } sniffing = false; if (encoding == null) { encoding = Encoding.WINDOWS1252; diff --git a/src/nu/validator/htmlparser/io/MetaSniffer.java b/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java old mode 100755 new mode 100644 similarity index 98% rename from src/nu/validator/htmlparser/io/MetaSniffer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java index 9deaef7a..dd4dee9f --- a/src/nu/validator/htmlparser/io/MetaSniffer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/io/MetaSniffer.java @@ -34,7 +34,7 @@ import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -public class MetaSniffer extends MetaScanner implements Locator, Locator2 { +public class MetaSniffer extends MetaScanner implements Locator2 { private Encoding characterEncoding = null; @@ -91,7 +91,6 @@ protected int read() throws IOException { * * @throws SAXException * @throws IOException - * @throws */ public Encoding sniff(ByteReadable readable) throws SAXException, IOException { this.readable = readable; diff --git a/src/nu/validator/htmlparser/rewindable/Rewindable.java b/htmlparser/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java old mode 100755 new mode 100644 similarity index 100% rename from src/nu/validator/htmlparser/rewindable/Rewindable.java rename to htmlparser/src/main/java/nu/validator/htmlparser/rewindable/Rewindable.java diff --git a/src/nu/validator/htmlparser/rewindable/RewindableInputStream.java b/htmlparser/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java old mode 100755 new mode 100644 similarity index 100% 
rename from src/nu/validator/htmlparser/rewindable/RewindableInputStream.java rename to htmlparser/src/main/java/nu/validator/htmlparser/rewindable/RewindableInputStream.java diff --git a/src/nu/validator/htmlparser/sax/HtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java similarity index 98% rename from src/nu/validator/htmlparser/sax/HtmlParser.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java index ad17e892..c7751761 100644 --- a/src/nu/validator/htmlparser/sax/HtmlParser.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlParser.java @@ -552,6 +552,7 @@ public void setTransitionHandler(TransitionHandler handler) { * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) * @deprecated For Validator.nu internal use */ + @Deprecated public void setTreeBuilderErrorHandlerOverride(ErrorHandler handler) { treeBuilderErrorHandler = handler; if (driver != null) { @@ -714,7 +715,7 @@ public void setProperty(String name, Object value) /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. - * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -723,7 +724,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. 
* @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -941,8 +942,8 @@ public boolean isReportingDoctype() { } /** - * @param errorProfile - * @see nu.validator.htmlparser.impl.errorReportingTokenizer#setErrorProfile(set) + * @param errorProfileMap + * @see nu.validator.htmlparser.impl.ErrorReportingTokenizer#setErrorProfile(HashMap) */ public void setErrorProfile(HashMap errorProfileMap) { this.errorProfileMap = errorProfileMap; @@ -965,7 +966,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. * * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; @@ -1006,6 +1007,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. 
* @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -1015,6 +1017,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } diff --git a/src/nu/validator/htmlparser/sax/HtmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/HtmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/HtmlSerializer.java diff --git a/src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java similarity index 100% rename from src/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/InfosetCoercingHtmlParser.java diff --git a/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java similarity index 99% rename from src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java index b6cb2f87..bacbbfce 100644 --- a/src/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/NameCheckingXmlSerializer.java @@ -40,7 +40,7 @@ public NameCheckingXmlSerializer(Writer out) { } /** - * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName() + * @see nu.validator.htmlparser.sax.XmlSerializer#checkNCName(String) */ @Override protected void checkNCName(String name) throws SAXException { if (!NCName.isNCName(name)) { diff --git a/src/nu/validator/htmlparser/sax/SAXStreamer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java similarity index 100% 
rename from src/nu/validator/htmlparser/sax/SAXStreamer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXStreamer.java diff --git a/src/nu/validator/htmlparser/sax/SAXTreeBuilder.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/sax/SAXTreeBuilder.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/SAXTreeBuilder.java diff --git a/src/nu/validator/htmlparser/sax/XmlSerializer.java b/htmlparser/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java similarity index 100% rename from src/nu/validator/htmlparser/sax/XmlSerializer.java rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/XmlSerializer.java diff --git a/test-src/nu/validator/saxtree/test/package.html b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java similarity index 85% rename from test-src/nu/validator/saxtree/test/package.html rename to htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java index 57809b84..7ade27d9 100644 --- a/test-src/nu/validator/saxtree/test/package.html +++ b/htmlparser/src/main/java/nu/validator/htmlparser/sax/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    Test drivers.

    - - \ No newline at end of file + */ + +/** + * This package provides an HTML5 parser that exposes the document through the SAX API. + */ +package nu.validator.htmlparser.sax; diff --git a/test-src/nu/validator/htmlparser/test/DecoderLoopTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DecoderLoopTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DecoderLoopTester.java diff --git a/test-src/nu/validator/htmlparser/test/DomIdTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DomIdTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DomIdTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DomIdTester.java diff --git a/test-src/nu/validator/htmlparser/test/DomTest.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/DomTest.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/DomTest.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/DomTest.java diff --git a/test-src/nu/validator/htmlparser/test/EncodingTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/EncodingTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/EncodingTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/EncodingTester.java diff --git a/test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/JSONArrayTokenHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/JSONArrayTokenHandler.java diff --git a/test-src/nu/validator/htmlparser/test/ListErrorHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java similarity index 100% rename from 
test-src/nu/validator/htmlparser/test/ListErrorHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/ListErrorHandler.java diff --git a/test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/SystemErrErrorHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/SystemErrErrorHandler.java diff --git a/test-src/nu/validator/htmlparser/test/TokenPrinter.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java old mode 100755 new mode 100644 similarity index 100% rename from test-src/nu/validator/htmlparser/test/TokenPrinter.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TokenPrinter.java diff --git a/test-src/nu/validator/htmlparser/test/TokenizerTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TokenizerTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TokenizerTester.java diff --git a/test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TreeDumpContentHandler.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TreeDumpContentHandler.java diff --git a/test-src/nu/validator/htmlparser/test/TreePrinter.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreePrinter.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/TreePrinter.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TreePrinter.java diff --git a/test-src/nu/validator/htmlparser/test/TreeTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/TreeTester.java similarity index 100% 
rename from test-src/nu/validator/htmlparser/test/TreeTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/TreeTester.java diff --git a/test-src/nu/validator/htmlparser/test/UntilHashInputStream.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/UntilHashInputStream.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/UntilHashInputStream.java diff --git a/test-src/nu/validator/htmlparser/test/XmlSerializerTester.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/XmlSerializerTester.java rename to htmlparser/src/test/java/nu/validator/htmlparser/test/XmlSerializerTester.java diff --git a/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java b/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java new file mode 100644 index 00000000..82d43a7d --- /dev/null +++ b/htmlparser/src/test/java/nu/validator/htmlparser/test/package-info.java @@ -0,0 +1,26 @@ +/* + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +/** + * Test drivers. + */ +package nu.validator.htmlparser.test; diff --git a/test-src/nu/validator/saxtree/test/PassThruPrinter.java b/htmlparser/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java similarity index 100% rename from test-src/nu/validator/saxtree/test/PassThruPrinter.java rename to htmlparser/src/test/java/nu/validator/saxtree/test/PassThruPrinter.java diff --git a/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java b/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java new file mode 100644 index 00000000..ad8ce418 --- /dev/null +++ b/htmlparser/src/test/java/nu/validator/saxtree/test/package-info.java @@ -0,0 +1,26 @@ +/* + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +/** + * Test drivers. + */ +package nu.validator.saxtree.test; diff --git a/mozilla-export-scripts/README.txt b/mozilla-export-scripts/README.txt deleted file mode 100644 index 3567b846..00000000 --- a/mozilla-export-scripts/README.txt +++ /dev/null @@ -1,25 +0,0 @@ -These scripts export the Java-to-C++ translator and the java source files that -implement the HTML5 parser. The exported translator may be used (with no -external dependencies) to translate the exported java source files into Gecko- -compatible C++. - -Hacking the translator itself still requires a working copy of the Java HTML5 -parser repository, but hacking the parser (modifying the Java source files and -performing the translation) should now be possible using only files committed -to the mozilla source tree. - -Run any of these scripts without arguments to receive usage instructions. - - make-translator-jar.sh: compiles the Java-to-C++ translator into a .jar file - export-java-srcs.sh: exports minimal java source files implementing the - HTML5 parser - export-translator.sh: exports the compiled translator and javaparser.jar - export-all.sh: runs the previous two scripts - util.sh: provides various shell utility functions to the - scripts listed above (does nothing if run directly) - -All path arguments may be either absolute or relative. This includes the path -to the script itself ($0), so the directory from which you run these scripts -doesn't matter. 
- -Ben Newman (7 July 2009) diff --git a/mozilla-export-scripts/export-all.sh b/mozilla-export-scripts/export-all.sh deleted file mode 100644 index 9ae07d33..00000000 --- a/mozilla-export-scripts/export-all.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." - echo - exit 1 -fi - -$SCRIPT_DIR/export-translator.sh $MOZ_PARSER_PATH -$SCRIPT_DIR/export-java-srcs.sh $MOZ_PARSER_PATH - -echo -echo "Now go to $MOZ_PARSER_PATH and run" -echo " java -jar javalib/translator.jar javasrc . nsHtml5AtomList.h" -echo diff --git a/mozilla-export-scripts/export-java-srcs.sh b/mozilla-export-scripts/export-java-srcs.sh deleted file mode 100644 index 6d32b07d..00000000 --- a/mozilla-export-scripts/export-java-srcs.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -SRCDIR=`abs $SCRIPT_DIR/../src/nu/validator/htmlparser/impl` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." 
- echo - exit 1 -fi - -SRCTARGET=$MOZ_PARSER_PATH/javasrc - -rm -rf $SRCTARGET -mkdir $SRCTARGET -# Avoid copying the .svn directory: -cp -rv $SRCDIR/*.java $SRCTARGET diff --git a/mozilla-export-scripts/export-translator.sh b/mozilla-export-scripts/export-translator.sh deleted file mode 100644 index d1f4f1c3..00000000 --- a/mozilla-export-scripts/export-translator.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -LIBDIR=`abs $SCRIPT_DIR/../translator-lib` - -if [ $# -eq 1 ] -then - MOZ_PARSER_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/mozilla-central/parser/html" - echo "Note that relative paths will work just fine." - echo "Be sure that you have run `dirname $0`/make-translator-jar.sh before running this script." - echo - exit 1 -fi - -LIBTARGET=$MOZ_PARSER_PATH/javalib - -rm -rf $LIBTARGET -cp -rv $LIBDIR $LIBTARGET diff --git a/mozilla-export-scripts/make-translator-jar.sh b/mozilla-export-scripts/make-translator-jar.sh deleted file mode 100644 index 4f21ae66..00000000 --- a/mozilla-export-scripts/make-translator-jar.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env sh - -SCRIPT_DIR=`dirname $0` -source $SCRIPT_DIR/util.sh -SCRIPT_DIR=`abs $SCRIPT_DIR` - -SRCDIR=`abs $SCRIPT_DIR/../translator-src` -BINDIR=`abs $SCRIPT_DIR/../translator-bin` -LIBDIR=`abs $SCRIPT_DIR/../translator-lib` - -if [ $# -eq 1 ] -then - JAVAPARSER_JAR_PATH=`abs $1` -else - echo - echo "Usage: sh `basename $0` /path/to/javaparser-1.0.7.jar" - echo "Note that relative paths will work just fine." 
- echo "Obtain javaparser-1.0.7.jar from http://code.google.com/p/javaparser" - echo - exit 1 -fi - -set_up() { - rm -rf $BINDIR; mkdir $BINDIR - rm -rf $LIBDIR; mkdir $LIBDIR - cp $JAVAPARSER_JAR_PATH $LIBDIR/javaparser.jar -} - -write_manifest() { - rm -f $LIBDIR/manifest - echo "Main-Class: nu.validator.htmlparser.cpptranslate.Main" > $LIBDIR/manifest - echo "Class-Path: javaparser.jar" >> $LIBDIR/manifest -} - -compile_translator() { - find $SRCDIR -name "*.java" | \ - xargs javac -cp $LIBDIR/javaparser.jar -g -d $BINDIR -} - -generate_jar() { - jar cvfm $LIBDIR/translator.jar $LIBDIR/manifest -C $BINDIR . -} - -clean_up() { - rm -f $LIBDIR/manifest -} - -success_message() { - echo - echo "Successfully generated directory \"$LIBDIR\" with contents:" - echo - ls -al $LIBDIR - echo - echo "Now run `dirname $0`/export-all.sh with no arguments and follow the usage instructions." - echo -} - -set_up && \ - compile_translator && \ - write_manifest && \ - generate_jar && \ - clean_up && \ - success_message diff --git a/mozilla-export-scripts/util.sh b/mozilla-export-scripts/util.sh deleted file mode 100644 index 348ca14f..00000000 --- a/mozilla-export-scripts/util.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env sh - -abs() { - local rel - local p - if [ $# -ne 1 ] - then - rel=. - else - rel=$1 - fi - if [ -d $rel ] - then - pushd $rel > /dev/null - p=`pwd` - popd > /dev/null - else - pushd `dirname $rel` > /dev/null - p=`pwd`/`basename $rel` - popd > /dev/null - fi - echo $p -} diff --git a/pom.xml b/pom.xml index 41f46725..8720018f 100644 --- a/pom.xml +++ b/pom.xml @@ -20,221 +20,140 @@ * DEALINGS IN THE SOFTWARE. --> + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - nu.validator.htmlparser - htmlparser - bundle - 1.4 - htmlparser - http://about.validator.nu/htmlparser/ - The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. 
The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. - - - hsivonen - Henri Sivonen - hsivonen@iki.fi - http://hsivonen.iki.fi/ - - + + nu.validator.htmlparser + parent + 2.0 + pom + + parent + The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser. + https://about.validator.nu/htmlparser/ The MIT License - http://www.opensource.org/licenses/mit-license.php + https://opensource.org/licenses/mit-license.php repo The (New) BSD License - http://www.opensource.org/licenses/bsd-license.php + https://opensource.org/licenses/bsd-license.php repo + + + + hsivonen + Henri Sivonen + hsivonen@iki.fi + https://hsivonen.fi/ + + + + + htmlparser + saxtree + xom + + - scm:hg:http://hg.mozilla.org/projects/htmlparser/ - http://hg.mozilla.org/projects/htmlparser/ + scm:git:https://github.com/validator/htmlparser.git + https://github.com/validator/htmlparser - - ${project.build.directory}/src - ${basedir}/test-src - - - org.apache.maven.plugins - maven-compiler-plugin - - 1.5 - 1.5 - - - - maven-antrun-plugin - 1.7 - - - com.sun - tools - 1.5.0 - system - ${java.home}/../lib/tools.jar - - - - - intitialize-sources - initialize - - run - - - - - - - - - - - - - tokenizer-hotspot-workaround - process-sources - - run - - - - - - - - - - - - - - - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - true - - - - org.apache.felix - maven-bundle-plugin - 2.3.7 - true - - - false - - - ${project.name} - nu.validator.htmlparser - ${project.version} - J2SE-1.5 - <_removeheaders>Built-By,Bnd-LastModified - - - - - org.codehaus.mojo - rpm-maven-plugin - - 1 - The MIT License - 
Development/Java - /var/tmp/${project.build.finalName} - - _javadir ${rpm.java.dir} - _javadocdir ${rpm.javadoc.dir} - - - - ${rpm.java.dir} - 644 - root - root - - - ${project.build.directory}/${project.build.finalName}.jar - - - - - ${rpm.javadoc.dir}/${project.build.finalName} - 644 - root - root - - - ${project.build.directory}/apidocs - - - - - %__ln_s ${project.build.finalName}.jar %{buildroot}%{_javadir}/${project.name}.jar - - - - - - - com.ibm.icu - icu4j - 4.0.1 - compile - true - - - xom - xom - 1.1 - compile - true - - - net.sourceforge.jchardet - jchardet - 1.0 - compile - true - - - com.sdicons.jsontools - jsontools-core - 1.4 - test - - + - /usr/share/java - /usr/share/javadoc + 11 + true UTF-8 + + + + + nu.validator.htmlparser + htmlparser + 2.0 + + + nu.validator.htmlparser + saxtree + 2.0 + + + nu.validator.htmlparser + xom + 2.0 + + + + + + + + + maven-clean-plugin + 3.1.0 + + + maven-compiler-plugin + 3.8.1 + + + base-compile + + compile + + + 8 + + module-info.java + + + + + + + maven-javadoc-plugin + 3.2.0 + + + maven-resources-plugin + 3.2.0 + + + maven-source-plugin + 3.2.1 + + + maven-surefire-plugin + 2.22.2 + + + org.codehaus.mojo + exec-maven-plugin + 3.0.0 + + + + diff --git a/ruby-gcj/DomUtils.java b/ruby-gcj/DomUtils.java deleted file mode 100644 index dc43da83..00000000 --- a/ruby-gcj/DomUtils.java +++ /dev/null @@ -1,36 +0,0 @@ -import java.util.HashSet; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.w3c.dom.Element; - -public class DomUtils { - - private static HashSet pinned_list = new HashSet(); - - public static synchronized void pin(Document d) { - pinned_list.add(d); - } - - public static synchronized void unpin(Document d) { - pinned_list.remove(d); - } - - // return all the text content contained by a single element - public static void getElementContent(Element e, StringBuffer b) { - for (Node n = e.getFirstChild(); n!=null; n=n.getNextSibling()) { - if (n.getNodeType() == n.TEXT_NODE) { - 
b.append(n.getNodeValue()); - } else if (n.getNodeType() == n.ELEMENT_NODE) { - getElementContent((Element) e, b); - } - } - } - - // replace all child nodes of a given element with a single text element - public static void setElementContent(Element e, String s) { - while (e.hasChildNodes()) { - e.removeChild(e.getFirstChild()); - } - e.appendChild(e.getOwnerDocument().createTextNode(s)); - } -} diff --git a/ruby-gcj/README b/ruby-gcj/README deleted file mode 100644 index b368437f..00000000 --- a/ruby-gcj/README +++ /dev/null @@ -1,65 +0,0 @@ -Disclaimer: - - This code is experimental. - - When some people say experimental, they mean "it may not do what it is - intended to do; in fact, it might even wipe out your hard drive". I mean - that too. But I mean something more than that. - - In this case, experimental means that I don't even know what it is intended - to do. I just have a vague vision, and I am trying out various things in - the hopes that one of them will work out. - -Vision: - - My vague vision is that I would like to see HTML 5 be a success. For me to - consider it to be a success, it needs to be a standard, be interoperable, - and be ubiquitous. - - I believe that the Validator.nu parser can be used to bootstrap that - process. It is written in Java. Has been compiled into JavaScript. Has - been translated into C++ based on the Mozilla libraries with the intent of - being included in Firefox. It very closely tracks to the standard. - - For the moment, the effort is on extending that to another language (Ruby) - on a single environment (i.e., Linux). Once that is complete, intent is to - evaluate the results, decide what needs to be changed, and what needs to be - done to support other languages and environments. - - The bar I'm setting for myself isn't just another SWIG generated low level - interface to a DOM, but rather a best of breed interface; which for Ruby - seems to be the one pioneered by Hpricot and adopted by Nokogiri. 
Success - will mean passing all of the tests from one of those two parsers as well as - all of the HTML5 tests. - -Build instructions: - - You'll need icu4j and chardet jars. If you checked out and ran dldeps you - are already all set: - - svn co http://svn.versiondude.net/whattf/build/trunk/ build - python build/build.py checkout dldeps - - Fedora 11: - - yum install ruby-devel rubygem-rake java-1.5.0-gcj-devel gcc-c++ - - Ubuntu 9.04: - - apt-get install ruby ruby1.8-dev rake gcj g++ - - Also at this time, you need to install a jdk (e.g. sun-java6-jdk), simply - because the javac that comes with gcj doesn't support -sourcepath, and - I haven't spent the time to find a replacement. - - Finally, make sure that libjaxp1.3-java is *not* installed. - - http://gcc.gnu.org/ml/java/2009-06/msg00055.html - - If this is done, you should be all set. - - cd htmlparser/ruby-gcj - rake test - - If things are successful, the last lines of the output will list the - font attributes and values found in the test/google.html file. 
diff --git a/ruby-gcj/Rakefile b/ruby-gcj/Rakefile deleted file mode 100644 index 7b518025..00000000 --- a/ruby-gcj/Rakefile +++ /dev/null @@ -1,77 +0,0 @@ -deps = ENV['deps'] || '../../dependencies' -icu4j = "#{deps}/icu4j-4_0.jar" -chardet = "#{deps}/mozilla/intl/chardet/java/dist/lib/chardet.jar" -libgcj = Dir['/usr/share/java/libgcj*.jar'].grep(/gcj[-\d.]*jar$/).sort.last - -task :default => %w(headers libs Makefile validator.so) - -# headers - -hdb = 'nu/validator/htmlparser/dom/HtmlDocumentBuilder' -task :headers => %W(headers/DomUtils.h headers/#{hdb}.h) - -file 'headers/DomUtils.h' => 'DomUtils.java' do |t| - mkdir_p %w(classes headers), :verbose => false - sh "javac -d classes #{t.prerequisites.first}" - sh "gcjh -force -o #{t.name} -cp #{libgcj}:classes DomUtils" -end - -file "headers/#{hdb}.h" => "../src/#{hdb}.java" do |t| - mkdir_p %w(classes headers), :verbose => false - sh "javac -cp #{icu4j}:#{chardet} -d classes -sourcepath ../src " + - t.prerequisites.first - sh "gcjh -force -cp classes -o #{t.name} -cp #{libgcj}:classes " + - hdb.gsub('/','.') -end - -# libs - -task :libs => %w(htmlparser chardet icu).map {|name| "lib/libnu-#{name}.so"} - -htmlparser = Dir['../src/**/*.java'].reject {|name| name.include? 
'/xom/'} -file 'lib/libnu-htmlparser.so' => htmlparser + ['DomUtils.java'] do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared --classpath=#{icu4j}:#{chardet} -fPIC " + - "-o #{t.name} #{t.prerequisites.join(' ')}" -end - -file 'lib/libnu-chardet.so' => chardet do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" -end - -file 'lib/libnu-icu.so' => icu4j do |t| - mkdir_p 'lib', :verbose => false - sh "gcj -shared -fPIC -o #{t.name} #{t.prerequisites.join(' ')}" -end - -# module - -file 'Makefile' do - sh "ruby extconf.rb --with-gcj=#{libgcj}" -end - -file 'validator.so' => %w(Makefile validator.cpp headers/DomUtils.h) do - system 'make' -end - -file 'nu/validator.so' do - mkdir_p 'nu', :verbose => false - system 'ln -s -t nu ../validator.so' -end - -# tasks - -task :test => [:default, 'nu/validator.so'] do - ENV['LD_LIBRARY_PATH']='lib' - sh 'ruby test/fonts.rb test/google.html' -end - -task :clean do - rm_rf %W(classes lib nu mkmf.log headers/DomUtils.h headers/#{hdb}.h) + - Dir['*.o'] + Dir['*.so'] -end - -task :clobber => :clean do - rm_rf %w(headers Makefile) -end diff --git a/ruby-gcj/extconf.rb b/ruby-gcj/extconf.rb deleted file mode 100644 index 415cf430..00000000 --- a/ruby-gcj/extconf.rb +++ /dev/null @@ -1,45 +0,0 @@ -require 'mkmf' - -# system dependencies -gcj = with_config('gcj', '/usr/share/java/libgcj.jar') - -# headers for JAXP -CONFIG['CC'] = 'g++' -with_cppflags('-xc++') do - - unless find_header('org/w3c/dom/Document.h', 'headers') - - `jar tf #{gcj}`.split.each do |file| - next unless file =~ /\.class$/ - next unless file =~ /^(javax|org)\/(w3c|xml)/ - next if file.include? '$' - - dest = 'headers/' + file.sub(/\.class$/,'.h') - name = file.sub(/\.class$/,'').gsub('/','.') - - next if File.exist? 
dest - - cmd = "gcjh -cp #{gcj} -o #{dest} #{name}" - puts cmd - break unless system cmd - system "ruby -pi -e '$_.sub!(/namespace namespace$/," + - "\"namespace namespace$\")' #{dest}" - system "ruby -pi -e '$_.sub!(/::namespace::/," + - "\"::namespace$::\")' #{dest}" - end - - exit unless find_header('org/w3c/dom/Document.h', 'headers') - end - - find_header 'nu/validator/htmlparser/dom/HtmlDocumentBuilder.h', 'headers' -end - -# Java libraries -Config::CONFIG['CC'] = 'g++ -shared' -dir_config('nu-htmlparser', nil, 'lib') -have_library 'nu-htmlparser' -have_library 'nu-icu' -have_library 'nu-chardet' - -# Ruby library -create_makefile 'nu/validator' diff --git a/ruby-gcj/test/domencoding.rb b/ruby-gcj/test/domencoding.rb deleted file mode 100644 index 1beb94c1..00000000 --- a/ruby-gcj/test/domencoding.rb +++ /dev/null @@ -1,5 +0,0 @@ -require 'nu/validator' - -ARGV.each do |arg| - puts Nu::Validator::parse(open(arg)).root.name -end diff --git a/ruby-gcj/test/fonts.rb b/ruby-gcj/test/fonts.rb deleted file mode 100644 index 595e3ae0..00000000 --- a/ruby-gcj/test/fonts.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'nu/validator' -require 'open-uri' - -ARGV.each do |arg| - doc = Nu::Validator::parse(open(arg)) - doc.xpath("//*[local-name()='font']").each do |font| - font.attributes.each do |name, attr| - puts "#{name} => #{attr.value}" - end - end -end diff --git a/ruby-gcj/test/google.html b/ruby-gcj/test/google.html deleted file mode 100644 index 8d2183b2..00000000 --- a/ruby-gcj/test/google.html +++ /dev/null @@ -1,10 +0,0 @@ -Google



     
      Advanced Search
      Preferences
      Language Tools

    Find an opportunity to volunteer in your community today.


    Advertising Programs - Business Solutions - About Google

    ©2009 - Privacy

    \ No newline at end of file diff --git a/ruby-gcj/test/greek.xml b/ruby-gcj/test/greek.xml deleted file mode 100644 index a14d23eb..00000000 --- a/ruby-gcj/test/greek.xml +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/ruby-gcj/validator.cpp b/ruby-gcj/validator.cpp deleted file mode 100644 index aadd24ab..00000000 --- a/ruby-gcj/validator.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h" - -#include "DomUtils.h" - -#include "ruby.h" - -using namespace java::io; -using namespace java::lang; -using namespace java::util; -using namespace javax::xml::parsers; -using namespace javax::xml::xpath; -using namespace nu::validator::htmlparser::dom; -using namespace org::w3c::dom; -using namespace org::xml::sax; - -static VALUE jaxp_Document; -static VALUE jaxp_Attr; -static VALUE jaxp_Element; -static ID ID_read; -static ID ID_doc; -static ID ID_element; - -// convert a Java string into a Ruby string -static VALUE j2r(String *string) { - if (string == NULL) return Qnil; - jint len = JvGetStringUTFLength(string); - char buf[len]; - JvGetStringUTFRegion(string, 0, len, buf); - return rb_str_new(buf, len); -} - -// convert a Ruby string into a Java string -static String *r2j(VALUE string) { - return JvNewStringUTF(RSTRING(string)->ptr); -} - -// release the Java Document associated with this Ruby Document -static void vnu_document_free(Document *doc) { - DomUtils::unpin(doc); -} - -// Nu::Validator::parse( string|file ) -static VALUE vnu_parse(VALUE self, VALUE input) { - HtmlDocumentBuilder *parser = new HtmlDocumentBuilder(); - - // read file-like objects into memory. 
TODO: buffer such objects - if (rb_respond_to(input, ID_read)) - input = rb_funcall(input, ID_read, 0); - - // convert input in to a ByteArrayInputStream - jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); - memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); - InputSource *source = new InputSource(new ByteArrayInputStream(bytes)); - - // parse, pin, and wrap - Document *doc = parser->parse(source); - DomUtils::pin(doc); - return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); -} - -// Jaxp::parse( string|file ) -static VALUE jaxp_parse(VALUE self, VALUE input) { - DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance(); - DocumentBuilder *parser = factory->newDocumentBuilder(); - - // read file-like objects into memory. TODO: buffer such objects - if (rb_respond_to(input, ID_read)) - input = rb_funcall(input, ID_read, 0); - - try { - jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); - memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); - Document *doc = parser->parse(new ByteArrayInputStream(bytes)); - DomUtils::pin(doc); - return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); - } catch (java::lang::Throwable *ex) { - ex->printStackTrace(); - return Qnil; - } -} - - -// Nu::Validator::Document#encoding -static VALUE jaxp_document_encoding(VALUE rdoc) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - return j2r(jdoc->getXmlEncoding()); -} - -// Nu::Validator::Document#root -static VALUE jaxp_document_root(VALUE rdoc) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - - Element *jelement = jdoc->getDocumentElement(); - if (jelement==NULL) return Qnil; - - VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement); - rb_ivar_set(relement, ID_doc, rdoc); - return relement; -} - -// Nu::Validator::Document#xpath -static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) { - Document *jdoc; - Data_Get_Struct(rdoc, Document, jdoc); - - Element 
*jelement = jdoc->getDocumentElement(); - if (jelement==NULL) return Qnil; - - XPath *xpath = XPathFactory::newInstance()->newXPath(); - XPathExpression *expr = xpath->compile(r2j(path)); - NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET); - - VALUE result = rb_ary_new(); - for (int i=0; igetLength(); i++) { - VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i)); - rb_ivar_set(relement, ID_doc, rdoc); - rb_ary_push(result, relement); - } - return result; -} - -// Nu::Validator::Element#name -static VALUE jaxp_element_name(VALUE relement) { - Element *jelement; - Data_Get_Struct(relement, Element, jelement); - return j2r(jelement->getNodeName()); -} - -// Nu::Validator::Element#attributes -static VALUE jaxp_element_attributes(VALUE relement) { - Element *jelement; - Data_Get_Struct(relement, Element, jelement); - VALUE result = rb_hash_new(); - NamedNodeMap *map = jelement->getAttributes(); - for (int i=0; igetLength(); i++) { - Attr *jattr = (Attr *) map->item(i); - VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr); - rb_ivar_set(rattr, ID_element, relement); - rb_hash_aset(result, j2r(jattr->getName()), rattr); - } - return result; -} - -// Nu::Validator::Attribute#value -static VALUE jaxp_attribute_value(VALUE rattribute) { - Attr *jattribute; - Data_Get_Struct(rattribute, Attr, jattribute); - return j2r(jattribute->getValue()); -} - -typedef VALUE (ruby_method)(...); - -// Nu::Validator module initialization -extern "C" void Init_validator() { - JvCreateJavaVM(NULL); - JvAttachCurrentThread(NULL, NULL); - JvInitClass(&DomUtils::class$); - JvInitClass(&XPathFactory::class$); - JvInitClass(&XPathConstants::class$); - - VALUE jaxp = rb_define_module("Jaxp"); - rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1); - - VALUE nu = rb_define_module("Nu"); - VALUE validator = rb_define_module_under(nu, "Validator"); - rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1); 
- - jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject); - rb_define_method(jaxp_Document, "encoding", - (ruby_method*)&jaxp_document_encoding, 0); - rb_define_method(jaxp_Document, "root", - (ruby_method*)&jaxp_document_root, 0); - rb_define_method(jaxp_Document, "xpath", - (ruby_method*)&jaxp_document_xpath, 1); - - jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject); - rb_define_method(jaxp_Element, "name", - (ruby_method*)&jaxp_element_name, 0); - rb_define_method(jaxp_Element, "attributes", - (ruby_method*)&jaxp_element_attributes, 0); - - jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject); - rb_define_method(jaxp_Attr, "value", - (ruby_method*)&jaxp_attribute_value, 0); - - ID_read = rb_intern("read"); - ID_doc = rb_intern("@doc"); - ID_element = rb_intern("@element"); -} diff --git a/super/nu/validator/htmlparser/translatable/java/io/IOException.java b/saxtree/pom.xml similarity index 68% rename from super/nu/validator/htmlparser/translatable/java/io/IOException.java rename to saxtree/pom.xml index f323f1e3..fe31a185 100644 --- a/super/nu/validator/htmlparser/translatable/java/io/IOException.java +++ b/saxtree/pom.xml @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2009 Mozilla Foundation + + + 4.0.0 -package java.io; + + nu.validator.htmlparser + parent + 2.0 + -public class IOException extends Exception { + saxtree - public IOException() { - } - - public IOException(String arg0) { - super(arg0); - } - - public IOException(Throwable arg0) { - super(arg0); - } - - public IOException(String arg0, Throwable arg1) { - super(arg0, arg1); - } - -} + saxtree + diff --git a/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java b/saxtree/src/main/java/module-info.java similarity index 58% rename from gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java rename to saxtree/src/main/java/module-info.java index 43235c5b..077e6379 100644 --- a/gwt-src/nu/validator/htmlparser/gwt/ParseEndListener.java +++ 
b/saxtree/src/main/java/module-info.java @@ -1,46 +1,30 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.gwt; - -import com.google.gwt.core.client.JavaScriptObject; - -public class ParseEndListener { - - private final JavaScriptObject callback; - - /** - * @param callback - */ - public ParseEndListener(JavaScriptObject callback) { - this.callback = callback; - } - - public void parseComplete() { - call(callback); - } - - private static native void call(JavaScriptObject callback) /*-{ - callback(); - }-*/; - -} +/* + * Copyright (c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides SAX Tree: a tree model optimized for creation from SAX events and replay as SAX events. 
+ */ +module nu.validator.saxtree { + requires transitive java.xml; + + exports nu.validator.saxtree; +} diff --git a/src/nu/validator/saxtree/CDATA.java b/saxtree/src/main/java/nu/validator/saxtree/CDATA.java similarity index 100% rename from src/nu/validator/saxtree/CDATA.java rename to saxtree/src/main/java/nu/validator/saxtree/CDATA.java diff --git a/src/nu/validator/saxtree/CharBufferNode.java b/saxtree/src/main/java/nu/validator/saxtree/CharBufferNode.java similarity index 100% rename from src/nu/validator/saxtree/CharBufferNode.java rename to saxtree/src/main/java/nu/validator/saxtree/CharBufferNode.java diff --git a/src/nu/validator/saxtree/Characters.java b/saxtree/src/main/java/nu/validator/saxtree/Characters.java similarity index 100% rename from src/nu/validator/saxtree/Characters.java rename to saxtree/src/main/java/nu/validator/saxtree/Characters.java diff --git a/src/nu/validator/saxtree/Comment.java b/saxtree/src/main/java/nu/validator/saxtree/Comment.java similarity index 100% rename from src/nu/validator/saxtree/Comment.java rename to saxtree/src/main/java/nu/validator/saxtree/Comment.java diff --git a/src/nu/validator/saxtree/DTD.java b/saxtree/src/main/java/nu/validator/saxtree/DTD.java similarity index 100% rename from src/nu/validator/saxtree/DTD.java rename to saxtree/src/main/java/nu/validator/saxtree/DTD.java diff --git a/src/nu/validator/saxtree/Document.java b/saxtree/src/main/java/nu/validator/saxtree/Document.java similarity index 100% rename from src/nu/validator/saxtree/Document.java rename to saxtree/src/main/java/nu/validator/saxtree/Document.java diff --git a/src/nu/validator/saxtree/DocumentFragment.java b/saxtree/src/main/java/nu/validator/saxtree/DocumentFragment.java similarity index 95% rename from src/nu/validator/saxtree/DocumentFragment.java rename to saxtree/src/main/java/nu/validator/saxtree/DocumentFragment.java index a9e2db84..9f496bf3 100644 --- a/src/nu/validator/saxtree/DocumentFragment.java +++ 
b/saxtree/src/main/java/nu/validator/saxtree/DocumentFragment.java @@ -23,7 +23,7 @@ package nu.validator.saxtree; -import nu.validator.htmlparser.impl.LocatorImpl; +import org.xml.sax.ext.Locator2Impl; /** * A document fragment. @@ -37,7 +37,7 @@ public final class DocumentFragment extends ParentNode { * The constructor. */ public DocumentFragment() { - super(new LocatorImpl()); + super(new Locator2Impl()); } /** diff --git a/src/nu/validator/saxtree/Element.java b/saxtree/src/main/java/nu/validator/saxtree/Element.java similarity index 100% rename from src/nu/validator/saxtree/Element.java rename to saxtree/src/main/java/nu/validator/saxtree/Element.java diff --git a/src/nu/validator/saxtree/Entity.java b/saxtree/src/main/java/nu/validator/saxtree/Entity.java similarity index 100% rename from src/nu/validator/saxtree/Entity.java rename to saxtree/src/main/java/nu/validator/saxtree/Entity.java diff --git a/src/nu/validator/saxtree/IgnorableWhitespace.java b/saxtree/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java similarity index 100% rename from src/nu/validator/saxtree/IgnorableWhitespace.java rename to saxtree/src/main/java/nu/validator/saxtree/IgnorableWhitespace.java diff --git a/src/nu/validator/saxtree/LocatorImpl.java b/saxtree/src/main/java/nu/validator/saxtree/LocatorImpl.java similarity index 98% rename from src/nu/validator/saxtree/LocatorImpl.java rename to saxtree/src/main/java/nu/validator/saxtree/LocatorImpl.java index b6416f1a..6a1f81ba 100644 --- a/src/nu/validator/saxtree/LocatorImpl.java +++ b/saxtree/src/main/java/nu/validator/saxtree/LocatorImpl.java @@ -31,7 +31,7 @@ * @version $Id$ * @author hsivonen */ -public final class LocatorImpl implements Locator, Locator2 { +public final class LocatorImpl implements Locator2 { /** * The system id. 
diff --git a/src/nu/validator/saxtree/Node.java b/saxtree/src/main/java/nu/validator/saxtree/Node.java similarity index 99% rename from src/nu/validator/saxtree/Node.java rename to saxtree/src/main/java/nu/validator/saxtree/Node.java index c9292dd2..1ead08b4 100644 --- a/src/nu/validator/saxtree/Node.java +++ b/saxtree/src/main/java/nu/validator/saxtree/Node.java @@ -35,7 +35,7 @@ * @version $Id$ * @author hsivonen */ -public abstract class Node implements Locator, Locator2 { +public abstract class Node implements Locator2 { /** * The system id. diff --git a/src/nu/validator/saxtree/NodeType.java b/saxtree/src/main/java/nu/validator/saxtree/NodeType.java similarity index 100% rename from src/nu/validator/saxtree/NodeType.java rename to saxtree/src/main/java/nu/validator/saxtree/NodeType.java diff --git a/src/nu/validator/saxtree/NullLexicalHandler.java b/saxtree/src/main/java/nu/validator/saxtree/NullLexicalHandler.java similarity index 100% rename from src/nu/validator/saxtree/NullLexicalHandler.java rename to saxtree/src/main/java/nu/validator/saxtree/NullLexicalHandler.java diff --git a/src/nu/validator/saxtree/ParentNode.java b/saxtree/src/main/java/nu/validator/saxtree/ParentNode.java similarity index 100% rename from src/nu/validator/saxtree/ParentNode.java rename to saxtree/src/main/java/nu/validator/saxtree/ParentNode.java diff --git a/src/nu/validator/saxtree/PrefixMapping.java b/saxtree/src/main/java/nu/validator/saxtree/PrefixMapping.java similarity index 100% rename from src/nu/validator/saxtree/PrefixMapping.java rename to saxtree/src/main/java/nu/validator/saxtree/PrefixMapping.java diff --git a/src/nu/validator/saxtree/ProcessingInstruction.java b/saxtree/src/main/java/nu/validator/saxtree/ProcessingInstruction.java similarity index 100% rename from src/nu/validator/saxtree/ProcessingInstruction.java rename to saxtree/src/main/java/nu/validator/saxtree/ProcessingInstruction.java diff --git a/src/nu/validator/saxtree/SkippedEntity.java 
b/saxtree/src/main/java/nu/validator/saxtree/SkippedEntity.java similarity index 100% rename from src/nu/validator/saxtree/SkippedEntity.java rename to saxtree/src/main/java/nu/validator/saxtree/SkippedEntity.java diff --git a/src/nu/validator/saxtree/TreeBuilder.java b/saxtree/src/main/java/nu/validator/saxtree/TreeBuilder.java similarity index 100% rename from src/nu/validator/saxtree/TreeBuilder.java rename to saxtree/src/main/java/nu/validator/saxtree/TreeBuilder.java diff --git a/src/nu/validator/saxtree/TreeParser.java b/saxtree/src/main/java/nu/validator/saxtree/TreeParser.java similarity index 99% rename from src/nu/validator/saxtree/TreeParser.java rename to saxtree/src/main/java/nu/validator/saxtree/TreeParser.java index 6f86f7f0..29792f84 100644 --- a/src/nu/validator/saxtree/TreeParser.java +++ b/saxtree/src/main/java/nu/validator/saxtree/TreeParser.java @@ -35,7 +35,7 @@ * @version $Id$ * @author hsivonen */ -public final class TreeParser implements Locator, Locator2 { +public final class TreeParser implements Locator2 { /** * The content handler. diff --git a/src/nu/validator/htmlparser/impl/package.html b/saxtree/src/main/java/nu/validator/saxtree/package-info.java similarity index 52% rename from src/nu/validator/htmlparser/impl/package.html rename to saxtree/src/main/java/nu/validator/saxtree/package-info.java index 6d029a13..54af5a4c 100644 --- a/src/nu/validator/htmlparser/impl/package.html +++ b/saxtree/src/main/java/nu/validator/saxtree/package-info.java @@ -1,7 +1,4 @@ - - -Package Overview - - - -

    This package contains the bulk of parser internals. Only implementors of -additional tree builders or token handlers should look here.

    - - \ No newline at end of file + */ + +/** + * This package provides SAX Tree: a tree model optimized for creation from SAX events and replay as SAX events. + * + *

    Design Principles

    + *
      + *
    1. Preserve information exposed through ContentHandler, + * LexicalHandler and Locator. + *
    2. Creation from SAX events or as part of the parse of a conforming + * HTML5 document should be fast. + *
    3. Emitting SAX events based on the tree should be fast. + *
    4. Mutations should be possible but should not make the above "fast" cases slower. + *
    5. Concurrent reads should work without locking when there are no concurrent mutations. + *
    6. The user of the API has the responsibility of using the API properly: + * for the sake of performance, the model does not check if it is being used properly. + * Improper use may, therefore, put the model in an inconsistent state. + *
    + */ +package nu.validator.saxtree; diff --git a/src/nu/validator/htmlparser/annotation/package.html b/src/nu/validator/htmlparser/annotation/package.html deleted file mode 100644 index af15d382..00000000 --- a/src/nu/validator/htmlparser/annotation/package.html +++ /dev/null @@ -1,30 +0,0 @@ - - -Package Overview - - - -

    This package provides annotations for facilitating automated translation -of the source code into other programming languages.

    - - \ No newline at end of file diff --git a/src/nu/validator/htmlparser/dom/package.html b/src/nu/validator/htmlparser/dom/package.html deleted file mode 100644 index d793bcf8..00000000 --- a/src/nu/validator/htmlparser/dom/package.html +++ /dev/null @@ -1,29 +0,0 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document using the DOM API.

    - - \ No newline at end of file diff --git a/src/nu/validator/htmlparser/extra/ChardetSniffer.java b/src/nu/validator/htmlparser/extra/ChardetSniffer.java deleted file mode 100644 index a7575039..00000000 --- a/src/nu/validator/htmlparser/extra/ChardetSniffer.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.extra; - -import java.io.IOException; -import java.nio.charset.UnsupportedCharsetException; - -import nu.validator.htmlparser.io.Encoding; - -import org.mozilla.intl.chardet.nsDetector; -import org.mozilla.intl.chardet.nsICharsetDetectionObserver; -import org.mozilla.intl.chardet.nsPSMDetector; - -import com.ibm.icu.text.CharsetDetector; - -public class ChardetSniffer implements nsICharsetDetectionObserver { - - private final byte[] source; - - private final int length; - - private Encoding returnValue = null; - - /** - * @param source - */ - public ChardetSniffer(final byte[] source, final int length) { - this.source = source; - this.length = length; - } - - public Encoding sniff() throws IOException { - nsDetector detector = new nsDetector(nsPSMDetector.ALL); - detector.Init(this); - detector.DoIt(source, length, false); - detector.DataEnd(); - if (returnValue != null && returnValue != Encoding.WINDOWS1252 && returnValue.isAsciiSuperset()) { - return returnValue; - } else { - return null; - } - } - - public static void main(String[] args) { - String[] detectable = CharsetDetector.getAllDetectableCharsets(); - for (int i = 0; i < detectable.length; i++) { - String charset = detectable[i]; - System.out.println(charset); - } - } - - public void Notify(String charsetName) { - try { - Encoding enc = Encoding.forName(charsetName); - Encoding actual = enc.getActualHtmlEncoding(); - if (actual != null) { - enc = actual; - } - returnValue = enc; - } catch (UnsupportedCharsetException e) { - returnValue = null; - } - } -} diff --git a/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java b/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java deleted file mode 100644 index f3caab5c..00000000 --- a/src/nu/validator/htmlparser/extra/IcuDetectorSniffer.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this 
software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.extra; - -import java.io.IOException; -import java.io.InputStream; - -import nu.validator.htmlparser.common.ByteReadable; -import nu.validator.htmlparser.io.Encoding; - -import com.ibm.icu.text.CharsetDetector; -import com.ibm.icu.text.CharsetMatch; - -public class IcuDetectorSniffer extends InputStream { - - private final ByteReadable source; - - /** - * @param source - */ - public IcuDetectorSniffer(final ByteReadable source) { - this.source = source; - } - - @Override - public int read() throws IOException { - return source.readByte(); - } - - public Encoding sniff() throws IOException { - try { - CharsetDetector detector = new CharsetDetector(); - detector.setText(this); - CharsetMatch match = detector.detect(); - Encoding enc = Encoding.forName(match.getName()); - Encoding actual = enc.getActualHtmlEncoding(); - if (actual != null) { - enc = actual; - } - if (enc != Encoding.WINDOWS1252 && enc.isAsciiSuperset()) { - return enc; - } else { - 
return null; - } - } catch (Exception e) { - return null; - } - } - - public static void main(String[] args) { - String[] detectable = CharsetDetector.getAllDetectableCharsets(); - for (int i = 0; i < detectable.length; i++) { - String charset = detectable[i]; - System.out.println(charset); - } - } -} diff --git a/src/nu/validator/htmlparser/extra/NormalizationChecker.java b/src/nu/validator/htmlparser/extra/NormalizationChecker.java deleted file mode 100644 index 45df62fb..00000000 --- a/src/nu/validator/htmlparser/extra/NormalizationChecker.java +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright (c) 2006, 2007 Henri Sivonen - * Copyright (c) 2007 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.extra; - -import nu.validator.htmlparser.common.CharacterHandler; - -import org.xml.sax.ErrorHandler; -import org.xml.sax.Locator; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.UnicodeSet; - -/** - * @version $Id$ - * @author hsivonen - */ -public final class NormalizationChecker implements CharacterHandler { - - private ErrorHandler errorHandler; - - private Locator locator; - - /** - * A thread-safe set of composing characters as per Charmod Norm. - */ - @SuppressWarnings("deprecation") - private static final UnicodeSet COMPOSING_CHARACTERS = (UnicodeSet) new UnicodeSet( - "[[:nfc_qc=maybe:][:^ccc=0:]]").freeze(); - - // see http://sourceforge.net/mailarchive/message.php?msg_id=37279908 - - /** - * A buffer for holding sequences overlap the SAX buffer boundary. - */ - private char[] buf = new char[128]; - - /** - * A holder for the original buffer (for the memory leak prevention - * mechanism). - */ - private char[] bufHolder = null; - - /** - * The current used length of the buffer, i.e. the index of the first slot - * that does not hold current data. - */ - private int pos; - - /** - * Indicates whether the checker the next call to characters() - * is the first call in a run. - */ - private boolean atStartOfRun; - - /** - * Indicates whether the current run has already caused an error. - */ - private boolean alreadyComplainedAboutThisRun; - - /** - * Emit an error. The locator is used. - * - * @param message the error message - * @throws SAXException if something goes wrong - */ - public void err(String message) throws SAXException { - if (errorHandler != null) { - SAXParseException spe = new SAXParseException(message, locator); - errorHandler.error(spe); - } - } - - /** - * Returns true if the argument is a composing BMP character - * or a surrogate and false otherwise. 
- * - * @param c a UTF-16 code unit - * @return true if the argument is a composing BMP character - * or a surrogate and false otherwise - */ - private static boolean isComposingCharOrSurrogate(char c) { - if (UCharacter.isHighSurrogate(c) || UCharacter.isLowSurrogate(c)) { - return true; - } - return isComposingChar(c); - } - - /** - * Returns true if the argument is a composing character - * and false otherwise. - * - * @param c a Unicode code point - * @return true if the argument is a composing character - * false otherwise - */ - private static boolean isComposingChar(int c) { - return COMPOSING_CHARACTERS.contains(c); - } - - /** - * Constructor with mode selection. - * - * @param sourceTextMode whether the source text-related messages - * should be enabled. - */ - public NormalizationChecker(Locator locator) { - super(); - start(); - } - - /** - * @see nu.validator.htmlparser.common.CharacterHandler#start() - */ - public void start() { - atStartOfRun = true; - alreadyComplainedAboutThisRun = false; - pos = 0; - } - - /** - * @see nu.validator.htmlparser.common.CharacterHandler#characters(char[], int, int) - */ - public void characters(char[] ch, int start, int length) - throws SAXException { - if (alreadyComplainedAboutThisRun) { - return; - } - if (atStartOfRun) { - char c = ch[start]; - if (pos == 1) { - // there's a single high surrogate in buf - if (isComposingChar(UCharacter.getCodePoint(buf[0], c))) { - err("Text run starts with a composing character."); - } - atStartOfRun = false; - } else { - if (length == 1 && UCharacter.isHighSurrogate(c)) { - buf[0] = c; - pos = 1; - return; - } else { - if (UCharacter.isHighSurrogate(c)) { - if (isComposingChar(UCharacter.getCodePoint(c, - ch[start + 1]))) { - err("Text run starts with a composing character."); - } - } else { - if (isComposingCharOrSurrogate(c)) { - err("Text run starts with a composing character."); - } - } - atStartOfRun = false; - } - } - } - int i = start; - int stop = start + length; - if 
(pos > 0) { - // there's stuff in buf - while (i < stop && isComposingCharOrSurrogate(ch[i])) { - i++; - } - appendToBuf(ch, start, i); - if (i == stop) { - return; - } else { - if (!Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { - errAboutTextRun(); - } - pos = 0; - } - } - if (i < stop) { - start = i; - i = stop - 1; - while (i > start && isComposingCharOrSurrogate(ch[i])) { - i--; - } - if (i > start) { - if (!Normalizer.isNormalized(ch, start, i, Normalizer.NFC, 0)) { - errAboutTextRun(); - } - } - appendToBuf(ch, i, stop); - } - } - - /** - * Emits an error stating that the current text run or the source - * text is not in NFC. - * - * @throws SAXException if the ErrorHandler throws - */ - private void errAboutTextRun() throws SAXException { - err("Source text is not in Unicode Normalization Form C."); - alreadyComplainedAboutThisRun = true; - } - - /** - * Appends a slice of an UTF-16 code unit array to the internal - * buffer. - * - * @param ch the array from which to copy - * @param start the index of the first element that is copied - * @param end the index of the first element that is not copied - */ - private void appendToBuf(char[] ch, int start, int end) { - if (start == end) { - return; - } - int neededBufLen = pos + (end - start); - if (neededBufLen > buf.length) { - char[] newBuf = new char[neededBufLen]; - System.arraycopy(buf, 0, newBuf, 0, pos); - if (bufHolder == null) { - bufHolder = buf; // keep the original around - } - buf = newBuf; - } - System.arraycopy(ch, start, buf, pos, end - start); - pos += (end - start); - } - - /** - * @see nu.validator.htmlparser.common.CharacterHandler#end() - */ - public void end() throws SAXException { - if (!alreadyComplainedAboutThisRun - && !Normalizer.isNormalized(buf, 0, pos, Normalizer.NFC, 0)) { - errAboutTextRun(); - } - if (bufHolder != null) { - // restore the original small buffer to avoid leaking - // memory if this checker is recycled - buf = bufHolder; - bufHolder = null; - } - } - - 
public void setErrorHandler(ErrorHandler errorHandler) { - this.errorHandler = errorHandler; - } - -} diff --git a/src/nu/validator/htmlparser/sax/package.html b/src/nu/validator/htmlparser/sax/package.html deleted file mode 100644 index 60532962..00000000 --- a/src/nu/validator/htmlparser/sax/package.html +++ /dev/null @@ -1,29 +0,0 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document through the SAX API.

    - - \ No newline at end of file diff --git a/src/nu/validator/htmlparser/xom/package.html b/src/nu/validator/htmlparser/xom/package.html deleted file mode 100644 index a936d5e3..00000000 --- a/src/nu/validator/htmlparser/xom/package.html +++ /dev/null @@ -1,29 +0,0 @@ - - -Package Overview - - - -

    This package provides an HTML5 parser that exposes the document through the XOM API.

    - - \ No newline at end of file diff --git a/src/nu/validator/saxtree/package.html b/src/nu/validator/saxtree/package.html deleted file mode 100644 index 0c34dad8..00000000 --- a/src/nu/validator/saxtree/package.html +++ /dev/null @@ -1,46 +0,0 @@ - - -Package Overview - - - -

    This package provides SAX Tree: a tree model optimized for creation from SAX -events and replay as SAX events.

    -

    Design Principles

    -
      -
    1. Preserve information exposed through ContentHandler, -LexicalHandler and Locator. -
    2. Creation from SAX events or as part of the parse of a conforming -HTML5 document should be fast.
    3. -
    4. Emitting SAX events based on the tree should be fast.
    5. -
    6. Mutations should be possible but should not make the above -"fast" cases slower.
    7. -
    8. Concurrent reads should work without locking when there are no -concurrent mutations.
    9. -
    10. The user of the API has the responsibility of using the API properly: -for the sake of performance, the model does not check if it is being -used properly. Improper use may, therefore, put the model in and -inconsistent state.
    11. -
    - - \ No newline at end of file diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java deleted file mode 100644 index b25432d4..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/Attributes.java +++ /dev/null @@ -1,257 +0,0 @@ -// Attributes.java - attribute list with Namespace support -// http://www.saxproject.org -// Written by David Megginson -// NO WARRANTY! This class is in the public domain. -// $Id: Attributes.java,v 1.13 2004/03/18 12:28:05 dmegginson Exp $ - -package org.xml.sax; - - -/** - * Interface for a list of XML attributes. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This interface allows access to a list of attributes in - * three different ways:

    - * - *
      - *
    1. by attribute index;
    2. - *
    3. by Namespace-qualified name; or
    4. - *
    5. by qualified (prefixed) name.
    6. - *
    - * - *

    The list will not contain attributes that were declared - * #IMPLIED but not specified in the start tag. It will also not - * contain attributes used as Namespace declarations (xmlns*) unless - * the http://xml.org/sax/features/namespace-prefixes - * feature is set to true (it is false by - * default). - * Because SAX2 conforms to the original "Namespaces in XML" - * recommendation, it normally does not - * give namespace declaration attributes a namespace URI. - *

    - * - *

    Some SAX2 parsers may support using an optional feature flag - * (http://xml.org/sax/features/xmlns-uris) to request - * that those attributes be given URIs, conforming to a later - * backwards-incompatible revision of that recommendation. (The - * attribute's "local name" will be the prefix, or "xmlns" when - * defining a default element namespace.) For portability, handler - * code should always resolve that conflict, rather than requiring - * parsers that can change the setting of that feature flag.

    - * - *

    If the namespace-prefixes feature (see above) is - * false, access by qualified name may not be available; if - * the http://xml.org/sax/features/namespaces feature is - * false, access by Namespace-qualified names may not be - * available.

    - * - *

    This interface replaces the now-deprecated SAX1 {@link - * org.xml.sax.AttributeList AttributeList} interface, which does not - * contain Namespace support. In addition to Namespace support, it - * adds the getIndex methods (below).

    - * - *

    The order of attributes in the list is unspecified, and will - * vary from implementation to implementation.

    - * - * @since SAX 2.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.helpers.AttributesImpl - * @see org.xml.sax.ext.DeclHandler#attributeDecl - */ -public interface Attributes -{ - - - //////////////////////////////////////////////////////////////////// - // Indexed access. - //////////////////////////////////////////////////////////////////// - - - /** - * Return the number of attributes in the list. - * - *

    Once you know the number of attributes, you can iterate - * through the list.

    - * - * @return The number of attributes in the list. - * @see #getURI(int) - * @see #getLocalName(int) - * @see #getQName(int) - * @see #getType(int) - * @see #getValue(int) - */ - public abstract int getLength (); - - - /** - * Look up an attribute's Namespace URI by index. - * - * @param index The attribute index (zero-based). - * @return The Namespace URI, or the empty string if none - * is available, or null if the index is out of - * range. - * @see #getLength - */ - public abstract String getURI (int index); - - - /** - * Look up an attribute's local name by index. - * - * @param index The attribute index (zero-based). - * @return The local name, or the empty string if Namespace - * processing is not being performed, or null - * if the index is out of range. - * @see #getLength - */ - public abstract String getLocalName (int index); - - - /** - * Look up an attribute's XML qualified (prefixed) name by index. - * - * @param index The attribute index (zero-based). - * @return The XML qualified name, or the empty string - * if none is available, or null if the index - * is out of range. - * @see #getLength - */ - public abstract String getQName (int index); - - - /** - * Look up an attribute's type by index. - * - *

    The attribute type is one of the strings "CDATA", "ID", - * "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", - * or "NOTATION" (always in upper case).

    - * - *

    If the parser has not read a declaration for the attribute, - * or if the parser does not report attribute types, then it must - * return the value "CDATA" as stated in the XML 1.0 Recommendation - * (clause 3.3.3, "Attribute-Value Normalization").

    - * - *

    For an enumerated attribute that is not a notation, the - * parser will report the type as "NMTOKEN".

    - * - * @param index The attribute index (zero-based). - * @return The attribute's type as a string, or null if the - * index is out of range. - * @see #getLength - */ - public abstract String getType (int index); - - - /** - * Look up an attribute's value by index. - * - *

    If the attribute value is a list of tokens (IDREFS, - * ENTITIES, or NMTOKENS), the tokens will be concatenated - * into a single string with each token separated by a - * single space.

    - * - * @param index The attribute index (zero-based). - * @return The attribute's value as a string, or null if the - * index is out of range. - * @see #getLength - */ - public abstract String getValue (int index); - - - - //////////////////////////////////////////////////////////////////// - // Name-based query. - //////////////////////////////////////////////////////////////////// - - - /** - * Look up the index of an attribute by Namespace name. - * - * @param uri The Namespace URI, or the empty string if - * the name has no Namespace URI. - * @param localName The attribute's local name. - * @return The index of the attribute, or -1 if it does not - * appear in the list. - */ - public int getIndex (String uri, String localName); - - - /** - * Look up the index of an attribute by XML qualified (prefixed) name. - * - * @param qName The qualified (prefixed) name. - * @return The index of the attribute, or -1 if it does not - * appear in the list. - */ - public int getIndex (String qName); - - - /** - * Look up an attribute's type by Namespace name. - * - *

    See {@link #getType(int) getType(int)} for a description - * of the possible types.

    - * - * @param uri The Namespace URI, or the empty String if the - * name has no Namespace URI. - * @param localName The local name of the attribute. - * @return The attribute type as a string, or null if the - * attribute is not in the list or if Namespace - * processing is not being performed. - */ - public abstract String getType (String uri, String localName); - - - /** - * Look up an attribute's type by XML qualified (prefixed) name. - * - *

    See {@link #getType(int) getType(int)} for a description - * of the possible types.

    - * - * @param qName The XML qualified name. - * @return The attribute type as a string, or null if the - * attribute is not in the list or if qualified names - * are not available. - */ - public abstract String getType (String qName); - - - /** - * Look up an attribute's value by Namespace name. - * - *

    See {@link #getValue(int) getValue(int)} for a description - * of the possible values.

    - * - * @param uri The Namespace URI, or the empty String if the - * name has no Namespace URI. - * @param localName The local name of the attribute. - * @return The attribute value as a string, or null if the - * attribute is not in the list. - */ - public abstract String getValue (String uri, String localName); - - - /** - * Look up an attribute's value by XML qualified (prefixed) name. - * - *

    See {@link #getValue(int) getValue(int)} for a description - * of the possible values.

    - * - * @param qName The XML qualified name. - * @return The attribute value as a string, or null if the - * attribute is not in the list or if qualified names - * are not available. - */ - public abstract String getValue (String qName); - -} - -// end of Attributes.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java deleted file mode 100644 index 37d25014..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/ErrorHandler.java +++ /dev/null @@ -1,139 +0,0 @@ -// SAX error handler. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: ErrorHandler.java,v 1.10 2004/03/08 13:01:00 dmegginson Exp $ - -package org.xml.sax; - - -/** - * Basic interface for SAX error handlers. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    If a SAX application needs to implement customized error - * handling, it must implement this interface and then register an - * instance with the XML reader using the - * {@link org.xml.sax.XMLReader#setErrorHandler setErrorHandler} - * method. The parser will then report all errors and warnings - * through this interface.

    - * - *

    WARNING: If an application does not - * register an ErrorHandler, XML parsing errors will go unreported, - * except that SAXParseExceptions will be thrown for fatal errors. - * In order to detect validity errors, an ErrorHandler that does something - * with {@link #error error()} calls must be registered.

    - * - *

    For XML processing errors, a SAX driver must use this interface - * in preference to throwing an exception: it is up to the application - * to decide whether to throw an exception for different types of - * errors and warnings. Note, however, that there is no requirement that - * the parser continue to report additional errors after a call to - * {@link #fatalError fatalError}. In other words, a SAX driver class - * may throw an exception after reporting any fatalError. - * Also parsers may throw appropriate exceptions for non-XML errors. - * For example, {@link XMLReader#parse XMLReader.parse()} would throw - * an IOException for errors accessing entities or the document.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1+ (sax2r3pre1) - * @see org.xml.sax.XMLReader#setErrorHandler - * @see org.xml.sax.SAXParseException - */ -public interface ErrorHandler { - - - /** - * Receive notification of a warning. - * - *

    SAX parsers will use this method to report conditions that - * are not errors or fatal errors as defined by the XML - * recommendation. The default behaviour is to take no - * action.

    - * - *

    The SAX parser must continue to provide normal parsing events - * after invoking this method: it should still be possible for the - * application to process the document through to the end.

    - * - *

    Filters may use this method to report other, non-XML warnings - * as well.

    - * - * @param exception The warning information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void warning (SAXParseException exception) - throws SAXException; - - - /** - * Receive notification of a recoverable error. - * - *

    This corresponds to the definition of "error" in section 1.2 - * of the W3C XML 1.0 Recommendation. For example, a validating - * parser would use this callback to report the violation of a - * validity constraint. The default behaviour is to take no - * action.

    - * - *

    The SAX parser must continue to provide normal parsing - * events after invoking this method: it should still be possible - * for the application to process the document through to the end. - * If the application cannot do so, then the parser should report - * a fatal error even if the XML recommendation does not require - * it to do so.

    - * - *

    Filters may use this method to report other, non-XML errors - * as well.

    - * - * @param exception The error information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void error (SAXParseException exception) - throws SAXException; - - - /** - * Receive notification of a non-recoverable error. - * - *

    There is an apparent contradiction between the - * documentation for this method and the documentation for {@link - * org.xml.sax.ContentHandler#endDocument}. Until this ambiguity - * is resolved in a future major release, clients should make no - * assumptions about whether endDocument() will or will not be - * invoked when the parser has reported a fatalError() or thrown - * an exception.

    - * - *

    This corresponds to the definition of "fatal error" in - * section 1.2 of the W3C XML 1.0 Recommendation. For example, a - * parser would use this callback to report the violation of a - * well-formedness constraint.

    - * - *

    The application must assume that the document is unusable - * after the parser has invoked this method, and should continue - * (if at all) only for the sake of collecting additional error - * messages: in fact, SAX parsers are free to stop reporting any - * other events once this method has been invoked.

    - * - * @param exception The error information encapsulated in a - * SAX parse exception. - * @exception org.xml.sax.SAXException Any SAX exception, possibly - * wrapping another exception. - * @see org.xml.sax.SAXParseException - */ - public abstract void fatalError (SAXParseException exception) - throws SAXException; - -} - -// end of ErrorHandler.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java deleted file mode 100644 index f8f3484c..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/Locator.java +++ /dev/null @@ -1,136 +0,0 @@ -// SAX locator interface for document events. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: Locator.java,v 1.8 2002/01/30 21:13:47 dbrownell Exp $ - -package org.xml.sax; - - -/** - * Interface for associating a SAX event with a document location. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    If a SAX parser provides location information to the SAX - * application, it does so by implementing this interface and then - * passing an instance to the application using the content - * handler's {@link org.xml.sax.ContentHandler#setDocumentLocator - * setDocumentLocator} method. The application can use the - * object to obtain the location of any other SAX event - * in the XML source document.

    - * - *

    Note that the results returned by the object will be valid only - * during the scope of each callback method: the application - * will receive unpredictable results if it attempts to use the - * locator at any other time, or after parsing completes.

    - * - *

    SAX parsers are not required to supply a locator, but they are - * very strongly encouraged to do so. If the parser supplies a - * locator, it must do so before reporting any other document events. - * If no locator has been set by the time the application receives - * the {@link org.xml.sax.ContentHandler#startDocument startDocument} - * event, the application should assume that a locator is not - * available.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.ContentHandler#setDocumentLocator - */ -public interface Locator { - - - /** - * Return the public identifier for the current document event. - * - *

    The return value is the public identifier of the document - * entity or of the external parsed entity in which the markup - * triggering the event appears.

    - * - * @return A string containing the public identifier, or - * null if none is available. - * @see #getSystemId - */ - public abstract String getPublicId (); - - - /** - * Return the system identifier for the current document event. - * - *

    The return value is the system identifier of the document - * entity or of the external parsed entity in which the markup - * triggering the event appears.

    - * - *

    If the system identifier is a URL, the parser must resolve it - * fully before passing it to the application. For example, a file - * name must always be provided as a file:... URL, and other - * kinds of relative URI are also resolved against their bases.

    - * - * @return A string containing the system identifier, or null - * if none is available. - * @see #getPublicId - */ - public abstract String getSystemId (); - - - /** - * Return the line number where the current document event ends. - * Lines are delimited by line ends, which are defined in - * the XML specification. - * - *

    Warning: The return value from the method - * is intended only as an approximation for the sake of diagnostics; - * it is not intended to provide sufficient information - * to edit the character content of the original XML document. - * In some cases, these "line" numbers match what would be displayed - * as columns, and in others they may not match the source text - * due to internal entity expansion.

    - * - *

    The return value is an approximation of the line number - * in the document entity or external parsed entity where the - * markup triggering the event appears.

    - * - *

    If possible, the SAX driver should provide the line position - * of the first character after the text associated with the document - * event. The first line is line 1.

    - * - * @return The line number, or -1 if none is available. - * @see #getColumnNumber - */ - public abstract int getLineNumber (); - - - /** - * Return the column number where the current document event ends. - * This is one-based number of Java char values since - * the last line end. - * - *

    Warning: The return value from the method - * is intended only as an approximation for the sake of diagnostics; - * it is not intended to provide sufficient information - * to edit the character content of the original XML document. - * For example, when lines contain combining character sequences, wide - * characters, surrogate pairs, or bi-directional text, the value may - * not correspond to the column in a text editor's display.

    - * - *

    The return value is an approximation of the column number - * in the document entity or external parsed entity where the - * markup triggering the event appears.

    - * - *

    If possible, the SAX driver should provide the line position - * of the first character after the text associated with the document - * event. The first column in each line is column 1.

    - * - * @return The column number, or -1 if none is available. - * @see #getLineNumber - */ - public abstract int getColumnNumber (); - -} - -// end of Locator.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java deleted file mode 100644 index 256719ce..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXException.java +++ /dev/null @@ -1,153 +0,0 @@ -// SAX exception class. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: SAXException.java,v 1.7 2002/01/30 21:13:48 dbrownell Exp $ - -package org.xml.sax; - -/** - * Encapsulate a general SAX error or warning. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This class can contain basic error or warning information from - * either the XML parser or the application: a parser writer or - * application writer can subclass it to provide additional - * functionality. SAX handlers may throw this exception or - * any exception subclassed from it.

    - * - *

    If the application needs to pass through other types of - * exceptions, it must wrap those exceptions in a SAXException - * or an exception derived from a SAXException.

    - * - *

    If the parser or application needs to include information about a - * specific location in an XML document, it should use the - * {@link org.xml.sax.SAXParseException SAXParseException} subclass.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.SAXParseException - */ -public class SAXException extends Exception { - - - /** - * Create a new SAXException. - */ - public SAXException () - { - super(); - this.exception = null; - } - - - /** - * Create a new SAXException. - * - * @param message The error or warning message. - */ - public SAXException (String message) { - super(message); - this.exception = null; - } - - - /** - * Create a new SAXException wrapping an existing exception. - * - *

    The existing exception will be embedded in the new - * one, and its message will become the default message for - * the SAXException.

    - * - * @param e The exception to be wrapped in a SAXException. - */ - public SAXException (Exception e) - { - super(); - this.exception = e; - } - - - /** - * Create a new SAXException from an existing exception. - * - *

    The existing exception will be embedded in the new - * one, but the new exception will have its own message.

    - * - * @param message The detail message. - * @param e The exception to be wrapped in a SAXException. - */ - public SAXException (String message, Exception e) - { - super(message); - this.exception = e; - } - - - /** - * Return a detail message for this exception. - * - *

    If there is an embedded exception, and if the SAXException - * has no detail message of its own, this method will return - * the detail message from the embedded exception.

    - * - * @return The error or warning message. - */ - public String getMessage () - { - String message = super.getMessage(); - - if (message == null && exception != null) { - return exception.getMessage(); - } else { - return message; - } - } - - - /** - * Return the embedded exception, if any. - * - * @return The embedded exception, or null if there is none. - */ - public Exception getException () - { - return exception; - } - - - /** - * Override toString to pick up any embedded exception. - * - * @return A string representation of this exception. - */ - public String toString () - { - if (exception != null) { - return exception.toString(); - } else { - return super.toString(); - } - } - - - - ////////////////////////////////////////////////////////////////////// - // Internal state. - ////////////////////////////////////////////////////////////////////// - - - /** - * @serial The embedded exception if tunnelling, or null. - */ - private Exception exception; - -} - -// end of SAXException.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java b/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java deleted file mode 100644 index 1df5e142..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/SAXParseException.java +++ /dev/null @@ -1,269 +0,0 @@ -// SAX exception class. -// http://www.saxproject.org -// No warranty; no copyright -- use this as you will. -// $Id: SAXParseException.java,v 1.11 2004/04/21 13:05:02 dmegginson Exp $ - -package org.xml.sax; - -/** - * Encapsulate an XML parse error or warning. - * - *
    - * This module, both source code and documentation, is in the - * Public Domain, and comes with NO WARRANTY. - * See http://www.saxproject.org - * for further information. - *
    - * - *

    This exception may include information for locating the error - * in the original XML document, as if it came from a {@link Locator} - * object. Note that although the application - * will receive a SAXParseException as the argument to the handlers - * in the {@link org.xml.sax.ErrorHandler ErrorHandler} interface, - * the application is not actually required to throw the exception; - * instead, it can simply read the information in it and take a - * different action.

    - * - *

    Since this exception is a subclass of {@link org.xml.sax.SAXException - * SAXException}, it inherits the ability to wrap another exception.

    - * - * @since SAX 1.0 - * @author David Megginson - * @version 2.0.1 (sax2r2) - * @see org.xml.sax.SAXException - * @see org.xml.sax.Locator - * @see org.xml.sax.ErrorHandler - */ -public class SAXParseException extends SAXException { - - - ////////////////////////////////////////////////////////////////////// - // Constructors. - ////////////////////////////////////////////////////////////////////// - - - /** - * Create a new SAXParseException from a message and a Locator. - * - *

    This constructor is especially useful when an application is - * creating its own exception from within a {@link org.xml.sax.ContentHandler - * ContentHandler} callback.

    - * - * @param message The error or warning message. - * @param locator The locator object for the error or warning (may be - * null). - * @see org.xml.sax.Locator - */ - public SAXParseException (String message, Locator locator) { - super(message); - if (locator != null) { - init(locator.getPublicId(), locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()); - } else { - init(null, null, -1, -1); - } - } - - - /** - * Wrap an existing exception in a SAXParseException. - * - *

    This constructor is especially useful when an application is - * creating its own exception from within a {@link org.xml.sax.ContentHandler - * ContentHandler} callback, and needs to wrap an existing exception that is not a - * subclass of {@link org.xml.sax.SAXException SAXException}.

    - * - * @param message The error or warning message, or null to - * use the message from the embedded exception. - * @param locator The locator object for the error or warning (may be - * null). - * @param e Any exception. - * @see org.xml.sax.Locator - */ - public SAXParseException (String message, Locator locator, - Exception e) { - super(message, e); - if (locator != null) { - init(locator.getPublicId(), locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()); - } else { - init(null, null, -1, -1); - } - } - - - /** - * Create a new SAXParseException. - * - *

    This constructor is most useful for parser writers.

    - * - *

    All parameters except the message are as if - * they were provided by a {@link Locator}. For example, if the - * system identifier is a URL (including relative filename), the - * caller must resolve it fully before creating the exception.

    - * - * - * @param message The error or warning message. - * @param publicId The public identifier of the entity that generated - * the error or warning. - * @param systemId The system identifier of the entity that generated - * the error or warning. - * @param lineNumber The line number of the end of the text that - * caused the error or warning. - * @param columnNumber The column number of the end of the text that - * cause the error or warning. - */ - public SAXParseException (String message, String publicId, String systemId, - int lineNumber, int columnNumber) - { - super(message); - init(publicId, systemId, lineNumber, columnNumber); - } - - - /** - * Create a new SAXParseException with an embedded exception. - * - *

    This constructor is most useful for parser writers who - * need to wrap an exception that is not a subclass of - * {@link org.xml.sax.SAXException SAXException}.

    - * - *

    All parameters except the message and exception are as if - * they were provided by a {@link Locator}. For example, if the - * system identifier is a URL (including relative filename), the - * caller must resolve it fully before creating the exception.

    - * - * @param message The error or warning message, or null to use - * the message from the embedded exception. - * @param publicId The public identifier of the entity that generated - * the error or warning. - * @param systemId The system identifier of the entity that generated - * the error or warning. - * @param lineNumber The line number of the end of the text that - * caused the error or warning. - * @param columnNumber The column number of the end of the text that - * cause the error or warning. - * @param e Another exception to embed in this one. - */ - public SAXParseException (String message, String publicId, String systemId, - int lineNumber, int columnNumber, Exception e) - { - super(message, e); - init(publicId, systemId, lineNumber, columnNumber); - } - - - /** - * Internal initialization method. - * - * @param publicId The public identifier of the entity which generated the exception, - * or null. - * @param systemId The system identifier of the entity which generated the exception, - * or null. - * @param lineNumber The line number of the error, or -1. - * @param columnNumber The column number of the error, or -1. - */ - private void init (String publicId, String systemId, - int lineNumber, int columnNumber) - { - this.publicId = publicId; - this.systemId = systemId; - this.lineNumber = lineNumber; - this.columnNumber = columnNumber; - } - - - /** - * Get the public identifier of the entity where the exception occurred. - * - * @return A string containing the public identifier, or null - * if none is available. - * @see org.xml.sax.Locator#getPublicId - */ - public String getPublicId () - { - return this.publicId; - } - - - /** - * Get the system identifier of the entity where the exception occurred. - * - *

    If the system identifier is a URL, it will have been resolved - * fully.

    - * - * @return A string containing the system identifier, or null - * if none is available. - * @see org.xml.sax.Locator#getSystemId - */ - public String getSystemId () - { - return this.systemId; - } - - - /** - * The line number of the end of the text where the exception occurred. - * - *

    The first line is line 1.

    - * - * @return An integer representing the line number, or -1 - * if none is available. - * @see org.xml.sax.Locator#getLineNumber - */ - public int getLineNumber () - { - return this.lineNumber; - } - - - /** - * The column number of the end of the text where the exception occurred. - * - *

    The first column in a line is position 1.

    - * - * @return An integer representing the column number, or -1 - * if none is available. - * @see org.xml.sax.Locator#getColumnNumber - */ - public int getColumnNumber () - { - return this.columnNumber; - } - - - ////////////////////////////////////////////////////////////////////// - // Internal state. - ////////////////////////////////////////////////////////////////////// - - - /** - * @serial The public identifier, or null. - * @see #getPublicId - */ - private String publicId; - - - /** - * @serial The system identifier, or null. - * @see #getSystemId - */ - private String systemId; - - - /** - * @serial The line number, or -1. - * @see #getLineNumber - */ - private int lineNumber; - - - /** - * @serial The column number, or -1. - * @see #getColumnNumber - */ - private int columnNumber; - -} - -// end of SAXParseException.java diff --git a/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html b/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html deleted file mode 100644 index dd7030e2..00000000 --- a/super/nu/validator/htmlparser/translatable/org/xml/sax/package.html +++ /dev/null @@ -1,297 +0,0 @@ - - - - -

    This package provides the core SAX APIs. -Some SAX1 APIs are deprecated to encourage integration of -namespace-awareness into designs of new applications -and into maintenance of existing infrastructure.

    - -

    See http://www.saxproject.org -for more information about SAX.

    - - -

    SAX2 Standard Feature Flags

    - -

    One of the essential characteristics of SAX2 is that it added -feature flags which can be used to examine and perhaps modify -parser modes, in particular modes such as validation. -Since features are identified by (absolute) URIs, anyone -can define such features. -Currently defined standard feature URIs have the prefix -http://xml.org/sax/features/ before an identifier such as -validation. Turn features on or off using -setFeature. Those standard identifiers are:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Feature IDAccessDefaultDescription
    external-general-entitiesread/writeunspecified Reports whether this parser processes external - general entities; always true if validating. -
    external-parameter-entitiesread/writeunspecified Reports whether this parser processes external - parameter entities; always true if validating. -
    is-standalone(parsing) read-only, (not parsing) nonenot applicable May be examined only during a parse, after the - startDocument() callback has been completed; read-only. - The value is true if the document specified standalone="yes" in - its XML declaration, and otherwise is false. -
    lexical-handler/parameter-entitiesread/writeunspecified A value of "true" indicates that the LexicalHandler will report - the beginning and end of parameter entities. -
    namespacesread/writetrue A value of "true" indicates namespace URIs and unprefixed local names - for element and attribute names will be available. -
    namespace-prefixesread/writefalse A value of "true" indicates that XML qualified names (with prefixes) and - attributes (including xmlns* attributes) will be available. -
    resolve-dtd-urisread/writetrue A value of "true" indicates that system IDs in declarations will - be absolutized (relative to their base URIs) before reporting. - (That is the default behavior for all SAX2 XML parsers.) - A value of "false" indicates those IDs will not be absolutized; - parsers will provide the base URI from - Locator.getSystemId(). - This applies to system IDs passed in
      -
    • DTDHandler.notationDecl(), -
    • DTDHandler.unparsedEntityDecl(), and -
    • DeclHandler.externalEntityDecl(). -
    - It does not apply to EntityResolver.resolveEntity(), - which is not used to report declarations, or to - LexicalHandler.startDTD(), which already provides - the non-absolutized URI. -
    string-interningread/writeunspecified Has a value of "true" if all XML names (for elements, prefixes, - attributes, entities, notations, and local names), - as well as Namespace URIs, will have been interned - using java.lang.String.intern. This supports fast - testing of equality/inequality against string constants, - rather than forcing slower calls to String.equals(). -
    unicode-normalization-checkingread/writefalse Controls whether the parser reports Unicode normalization - errors as described in section 2.13 and Appendix B of the - XML 1.1 Recommendation. If true, Unicode normalization - errors are reported using the ErrorHandler.error() callback. - Such errors are not fatal in themselves (though, obviously, - other Unicode-related encoding errors may be). -
    use-attributes2read-onlynot applicable Returns "true" if the Attributes objects passed by - this parser in ContentHandler.startElement() - implement the org.xml.sax.ext.Attributes2 interface. - That interface exposes additional DTD-related information, - such as whether the attribute was specified in the - source text rather than defaulted. -
    use-locator2read-onlynot applicable Returns "true" if the Locator objects passed by - this parser in ContentHandler.setDocumentLocator() - implement the org.xml.sax.ext.Locator2 interface. - That interface exposes additional entity information, - such as the character encoding and XML version used. -
    use-entity-resolver2read/writetrue Returns "true" if, when setEntityResolver is given - an object implementing the org.xml.sax.ext.EntityResolver2 interface, - those new methods will be used. - Returns "false" to indicate that those methods will not be used. -
    validationread/writeunspecified Controls whether the parser is reporting all validity - errors; if true, all external entities will be read. -
    xmlns-urisread/writefalse Controls whether, when the namespace-prefixes feature - is set, the parser treats namespace declaration attributes as - being in the http://www.w3.org/2000/xmlns/ namespace. - By default, SAX2 conforms to the original "Namespaces in XML" - Recommendation, which explicitly states that such attributes are - not in any namespace. - Setting this optional flag to "true" makes the SAX2 events conform to - a later backwards-incompatible revision of that recommendation, - placing those attributes in a namespace. -
    xml-1.1read-onlynot applicable Returns "true" if the parser supports both XML 1.1 and XML 1.0. - Returns "false" if the parser supports only XML 1.0. -
    - -

    Support for the default values of the -namespaces and namespace-prefixes -properties is required. -Support for any other feature flags is entirely optional. -

    - -

    For default values not specified by SAX2, -each XMLReader implementation specifies its default, -or may choose not to expose the feature flag. -Unless otherwise specified here, -implementations may support changing current values -of these standard feature flags, but not while parsing. -

    - -

    SAX2 Standard Handler and Property IDs

    - -

    For parser interface characteristics that are described -as objects, a separate namespace is defined. The -objects in this namespace are again identified by URI, and -the standard property URIs have the prefix -http://xml.org/sax/properties/ before an identifier such as -lexical-handler or -dom-node. Manage those properties using -setProperty(). Those identifiers are:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Property IDDescription
    declaration-handler Used to see most DTD declarations except those treated - as lexical ("document element name is ...") or which are - mandatory for all SAX parsers (DTDHandler). - The Object must implement org.xml.sax.ext.DeclHandler. -
    document-xml-version May be examined only during a parse, after the startDocument() - callback has been completed; read-only. This property is a - literal string describing the actual XML version of the document, - such as "1.0" or "1.1". -
    dom-node For "DOM Walker" style parsers, which ignore their - parser.parse() parameters, this is used to - specify the DOM (sub)tree being walked by the parser. - The Object must implement the - org.w3c.dom.Node interface. -
    lexical-handler Used to see some syntax events that are essential in some - applications: comments, CDATA delimiters, selected general - entity inclusions, and the start and end of the DTD - (and declaration of document element name). - The Object must implement org.xml.sax.ext.LexicalHandler. -
    xml-string Readable only during a parser callback, this exposes a TBS - chunk of characters responsible for the current event.
    - -

    All of these standard properties are optional; -XMLReader implementations need not support them. -

    - - \ No newline at end of file diff --git a/test-src/nu/validator/htmlparser/tools/HTML2HTML.java b/test-src/nu/validator/htmlparser/tools/HTML2HTML.java deleted file mode 100644 index 5e2cf1f5..00000000 --- a/test-src/nu/validator/htmlparser/tools/HTML2HTML.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -public class HTML2HTML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new HtmlSerializer(out); - - HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALLOW); - - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/HTML2XML.java b/test-src/nu/validator/htmlparser/tools/HTML2XML.java deleted file mode 100644 index 57666f93..00000000 --- a/test-src/nu/validator/htmlparser/tools/HTML2XML.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -public class HTML2XML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new XmlSerializer(out); - - HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); - - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XML2HTML.java b/test-src/nu/validator/htmlparser/tools/XML2HTML.java deleted file mode 100644 index dad89a5b..00000000 --- a/test-src/nu/validator/htmlparser/tools/XML2HTML.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - -public class XML2HTML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new HtmlSerializer(out); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader parser = factory.newSAXParser().getXMLReader(); - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XML2XML.java b/test-src/nu/validator/htmlparser/tools/XML2XML.java deleted file mode 100644 index 2f6aa24d..00000000 --- a/test-src/nu/validator/htmlparser/tools/XML2XML.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2008 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.MalformedURLException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.TransformerException; - -import nu.validator.htmlparser.sax.NameCheckingXmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.xml.sax.ContentHandler; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - -public class XML2XML { - - /** - * @param args - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, - TransformerException { - InputStream in; - OutputStream out; - - switch (args.length) { - case 0: - in = System.in; - out = System.out; - break; - case 1: - in = new FileInputStream(args[0]); - out = System.out; - break; - case 2: - in = new FileInputStream(args[0]); - out = new FileOutputStream(args[1]); - break; - default: - System.err.println("Too many arguments. No arguments to use stdin/stdout. One argument to reading from file and write to stdout. 
Two arguments to read from first file and write to second."); - System.exit(1); - return; - } - - ContentHandler serializer = new NameCheckingXmlSerializer(out); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader parser = factory.newSAXParser().getXMLReader(); - parser.setErrorHandler(new SystemErrErrorHandler()); - parser.setContentHandler(serializer); - parser.setProperty("http://xml.org/sax/properties/lexical-handler", - serializer); - parser.parse(new InputSource(in)); - out.flush(); - out.close(); - } -} diff --git a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java b/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java deleted file mode 100644 index 05d8193c..00000000 --- a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.tools; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.MalformedURLException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParserFactory; -import javax.xml.transform.Templates; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.sax.SAXResult; -import javax.xml.transform.sax.SAXTransformerFactory; -import javax.xml.transform.sax.TemplatesHandler; -import javax.xml.transform.sax.TransformerHandler; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.dom.HtmlDocumentBuilder; -import nu.validator.htmlparser.sax.HtmlParser; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.sax.XmlSerializer; -import nu.validator.htmlparser.test.SystemErrErrorHandler; - -import org.w3c.dom.Document; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; -import org.xml.sax.ext.LexicalHandler; - -public class XSLT4HTML5 { - - private enum Mode { - STREAMING_SAX, BUFFERED_SAX, DOM, - } - - private static final String TEMPLATE = "--template="; - - private static final String INPUT_HTML = "--input-html="; - - private static final String INPUT_XML = "--input-xml="; - - private static final String OUTPUT_HTML = "--output-html="; - - private static final String OUTPUT_XML = "--output-xml="; - - private static final String 
MODE = "--mode="; - - /** - * @param args - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - * @throws MalformedURLException - * @throws TransformerException - */ - public static void main(String[] args) throws SAXException, - ParserConfigurationException, MalformedURLException, IOException, TransformerException { - if (args.length == 0) { - System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); - System.exit(0); - } - String template = null; - String input = null; - boolean inputHtml = false; - String output = null; - boolean outputHtml = false; - Mode mode = null; - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - if (arg.startsWith(TEMPLATE)) { - if (template == null) { - template = arg.substring(TEMPLATE.length()); - } else { - System.err.println("Tried to set template twice."); - System.exit(1); - } - } else if (arg.startsWith(INPUT_HTML)) { - if (input == null) { - input = arg.substring(INPUT_HTML.length()); - inputHtml = true; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(INPUT_XML)) { - if (input == null) { - input = arg.substring(INPUT_XML.length()); - inputHtml = false; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(OUTPUT_HTML)) { - if (output == null) { - output = arg.substring(OUTPUT_HTML.length()); - outputHtml = true; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(OUTPUT_XML)) { - if (output == null) { - output = arg.substring(OUTPUT_XML.length()); - outputHtml = false; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(MODE)) { - if (mode == null) { - String modeStr = arg.substring(MODE.length()); - if ("dom".equals(modeStr)) { - mode = Mode.DOM; - } else if 
("sax-buffered".equals(modeStr)) { - mode = Mode.BUFFERED_SAX; - } else if ("sax-streaming".equals(modeStr)) { - mode = Mode.STREAMING_SAX; - } else { - System.err.println("Unrecognized mode."); - System.exit(5); - } - } else { - System.err.println("Tried to set mode twice."); - System.exit(4); - } - } - } - - if (template == null) { - System.err.println("No template specified."); - System.exit(6); - } - if (input == null) { - System.err.println("No input specified."); - System.exit(7); - } - if (output == null) { - System.err.println("No output specified."); - System.exit(8); - } - if (mode == null) { - mode = Mode.BUFFERED_SAX; - } - - SystemErrErrorHandler errorHandler = new SystemErrErrorHandler(); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - factory.setValidating(false); - XMLReader reader = factory.newSAXParser().getXMLReader(); - reader.setErrorHandler(errorHandler); - - SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(); - transformerFactory.setErrorListener(errorHandler); - TemplatesHandler templatesHandler = transformerFactory.newTemplatesHandler(); - reader.setContentHandler(templatesHandler); - reader.parse(new File(template).toURI().toASCIIString()); - - Templates templates = templatesHandler.getTemplates(); - - FileOutputStream outputStream = new FileOutputStream(output); - ContentHandler serializer; - if (outputHtml) { - serializer = new HtmlSerializer(outputStream); - } else { - serializer = new XmlSerializer(outputStream); - } - SAXResult result = new SAXResult(new XmlnsDropper(serializer)); - result.setLexicalHandler((LexicalHandler) serializer); - - if (mode == Mode.DOM) { - Document inputDoc; - DocumentBuilder builder; - if (inputHtml) { - builder = new HtmlDocumentBuilder(XmlViolationPolicy.ALTER_INFOSET); - } else { - DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); - factory.setNamespaceAware(true); - try { - 
builder = builderFactory.newDocumentBuilder(); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); - } - } - inputDoc = builder.parse(new File(input)); - DOMSource inputSource = new DOMSource(inputDoc, - new File(input).toURI().toASCIIString()); - Transformer transformer = templates.newTransformer(); - transformer.setErrorListener(errorHandler); - transformer.transform(inputSource, result); - } else { - if (inputHtml) { - reader = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); - if (mode == Mode.STREAMING_SAX) { - reader.setProperty("http://validator.nu/properties/streamability-violation-policy", XmlViolationPolicy.FATAL); - } - } - TransformerHandler transformerHandler = transformerFactory.newTransformerHandler(templates); - transformerHandler.setResult(result); - reader.setErrorHandler(errorHandler); - reader.setContentHandler(transformerHandler); - reader.setProperty("http://xml.org/sax/properties/lexical-handler", transformerHandler); - reader.parse(new File(input).toURI().toASCIIString()); - } - outputStream.flush(); - outputStream.close(); - } - -} diff --git a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java b/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java deleted file mode 100644 index b364cc52..00000000 --- a/test-src/nu/validator/htmlparser/tools/XSLT4HTML5XOM.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * Copyright (c) 2007 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be 
included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.tools; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - -import nu.validator.htmlparser.common.XmlViolationPolicy; -import nu.validator.htmlparser.sax.HtmlSerializer; -import nu.validator.htmlparser.xom.HtmlBuilder; -import nu.xom.Builder; -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Nodes; -import nu.xom.ParsingException; -import nu.xom.Serializer; -import nu.xom.ValidityException; -import nu.xom.converters.SAXConverter; -import nu.xom.xslt.XSLException; -import nu.xom.xslt.XSLTransform; - -import org.xml.sax.SAXException; - -public class XSLT4HTML5XOM { - - private static final String TEMPLATE = "--template="; - - private static final String INPUT_HTML = "--input-html="; - - private static final String INPUT_XML = "--input-xml="; - - private static final String OUTPUT_HTML = "--output-html="; - - private static final String OUTPUT_XML = "--output-xml="; - - /** - * @param args - * @throws IOException - * @throws ParsingException - * @throws ValidityException - * @throws XSLException - * @throws SAXException - */ - public static void main(String[] args) throws ValidityException, - ParsingException, IOException, XSLException, SAXException { - if (args.length == 0) { - System.out.println("--template=file --input-[html|xml]=file --output-[html|xml]=file --mode=[sax-streaming|sax-buffered|dom]"); - System.exit(0); - } - 
String template = null; - String input = null; - boolean inputHtml = false; - String output = null; - boolean outputHtml = false; - for (int i = 0; i < args.length; i++) { - String arg = args[i]; - if (arg.startsWith(TEMPLATE)) { - if (template == null) { - template = arg.substring(TEMPLATE.length()); - } else { - System.err.println("Tried to set template twice."); - System.exit(1); - } - } else if (arg.startsWith(INPUT_HTML)) { - if (input == null) { - input = arg.substring(INPUT_HTML.length()); - inputHtml = true; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(INPUT_XML)) { - if (input == null) { - input = arg.substring(INPUT_XML.length()); - inputHtml = false; - } else { - System.err.println("Tried to set input twice."); - System.exit(2); - } - } else if (arg.startsWith(OUTPUT_HTML)) { - if (output == null) { - output = arg.substring(OUTPUT_HTML.length()); - outputHtml = true; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } else if (arg.startsWith(OUTPUT_XML)) { - if (output == null) { - output = arg.substring(OUTPUT_XML.length()); - outputHtml = false; - } else { - System.err.println("Tried to set output twice."); - System.exit(3); - } - } - } - - if (template == null) { - System.err.println("No template specified."); - System.exit(6); - } - if (input == null) { - System.err.println("No input specified."); - System.exit(7); - } - if (output == null) { - System.err.println("No output specified."); - System.exit(8); - } - - Builder builder = new Builder(); - - Document transformationDoc = builder.build(new File(template)); - - XSLTransform transform = new XSLTransform(transformationDoc); - - FileOutputStream outputStream = new FileOutputStream(output); - - Document inputDoc; - if (inputHtml) { - builder = new HtmlBuilder(XmlViolationPolicy.ALTER_INFOSET); - } - inputDoc = builder.build(new File(input)); - Nodes result = transform.transform(inputDoc); - 
Document outputDoc = new Document((Element) result.get(0)); - if (outputHtml) { - HtmlSerializer htmlSerializer = new HtmlSerializer(outputStream); - SAXConverter converter = new SAXConverter(htmlSerializer); - converter.setLexicalHandler(htmlSerializer); - converter.convert(outputDoc); - } else { - Serializer serializer = new Serializer(outputStream); - serializer.write(outputDoc); - } - outputStream.flush(); - outputStream.close(); - } - -} diff --git a/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java b/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java deleted file mode 100644 index 0e6d4b1c..00000000 --- a/test-src/nu/validator/htmlparser/tools/XmlnsDropper.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2007 Henri Sivonen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -package nu.validator.htmlparser.tools; - -import org.xml.sax.Attributes; -import org.xml.sax.ContentHandler; -import org.xml.sax.Locator; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.AttributesImpl; - -/** - * Quick and dirty hack to work around Xalan xmlns weirdness. - * - * @version $Id$ - * @author hsivonen - */ -class XmlnsDropper implements ContentHandler { - - private final ContentHandler delegate; - - /** - * @param delegate - */ - public XmlnsDropper(final ContentHandler delegate) { - this.delegate = delegate; - } - - /** - * @param ch - * @param start - * @param length - * @throws SAXException - * @see org.xml.sax.ContentHandler#characters(char[], int, int) - */ - public void characters(char[] ch, int start, int length) throws SAXException { - delegate.characters(ch, start, length); - } - - /** - * @throws SAXException - * @see org.xml.sax.ContentHandler#endDocument() - */ - public void endDocument() throws SAXException { - delegate.endDocument(); - } - - /** - * @param uri - * @param localName - * @param qName - * @throws SAXException - * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) - */ - public void endElement(String uri, String localName, String qName) throws SAXException { - delegate.endElement(uri, localName, qName); - } - - /** - * @param prefix - * @throws SAXException - * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String) - */ - public void endPrefixMapping(String prefix) throws SAXException { - delegate.endPrefixMapping(prefix); - } - - /** - * @param ch - * @param start - * @param length - * @throws SAXException - * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int) - */ - public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { - delegate.ignorableWhitespace(ch, start, length); - } - - /** - * @param target - * @param data - * @throws SAXException - * @see 
org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String) - */ - public void processingInstruction(String target, String data) throws SAXException { - delegate.processingInstruction(target, data); - } - - /** - * @param locator - * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator) - */ - public void setDocumentLocator(Locator locator) { - delegate.setDocumentLocator(locator); - } - - /** - * @param name - * @throws SAXException - * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String) - */ - public void skippedEntity(String name) throws SAXException { - delegate.skippedEntity(name); - } - - /** - * @throws SAXException - * @see org.xml.sax.ContentHandler#startDocument() - */ - public void startDocument() throws SAXException { - delegate.startDocument(); - } - - /** - * @param uri - * @param localName - * @param qName - * @param atts - * @throws SAXException - * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) - */ - public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { - AttributesImpl ai = new AttributesImpl(); - for (int i = 0; i < atts.getLength(); i++) { - String u = atts.getURI(i); - String t = atts.getType(i); - String v = atts.getValue(i); - String n = atts.getLocalName(i); - String q = atts.getQName(i); - if (q != null) { - if ("xmlns".equals(q) || q.startsWith("xmlns:")) { - continue; - } - } - ai.addAttribute(u, n, q, t, v); - } - delegate.startElement(uri, localName, qName, ai); - } - - /** - * @param prefix - * @param uri - * @throws SAXException - * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String) - */ - public void startPrefixMapping(String prefix, String uri) throws SAXException { - delegate.startPrefixMapping(prefix, uri); - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java 
b/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java deleted file mode 100644 index 98a0226e..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/AnnotationHelperVisitor.java +++ /dev/null @@ -1,159 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.List; - -import japa.parser.ast.expr.AnnotationExpr; -import japa.parser.ast.expr.MarkerAnnotationExpr; -import japa.parser.ast.type.ReferenceType; -import japa.parser.ast.visitor.VoidVisitorAdapter; - -public class AnnotationHelperVisitor extends VoidVisitorAdapter { - - protected List currentAnnotations; - - protected boolean nsUri() { - return hasAnnotation("NsUri"); - } - - protected boolean prefix() { - return hasAnnotation("Prefix"); - } - - protected boolean local() { - return hasAnnotation("Local"); - } - - protected boolean literal() { - return hasAnnotation("Literal"); - } - - protected boolean inline() { - return hasAnnotation("Inline"); - } - - protected boolean noLength() { - return hasAnnotation("NoLength"); - } - - protected boolean unsigned() { - return hasAnnotation("Unsigned"); - } - - protected boolean auto() { - return hasAnnotation("Auto"); - } - - protected boolean virtual() { - return hasAnnotation("Virtual"); - } - - protected boolean override() { - return hasAnnotation("Override"); - } - - protected boolean isConst() { - return hasAnnotation("Const"); - } - - protected boolean characterName() { - return hasAnnotation("CharacterName"); - } - - protected boolean creator() { - return hasAnnotation("Creator"); - } - - protected boolean htmlCreator() { - return hasAnnotation("HtmlCreator"); - } - - protected boolean svgCreator() { - return hasAnnotation("SvgCreator"); - } - - private boolean hasAnnotation(String anno) { - if (currentAnnotations == null) { - return false; - } - for (AnnotationExpr ann : currentAnnotations) { - if (ann instanceof MarkerAnnotationExpr) { - MarkerAnnotationExpr marker = (MarkerAnnotationExpr) ann; - if (marker.getName().getName().equals(anno)) { - return true; - } - } - } - return false; - } - - protected Type convertType(japa.parser.ast.type.Type type, int modifiers) { - if (type instanceof ReferenceType) { 
- ReferenceType referenceType = (ReferenceType) type; - return new Type(convertTypeName(referenceType.getType().toString()), referenceType.getArrayCount(), noLength(), modifiers); - } else { - return new Type(convertTypeName(type.toString()), 0, false, modifiers); - } - } - - private String convertTypeName(String name) { - if ("String".equals(name)) { - if (local()) { - return "@Local"; - } - if (nsUri()) { - return "@NsUri"; - } - if (prefix()) { - return "@Prefix"; - } - if (literal()) { - return "@Literal"; - } - if (auto()) { - return "@Auto"; - } - if (characterName()) { - return "@CharacterName"; - } - } - return name; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java deleted file mode 100644 index 587b8160..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppOnlyInputStream.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2010 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; - -public class CppOnlyInputStream extends InputStream { - - private static final String DROP = "// CPPONLY:"; - - private final InputStream delegate; - - public CppOnlyInputStream(InputStream delegate) { - this.delegate = new BufferedInputStream(delegate); - } - - @Override public int read() throws IOException { - int c = delegate.read(); - if (c == DROP.charAt(0)) { - delegate.mark(DROP.length()); - for (int i = 1; i < DROP.length(); ++i) { - int d = delegate.read(); - if (d != DROP.charAt(i)) { - delegate.reset(); - return c; - } - } - return delegate.read(); - } - return c; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java deleted file mode 100644 index d75f8fe0..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java +++ /dev/null @@ -1,493 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** 
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008-2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class CppTypes { - - /* Please note we aren't looking for the following Atom definitions: - PseudoElementAtom or NonInheritingAnonBoxAtom or InheritingAnonBoxAtom */ - private static final Pattern ATOM_DEF = Pattern.compile("^\\s*Atom\\(\"([^,]+)\",\\s*\"([^\"]*)\"\\).*$"); - - private static Set reservedWords = new HashSet(); - - static { - reservedWords.add("small"); - reservedWords.add("for"); - reservedWords.add("false"); - reservedWords.add("true"); - reservedWords.add("default"); - reservedWords.add("class"); - reservedWords.add("switch"); - reservedWords.add("union"); - reservedWords.add("template"); - reservedWords.add("int"); - reservedWords.add("char"); - reservedWords.add("operator"); - reservedWords.add("or"); - reservedWords.add("and"); - reservedWords.add("not"); - reservedWords.add("xor"); - reservedWords.add("unicode"); - } - - private static final String[] TREE_BUILDER_INCLUDES = { "nsContentUtils", "nsAtom", "nsHtml5AtomTable", - "nsHtml5String", "nsNameSpaceManager", "nsIContent", "nsTraceRefcnt", "jArray", "nsHtml5DocumentMode", - "nsHtml5ArrayCopy", "nsHtml5Parser", "nsGkAtoms", "nsHtml5TreeOperation", "nsHtml5StateSnapshot", - "nsHtml5StackNode", "nsHtml5TreeOpExecutor", "nsHtml5StreamParser", "nsAHtml5TreeBuilderState", - "nsHtml5Highlighter", "nsHtml5PlainTextUtils", "nsHtml5ViewSourceUtils", "mozilla/ImportScanner", - "mozilla/Likely", "nsIContentHandle", "nsHtml5OplessBuilder", }; - - private static final 
String[] TOKENIZER_INCLUDES = { "nsAtom", - "nsHtml5AtomTable", "nsHtml5String", "nsIContent", "nsTraceRefcnt", - "jArray", "nsHtml5DocumentMode", "nsHtml5ArrayCopy", - "nsHtml5NamedCharacters", "nsHtml5NamedCharactersAccel", - "nsGkAtoms", "nsAHtml5TreeBuilderState", "nsHtml5Macros", - "nsHtml5Highlighter", "nsHtml5TokenizerLoopPolicies" }; - - private static final String[] INCLUDES = { "nsAtom", "nsHtml5AtomTable", - "nsHtml5String", "nsNameSpaceManager", "nsIContent", - "nsTraceRefcnt", "jArray", "nsHtml5ArrayCopy", - "nsAHtml5TreeBuilderState", "nsGkAtoms", "nsHtml5ByteReadable", - "nsHtml5Macros", "nsIContentHandle", "nsHtml5Portability", - "nsHtml5ContentCreatorFunction"}; - - private static final String[] OTHER_DECLATIONS = {}; - - private static final String[] TREE_BUILDER_OTHER_DECLATIONS = {}; - - private static final String[] NAMED_CHARACTERS_INCLUDES = { "jArray", - "nscore", "nsDebug", "mozilla/Logging", "nsMemory" }; - - private static final String[] FORWARD_DECLARATIONS = { "nsHtml5StreamParser" }; - - private static final String[] CLASSES_THAT_NEED_SUPPLEMENT = { - "MetaScanner", "Tokenizer", "TreeBuilder", "UTF16Buffer", }; - - private static final String[] STATE_LOOP_POLICIES = { - "nsHtml5ViewSourcePolicy", "nsHtml5SilentPolicy" }; - - private final Map atomMap = new HashMap(); - - private final Writer atomWriter; - - public CppTypes(File atomList, File generatedAtomFile) { - if (atomList == null) { - atomWriter = null; - } else { - try { - ingestAtoms(atomList); - atomWriter = new OutputStreamWriter(new FileOutputStream( - generatedAtomFile), "utf-8"); - this.start(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - private void ingestAtoms(File atomList) throws IOException { - // This doesn't need to be efficient, so let's make it easy to write. 
- BufferedReader atomReader = new BufferedReader( - new InputStreamReader(new FileInputStream(atomList), "utf-8")); - try { - String line; - boolean startedParsing = false; - while ((line = atomReader.readLine()) != null) { - // only start parsing lines after this comment - if (line.trim().startsWith("# START ATOMS")) { - startedParsing = true; - } else if (!startedParsing) { - continue; - } - // stop parsing lines after this comment - if (line.trim().startsWith("# END ATOMS")) { - return; - } - if (!line.trim().startsWith("Atom")) { - continue; - } - Matcher m = ATOM_DEF.matcher(line); - if (!m.matches()) { - throw new RuntimeException("Malformed atom definition: " + line); - } - atomMap.put(m.group(2), m.group(1)); - } - throw new RuntimeException( - "Atom list did not have a marker for generated section."); - } finally { - atomReader.close(); - } - } - - public void start() { - try { - - if (atomWriter != null) { - atomWriter.write("# THIS FILE IS GENERATED BY THE HTML PARSER TRANSLATOR AND WILL BE OVERWRITTEN!\n"); - atomWriter.write("from Atom import Atom\n\n"); - atomWriter.write("HTML_PARSER_ATOMS = [\n"); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public void finished() { - try { - if (atomWriter != null) { - atomWriter.write("]\n"); - atomWriter.flush(); - atomWriter.close(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public String classPrefix() { - return "nsHtml5"; - } - - public String booleanType() { - return "bool"; - } - - public String byteType() { - return "int8_t"; - } - - public String charType() { - return "char16_t"; - } - - /** - * Only used for named characters. 
- * - * @return - */ - public String unsignedShortType() { - return "uint16_t"; - } - - public String intType() { - return "int32_t"; - } - - public String unsignedIntType() { - return "uint32_t"; - } - - public String stringType() { - return "nsHtml5String"; - } - - public String weakLocalType() { - return "nsAtom*"; - } - - public String localType() { - return "RefPtr"; - } - - public String prefixType() { - return "nsStaticAtom*"; - } - - public String nsUriType() { - return "int32_t"; - } - - public String falseLiteral() { - return "false"; - } - - public String trueLiteral() { - return "true"; - } - - public String nullLiteral() { - return "nullptr"; - } - - public String encodingDeclarationHandlerType() { - return "nsHtml5StreamParser*"; - } - - public String nodeType() { - return "nsIContentHandle*"; - } - - public String htmlCreatorType() { - return "mozilla::dom::HTMLContentCreatorFunction"; - } - - public String svgCreatorType() { - return "mozilla::dom::SVGContentCreatorFunction"; - } - - public String creatorType() { - return "nsHtml5ContentCreatorFunction"; - } - - public String xhtmlNamespaceLiteral() { - return "kNameSpaceID_XHTML"; - } - - public String svgNamespaceLiteral() { - return "kNameSpaceID_SVG"; - } - - public String xmlnsNamespaceLiteral() { - return "kNameSpaceID_XMLNS"; - } - - public String xmlNamespaceLiteral() { - return "kNameSpaceID_XML"; - } - - public String noNamespaceLiteral() { - return "kNameSpaceID_None"; - } - - public String xlinkNamespaceLiteral() { - return "kNameSpaceID_XLink"; - } - - public String mathmlNamespaceLiteral() { - return "kNameSpaceID_MathML"; - } - - public String arrayTemplate() { - return "jArray"; - } - - public String autoArrayTemplate() { - return "autoJArray"; - } - - public String localForLiteral(String literal) { - String atom = atomMap.get(literal); - if (atom == null) { - atom = createAtomName(literal); - atomMap.put(literal, atom); - if (atomWriter != null) { - try { - atomWriter.write(" # ATOM 
GENERATED BY HTML PARSER TRANSLATOR (WILL BE AUTOMATICALLY OVERWRITTEN):\n Atom(\"" + atom + "\", \"" + literal - + "\"),\n"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - return "nsGkAtoms::" + atom; - } - - private String createAtomName(String literal) { - String candidate = literal.replaceAll("[^a-zA-Z0-9_]", "_"); - if ("".equals(candidate)) { - candidate = "emptystring"; - } - while (atomMap.values().contains(candidate) - || reservedWords.contains(candidate)) { - candidate = candidate + '_'; - } - return candidate; - } - - public String stringForLiteral(String literal) { - return '"' + literal + '"'; - } - - public String staticArrayTemplate() { - return "staticJArray"; - } - - public String newArrayCreator() { - return "newJArray"; - } - - public String[] boilerplateIncludes(String javaClass) { - if ("TreeBuilder".equals(javaClass)) { - return TREE_BUILDER_INCLUDES; - } else if ("Tokenizer".equals(javaClass)) { - return TOKENIZER_INCLUDES; - } else { - return INCLUDES; - } - } - - public String[] boilerplateDeclarations(String javaClass) { - if ("TreeBuilder".equals(javaClass)) { - return TREE_BUILDER_OTHER_DECLATIONS; - } else { - return OTHER_DECLATIONS; - } - } - - public String[] namedCharactersIncludes() { - return NAMED_CHARACTERS_INCLUDES; - } - - public String[] boilerplateForwardDeclarations() { - return FORWARD_DECLARATIONS; - } - - public String documentModeHandlerType() { - return "nsHtml5TreeBuilder*"; - } - - public String documentModeType() { - return "nsHtml5DocumentMode"; - } - - public String arrayCopy() { - return "nsHtml5ArrayCopy::arraycopy"; - } - - public String maxInteger() { - return "INT32_MAX"; - } - - public String constructorBoilerplate(String className) { - return "MOZ_COUNT_CTOR(" + className + ");"; - } - - public String destructorBoilerplate(String className) { - return "MOZ_COUNT_DTOR(" + className + ");"; - } - - public String literalType() { - return "const char*"; - } - - public boolean 
hasSupplement(String javaClass) { - return Arrays.binarySearch(CLASSES_THAT_NEED_SUPPLEMENT, javaClass) > -1; - } - - public String internerType() { - return "nsHtml5AtomTable*"; - } - - public String treeBuilderStateInterface() { - return "nsAHtml5TreeBuilderState"; - } - - public String treeBuilderStateType() { - return "nsAHtml5TreeBuilderState*"; - } - - public String arrayLengthMacro() { - return "MOZ_ARRAY_LENGTH"; - } - - public String staticAssert() { - return "static_assert"; - } - - public String continueMacro() { - return "NS_HTML5_CONTINUE"; - } - - public String breakMacro() { - return "NS_HTML5_BREAK"; - } - - public String characterNameType() { - return "nsHtml5CharacterName&"; - } - - public String characterNameTypeDeclaration() { - return "nsHtml5CharacterName"; - } - - public String transition() { - return "P::transition"; - } - - public String tokenizerErrorCondition() { - return "P::reportErrors"; - } - - public String firstTransitionArg() { - return "mViewSource.get()"; - } - - public String errorHandler() { - return this.unlikely() + "(mViewSource)"; - } - - public String unlikely() { - return "MOZ_UNLIKELY"; - } - - public String completedCharacterReference() { - return "P::completedNamedCharacterReference(mViewSource.get())"; - } - - public String[] stateLoopPolicies() { - return STATE_LOOP_POLICIES; - } - - public String assertionMacro() { - return "MOZ_ASSERT"; - } - - public String releaseAssertionMacro() { - return "MOZ_RELEASE_ASSERT"; - } - - public String crashMacro() { - return "MOZ_CRASH"; - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java deleted file mode 100755 index e832e3bb..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java +++ /dev/null @@ -1,2446 +0,0 @@ -/* - * Copyright (C) 2007 Júlio Vilmar Gesser. 
- * Copyright (C) 2008 Mozilla Foundation - * - * This file is part of HTML Parser C++ Translator. It was derived from DumpVisitor - * which was part of Java 1.5 parser and Abstract Syntax Tree and came with the following notice: - * - * Java 1.5 parser and Abstract Syntax Tree is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Java 1.5 parser and Abstract Syntax Tree is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Java 1.5 parser and Abstract Syntax Tree. If not, see . - */ -/* - * Created on 05/10/2006 - */ -package nu.validator.htmlparser.cpptranslate; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; - -import japa.parser.ast.BlockComment; -import japa.parser.ast.CompilationUnit; -import japa.parser.ast.ImportDeclaration; -import japa.parser.ast.LineComment; -import japa.parser.ast.Node; -import japa.parser.ast.PackageDeclaration; -import japa.parser.ast.TypeParameter; -import japa.parser.ast.body.AnnotationDeclaration; -import japa.parser.ast.body.AnnotationMemberDeclaration; -import japa.parser.ast.body.BodyDeclaration; -import japa.parser.ast.body.ClassOrInterfaceDeclaration; -import japa.parser.ast.body.ConstructorDeclaration; -import japa.parser.ast.body.EmptyMemberDeclaration; -import japa.parser.ast.body.EmptyTypeDeclaration; -import japa.parser.ast.body.EnumConstantDeclaration; -import japa.parser.ast.body.EnumDeclaration; -import 
japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.InitializerDeclaration; -import japa.parser.ast.body.JavadocComment; -import japa.parser.ast.body.MethodDeclaration; -import japa.parser.ast.body.ModifierSet; -import japa.parser.ast.body.Parameter; -import japa.parser.ast.body.TypeDeclaration; -import japa.parser.ast.body.VariableDeclarator; -import japa.parser.ast.body.VariableDeclaratorId; -import japa.parser.ast.expr.ArrayAccessExpr; -import japa.parser.ast.expr.ArrayCreationExpr; -import japa.parser.ast.expr.ArrayInitializerExpr; -import japa.parser.ast.expr.AssignExpr; -import japa.parser.ast.expr.BinaryExpr; -import japa.parser.ast.expr.BooleanLiteralExpr; -import japa.parser.ast.expr.CastExpr; -import japa.parser.ast.expr.CharLiteralExpr; -import japa.parser.ast.expr.ClassExpr; -import japa.parser.ast.expr.ConditionalExpr; -import japa.parser.ast.expr.DoubleLiteralExpr; -import japa.parser.ast.expr.EnclosedExpr; -import japa.parser.ast.expr.Expression; -import japa.parser.ast.expr.FieldAccessExpr; -import japa.parser.ast.expr.InstanceOfExpr; -import japa.parser.ast.expr.IntegerLiteralExpr; -import japa.parser.ast.expr.IntegerLiteralMinValueExpr; -import japa.parser.ast.expr.LongLiteralExpr; -import japa.parser.ast.expr.LongLiteralMinValueExpr; -import japa.parser.ast.expr.MarkerAnnotationExpr; -import japa.parser.ast.expr.MemberValuePair; -import japa.parser.ast.expr.MethodCallExpr; -import japa.parser.ast.expr.NameExpr; -import japa.parser.ast.expr.NormalAnnotationExpr; -import japa.parser.ast.expr.NullLiteralExpr; -import japa.parser.ast.expr.ObjectCreationExpr; -import japa.parser.ast.expr.QualifiedNameExpr; -import japa.parser.ast.expr.SingleMemberAnnotationExpr; -import japa.parser.ast.expr.StringLiteralExpr; -import japa.parser.ast.expr.SuperExpr; -import japa.parser.ast.expr.ThisExpr; -import japa.parser.ast.expr.UnaryExpr; -import japa.parser.ast.expr.VariableDeclarationExpr; -import japa.parser.ast.stmt.AssertStmt; -import 
japa.parser.ast.stmt.BlockStmt; -import japa.parser.ast.stmt.BreakStmt; -import japa.parser.ast.stmt.CatchClause; -import japa.parser.ast.stmt.ContinueStmt; -import japa.parser.ast.stmt.DoStmt; -import japa.parser.ast.stmt.EmptyStmt; -import japa.parser.ast.stmt.ExplicitConstructorInvocationStmt; -import japa.parser.ast.stmt.ExpressionStmt; -import japa.parser.ast.stmt.ForStmt; -import japa.parser.ast.stmt.ForeachStmt; -import japa.parser.ast.stmt.IfStmt; -import japa.parser.ast.stmt.LabeledStmt; -import japa.parser.ast.stmt.ReturnStmt; -import japa.parser.ast.stmt.Statement; -import japa.parser.ast.stmt.SwitchEntryStmt; -import japa.parser.ast.stmt.SwitchStmt; -import japa.parser.ast.stmt.SynchronizedStmt; -import japa.parser.ast.stmt.ThrowStmt; -import japa.parser.ast.stmt.TryStmt; -import japa.parser.ast.stmt.TypeDeclarationStmt; -import japa.parser.ast.stmt.WhileStmt; -import japa.parser.ast.type.ClassOrInterfaceType; -import japa.parser.ast.type.PrimitiveType; -import japa.parser.ast.type.ReferenceType; -import japa.parser.ast.type.Type; -import japa.parser.ast.type.VoidType; -import japa.parser.ast.type.WildcardType; - -/** - * @author Julio Vilmar Gesser - * @author Henri Sivonen - */ - -public class CppVisitor extends AnnotationHelperVisitor { - - private static final String[] CLASS_NAMES = { "AttributeName", - "ElementName", "HtmlAttributes", "LocatorImpl", "MetaScanner", - "NamedCharacters", "NamedCharactersAccel", "Portability", - "StackNode", "Tokenizer", "TreeBuilder", "UTF16Buffer" }; - - private static final String[] METHODS_WITH_UNLIKELY_CONDITIONS = { - "appendStrBuf" }; - - public class SourcePrinter { - - private int level = 0; - - private boolean indented = false; - - private final StringBuilder buf = new StringBuilder(); - - public void indent() { - level++; - } - - public void unindent() { - level--; - } - - private void makeIndent() { - for (int i = 0; i < level; i++) { - buf.append(" "); - } - } - - public void printWithoutIndent(String arg) 
{ - indented = false; - buf.append(arg); - } - - public void print(String arg) { - if (!indented) { - makeIndent(); - indented = true; - } - buf.append(arg); - } - - public void printLn(String arg) { - print(arg); - printLn(); - } - - public void printLn() { - buf.append("\n"); - indented = false; - } - - public String getSource() { - return buf.toString(); - } - - @Override public String toString() { - return getSource(); - } - } - - private boolean supportErrorReporting = true; - - protected SourcePrinter printer = new SourcePrinter(); - - private SourcePrinter staticInitializerPrinter = new SourcePrinter(); - - private SourcePrinter tempPrinterHolder; - - protected final CppTypes cppTypes; - - protected String className = ""; - - protected int currentArrayCount; - - protected Set forLoopsWithCondition = new HashSet(); - - protected boolean inPrimitiveNoLengthFieldDeclarator = false; - - protected boolean inField = false; - - protected boolean inArray = false; - - protected final SymbolTable symbolTable; - - protected String definePrefix; - - protected String javaClassName; - - protected boolean suppressPointer = false; - - private final List staticReleases = new LinkedList(); - - private boolean inConstructorBody = false; - - private String currentMethod = null; - - private Set labels = null; - - private boolean destructor; - - protected boolean inStatic = false; - - private boolean reportTransitions = false; - - private int stateLoopCallCount = 0; - - /** - * @param cppTypes - */ - public CppVisitor(CppTypes cppTypes, SymbolTable symbolTable) { - this.cppTypes = cppTypes; - this.symbolTable = symbolTable; - staticInitializerPrinter.indent(); - } - - public String getSource() { - return printer.getSource(); - } - - private String classNameFromExpression(Expression e) { - if (e instanceof NameExpr) { - NameExpr nameExpr = (NameExpr) e; - String name = nameExpr.getName(); - if (Arrays.binarySearch(CLASS_NAMES, name) > -1) { - return name; - } - } - return null; - 
} - - protected void printModifiers(int modifiers) { - } - - private void printMembers(List members, - LocalSymbolTable arg) { - for (BodyDeclaration member : members) { - if ("Tokenizer".equals(javaClassName) - && member instanceof MethodDeclaration - && "stateLoop".equals(((MethodDeclaration) member).getName())) { - reportTransitions = true; - } - member.accept(this, arg); - reportTransitions = false; - } - } - - private void printTypeArgs(List args, LocalSymbolTable arg) { - // if (args != null) { - // printer.print("<"); - // for (Iterator i = args.iterator(); i.hasNext();) { - // Type t = i.next(); - // t.accept(this, arg); - // if (i.hasNext()) { - // printer.print(", "); - // } - // } - // printer.print(">"); - // } - } - - private void printTypeParameters(List args, - LocalSymbolTable arg) { - // if (args != null) { - // printer.print("<"); - // for (Iterator i = args.iterator(); i.hasNext();) { - // TypeParameter t = i.next(); - // t.accept(this, arg); - // if (i.hasNext()) { - // printer.print(", "); - // } - // } - // printer.print(">"); - // } - } - - public void visit(Node n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(CompilationUnit n, LocalSymbolTable arg) { - if (n.getTypes() != null) { - for (Iterator i = n.getTypes().iterator(); i.hasNext();) { - i.next().accept(this, arg); - printer.printLn(); - if (i.hasNext()) { - printer.printLn(); - } - } - } - } - - public void visit(PackageDeclaration n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(NameExpr n, LocalSymbolTable arg) { - if ("mappingLangToXmlLang".equals(n.getName())) { - printer.print("0"); - } else if ("LANG_NS".equals(n.getName())) { - printer.print("ALL_NO_NS"); - } else if ("LANG_PREFIX".equals(n.getName())) { - printer.print("ALL_NO_PREFIX"); - } else if ("HTML_LOCAL".equals(n.getName())) { - printer.print(cppTypes.localForLiteral("html")); - } else if 
("documentModeHandler".equals(n.getName())) { - printer.print("this"); - } else if ("errorHandler".equals(n.getName())) { - printer.print(cppTypes.errorHandler()); - } else if ("MOZ_FALLTHROUGH".equals(n.getName())) { - printer.print("[[fallthrough]]"); - } else { - printer.print(n.getName()); - } - } - - public void visit(QualifiedNameExpr n, LocalSymbolTable arg) { - n.getQualifier().accept(this, arg); - printer.print("."); - printer.print(n.getName()); - } - - public void visit(ImportDeclaration n, LocalSymbolTable arg) { - throw new IllegalStateException(n.getClass().getName()); - } - - public void visit(ClassOrInterfaceDeclaration n, LocalSymbolTable arg) { - javaClassName = n.getName(); - className = cppTypes.classPrefix() + javaClassName; - definePrefix = makeDefinePrefix(className); - - startClassDeclaration(); - - if (n.getMembers() != null) { - printMembers(n.getMembers(), arg); - } - - endClassDeclaration(); - } - - private String makeDefinePrefix(String name) { - StringBuilder sb = new StringBuilder(); - boolean prevWasLowerCase = true; - for (int i = 0; i < name.length(); i++) { - char c = name.charAt(i); - if (c >= 'a' && c <= 'z') { - sb.append((char) (c - 0x20)); - prevWasLowerCase = true; - } else if (c >= 'A' && c <= 'Z') { - if (prevWasLowerCase) { - sb.append('_'); - } - sb.append(c); - prevWasLowerCase = false; - } else if (c >= '0' && c <= '9') { - sb.append(c); - prevWasLowerCase = false; - } - } - sb.append('_'); - return sb.toString(); - } - - protected void endClassDeclaration() { - printer.printLn("void"); - printer.print(className); - printer.printLn("::initializeStatics()"); - printer.printLn("{"); - printer.print(staticInitializerPrinter.getSource()); - printer.printLn("}"); - printer.printLn(); - - printer.printLn("void"); - printer.print(className); - printer.printLn("::releaseStatics()"); - printer.printLn("{"); - printer.indent(); - for (String del : staticReleases) { - printer.print(del); - printer.printLn(";"); - } - 
printer.unindent(); - printer.printLn("}"); - printer.printLn(); - - if (cppTypes.hasSupplement(javaClassName)) { - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn("CppSupplement.h\""); - } - } - - protected void startClassDeclaration() { - printer.print("#define "); - printer.print(className); - printer.printLn("_cpp__"); - printer.printLn(); - - String[] incs = cppTypes.boilerplateIncludes(javaClassName); - for (int i = 0; i < incs.length; i++) { - String inc = incs[i]; - printer.print("#include \""); - printer.print(inc); - printer.printLn(".h\""); - } - - printer.printLn(); - - for (int i = 0; i < Main.H_LIST.length; i++) { - String klazz = Main.H_LIST[i]; - if (!klazz.equals(javaClassName)) { - printer.print("#include \""); - printer.print(cppTypes.classPrefix()); - printer.print(klazz); - printer.printLn(".h\""); - } - } - - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn(".h\""); - printer.printLn(); - } - - public void visit(EmptyTypeDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - printer.print(";"); - } - - public void visit(JavadocComment n, LocalSymbolTable arg) { - printer.print("/**"); - printer.print(n.getContent()); - printer.printLn("*/"); - } - - public void visit(ClassOrInterfaceType n, LocalSymbolTable arg) { - if (n.getScope() != null) { - n.getScope().accept(this, arg); - printer.print("."); - throw new IllegalStateException("Can't translate nested classes."); - } - String name = n.getName(); - if ("String".equals(name)) { - if (local()) { - name = inField || inArray ? 
cppTypes.localType() : cppTypes.weakLocalType(); - } else if (prefix()) { - name = cppTypes.prefixType(); - } else if (nsUri()) { - name = cppTypes.nsUriType(); - } else if (literal()) { - name = cppTypes.literalType(); - } else if (characterName()) { - name = cppTypes.characterNameType(); - } else { - name = cppTypes.stringType(); - } - } else if ("T".equals(name) || "Object".equals(name)) { - if (htmlCreator()) { - name = cppTypes.htmlCreatorType(); - } else if (svgCreator()) { - name = cppTypes.svgCreatorType(); - } else if (creator()) { - name = cppTypes.creatorType(); - } else { - name = cppTypes.nodeType(); - } - } else if ("TokenHandler".equals(name)) { - name = cppTypes.classPrefix() + "TreeBuilder*"; - } else if ("EncodingDeclarationHandler".equals(name)) { - name = cppTypes.encodingDeclarationHandlerType(); - } else if ("Interner".equals(name)) { - name = cppTypes.internerType(); - } else if ("TreeBuilderState".equals(name)) { - name = cppTypes.treeBuilderStateType(); - } else if ("DocumentModeHandler".equals(name)) { - name = cppTypes.documentModeHandlerType(); - } else if ("DocumentMode".equals(name)) { - name = cppTypes.documentModeType(); - } else { - name = cppTypes.classPrefix() + name + (suppressPointer ? 
"" : "*"); - } - printer.print(name); - printTypeArgs(n.getTypeArgs(), arg); - } - - protected boolean inHeader() { - return false; - } - - public void visit(TypeParameter n, LocalSymbolTable arg) { - printer.print(n.getName()); - if (n.getTypeBound() != null) { - printer.print(" extends "); - for (Iterator i = n.getTypeBound().iterator(); i.hasNext();) { - ClassOrInterfaceType c = i.next(); - c.accept(this, arg); - if (i.hasNext()) { - printer.print(" & "); - } - } - } - } - - public void visit(PrimitiveType n, LocalSymbolTable arg) { - switch (n.getType()) { - case Boolean: - printer.print(cppTypes.booleanType()); - break; - case Byte: - printer.print(cppTypes.byteType()); - break; - case Char: - printer.print(cppTypes.charType()); - break; - case Double: - throw new IllegalStateException("Unsupported primitive."); - case Float: - throw new IllegalStateException("Unsupported primitive."); - case Int: - if (unsigned()) { - printer.print(cppTypes.unsignedIntType()); - } else { - printer.print(cppTypes.intType()); - } - break; - case Long: - throw new IllegalStateException("Unsupported primitive."); - case Short: - throw new IllegalStateException("Unsupported primitive."); - } - } - - public void visit(ReferenceType n, LocalSymbolTable arg) { - if (isConst()) { - printer.print("const "); - } - boolean wasInArray = inArray; - if (n.getArrayCount() > 0) { - inArray = true; - } - if (noLength()) { - n.getType().accept(this, arg); - for (int i = 0; i < n.getArrayCount(); i++) { - if (!inPrimitiveNoLengthFieldDeclarator) { - printer.print("*"); - } - } - } else { - for (int i = 0; i < n.getArrayCount(); i++) { - if (inStatic) { - printer.print(cppTypes.staticArrayTemplate()); - } else { - if (auto()) { - printer.print(cppTypes.autoArrayTemplate()); - } else { - printer.print(cppTypes.arrayTemplate()); - } - } - printer.print("<"); - } - n.getType().accept(this, arg); - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print(", "); - 
printer.print(cppTypes.intType()); - printer.print(">"); - } - } - if (n.getArrayCount() > 0) { - inArray = wasInArray; - } - } - - public void visit(WildcardType n, LocalSymbolTable arg) { - printer.print("?"); - if (n.getExtends() != null) { - printer.print(" extends "); - n.getExtends().accept(this, arg); - } - if (n.getSuper() != null) { - printer.print(" super "); - n.getSuper().accept(this, arg); - } - } - - public void visit(FieldDeclaration n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - fieldDeclaration(n, arg); - currentAnnotations = null; - } - - protected boolean isNonToCharArrayMethodCall(Expression exp) { - if (exp instanceof MethodCallExpr) { - MethodCallExpr mce = (MethodCallExpr) exp; - return !"toCharArray".equals(mce.getName()); - } else { - return false; - } - } - - protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { - inField = true; - tempPrinterHolder = printer; - printer = staticInitializerPrinter; - int modifiers = n.getModifiers(); - List variables = n.getVariables(); - VariableDeclarator declarator = variables.get(0); - if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) - && !(n.getType() instanceof PrimitiveType) - && declarator.getInit() != null) { - if (n.getType() instanceof ReferenceType) { - ReferenceType rt = (ReferenceType) n.getType(); - currentArrayCount = rt.getArrayCount(); - if (currentArrayCount > 0) { - if (currentArrayCount != 1) { - throw new IllegalStateException( - "Multidimensional arrays not supported. 
" + n); - } - if (noLength()) { - if (rt.getType() instanceof PrimitiveType) { - inPrimitiveNoLengthFieldDeclarator = true; - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - - printer.print(" = "); - - declarator.getInit().accept(this, arg); - - printer.printLn(";"); - printer = staticInitializerPrinter; - } else { - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - - printer.printLn(" = 0;"); - printer = staticInitializerPrinter; - - staticReleases.add("delete[] " - + declarator.getId().getName()); - - ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); - - declarator.getId().accept(this, arg); - printer.print(" = new "); - // suppressPointer = true; - rt.getType().accept(this, arg); - // suppressPointer = false; - printer.print("["); - printer.print("" + aie.getValues().size()); - printer.printLn("];"); - - printArrayInit(declarator.getId(), aie.getValues(), - arg); - } - } else if ((rt.getType() instanceof PrimitiveType) || "String".equals(rt.getType().toString())) { - printer = tempPrinterHolder; - printer.print("static "); - rt.getType().accept(this, arg); - printer.print(" const "); - declarator.getId().accept(this, arg); - printer.print("_DATA[] = "); - declarator.getInit().accept(this, arg); - printer.printLn(";"); - printer.print(cppTypes.staticArrayTemplate()); - printer.print("<"); - suppressPointer = true; - rt.getType().accept(this, arg); - suppressPointer = false; - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print("> "); - printer.print(className); - printer.print("::"); - declarator.getId().accept(this, arg); - printer.print(" = { "); - declarator.getId().accept(this, arg); - printer.print("_DATA, "); - printer.print(cppTypes.arrayLengthMacro()); - 
printer.print("("); - declarator.getId().accept(this, arg); - printer.printLn("_DATA) };"); - printer = staticInitializerPrinter; - } else if (isNonToCharArrayMethodCall(declarator.getInit())) { - staticReleases.add(declarator.getId().getName() - + ".release()"); - declarator.getId().accept(this, arg); - printer.print(" = "); - if (declarator.getInit() instanceof ArrayInitializerExpr) { - - ArrayInitializerExpr aie = (ArrayInitializerExpr) declarator.getInit(); - printer.print(cppTypes.arrayTemplate()); - printer.print("<"); - suppressPointer = true; - rt.getType().accept(this, arg); - suppressPointer = false; - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print(">::"); - printer.print(cppTypes.newArrayCreator()); - printer.print("("); - printer.print("" + aie.getValues().size()); - printer.printLn(");"); - printArrayInit(declarator.getId(), aie.getValues(), - arg); - } else { - declarator.getInit().accept(this, arg); - printer.printLn(";"); - } - } - } else { - if (ModifierSet.isStatic(modifiers)) { - printer = tempPrinterHolder; - n.getType().accept(this, arg); - printer.print(" "); - printer.print(className); - printer.print("::"); - String clazzName = n.getType().toString(); - String field = declarator.getId().toString(); - if (symbolTable.isAttributeOrElementName(clazzName, field)) { - if ("AttributeName".equals(clazzName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(clazzName)) { - printer.print("ELT_"); - } - } - declarator.getId().accept(this, arg); - printer.print(" = "); - printer.print(cppTypes.nullLiteral()); - printer.printLn(";"); - printer = staticInitializerPrinter; - } - - if ("AttributeName".equals(n.getType().toString())) { - printer.print("ATTR_"); - staticReleases.add("delete ATTR_" - + declarator.getId().getName()); - } else if ("ElementName".equals(n.getType().toString())) { - printer.print("ELT_"); - staticReleases.add("delete ELT_" - + declarator.getId().getName()); - } else { - 
staticReleases.add("delete " - + declarator.getId().getName()); - } - declarator.accept(this, arg); - printer.printLn(";"); - } - } else { - throw new IllegalStateException( - "Non-reference, non-primitive fields not supported."); - } - } - currentArrayCount = 0; - printer = tempPrinterHolder; - inPrimitiveNoLengthFieldDeclarator = false; - inField = false; - } - - private void printArrayInit(VariableDeclaratorId variableDeclaratorId, - List values, LocalSymbolTable arg) { - for (int i = 0; i < values.size(); i++) { - Expression exp = values.get(i); - variableDeclaratorId.accept(this, arg); - printer.print("["); - printer.print("" + i); - printer.print("] = "); - if (exp instanceof NameExpr) { - if ("AttributeName".equals(javaClassName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(javaClassName)) { - printer.print("ELT_"); - } - } - exp.accept(this, arg); - printer.printLn(";"); - } - } - - public void visit(VariableDeclarator n, LocalSymbolTable arg) { - n.getId().accept(this, arg); - - if (n.getInit() != null) { - printer.print(" = "); - n.getInit().accept(this, arg); - } - } - - public void visit(VariableDeclaratorId n, LocalSymbolTable arg) { - printer.print(n.getName()); - if (noLength()) { - for (int i = 0; i < currentArrayCount; i++) { - if (inPrimitiveNoLengthFieldDeclarator) { - printer.print("[]"); - } - } - } - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print("[]"); - } - } - - public void visit(ArrayInitializerExpr n, LocalSymbolTable arg) { - printer.print("{"); - if (n.getValues() != null) { - printer.print(" "); - for (Iterator i = n.getValues().iterator(); i.hasNext();) { - Expression expr = i.next(); - expr.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - printer.print(" "); - } - printer.print("}"); - } - - public void visit(VoidType n, LocalSymbolTable arg) { - printer.print("void"); - } - - public void visit(ArrayAccessExpr n, LocalSymbolTable arg) { - n.getName().accept(this, arg); - 
printer.print("["); - n.getIndex().accept(this, arg); - printer.print("]"); - } - - public void visit(ArrayCreationExpr n, LocalSymbolTable arg) { - // printer.print("new "); - // n.getType().accept(this, arg); - // printTypeArgs(n.getTypeArgs(), arg); - - inArray = true; - if (n.getDimensions() != null) { - if (noLength()) { - for (Expression dim : n.getDimensions()) { - printer.print("new "); - n.getType().accept(this, arg); - printer.print("["); - dim.accept(this, arg); - printer.print("]"); - } - } else { - for (Expression dim : n.getDimensions()) { - printer.print(cppTypes.arrayTemplate()); - printer.print("<"); - n.getType().accept(this, arg); - printer.print(", "); - printer.print(cppTypes.intType()); - printer.print(">::"); - printer.print(cppTypes.newArrayCreator()); - printer.print("("); - dim.accept(this, arg); - printer.print(")"); - } - } - if (n.getArrayCount() > 0) { - throw new IllegalStateException( - "Nested array allocation not supported. " - + n.toString()); - } - } else { - throw new IllegalStateException( - "Array initializer as part of array creation not supported. 
" - + n.toString()); - } - inArray = false; - } - - public void visit(AssignExpr n, LocalSymbolTable arg) { - if (inConstructorBody) { - n.getTarget().accept(this, arg); - printer.print("("); - n.getValue().accept(this, arg); - printer.print(")"); - } else { - n.getTarget().accept(this, arg); - printer.print(" "); - switch (n.getOperator()) { - case assign: - printer.print("="); - break; - case and: - printer.print("&="); - break; - case or: - printer.print("|="); - break; - case xor: - printer.print("^="); - break; - case plus: - printer.print("+="); - break; - case minus: - printer.print("-="); - break; - case rem: - printer.print("%="); - break; - case slash: - printer.print("/="); - break; - case star: - printer.print("*="); - break; - case lShift: - printer.print("<<="); - break; - case rSignedShift: - printer.print(">>="); - break; - case rUnsignedShift: - printer.print(">>>="); - break; - } - printer.print(" "); - n.getValue().accept(this, arg); - } - } - - public void visit(BinaryExpr n, LocalSymbolTable arg) { - Expression right = n.getRight(); - switch (n.getOperator()) { - case notEquals: - if (right instanceof NullLiteralExpr) { - printer.print("!!"); - n.getLeft().accept(this, arg); - return; - } else if (right instanceof IntegerLiteralExpr) { - IntegerLiteralExpr ile = (IntegerLiteralExpr) right; - if ("0".equals(ile.getValue())) { - n.getLeft().accept(this, arg); - return; - } - } - case equals: - if (right instanceof NullLiteralExpr) { - printer.print("!"); - n.getLeft().accept(this, arg); - return; - } else if (right instanceof IntegerLiteralExpr) { - IntegerLiteralExpr ile = (IntegerLiteralExpr) right; - if ("0".equals(ile.getValue())) { - printer.print("!"); - n.getLeft().accept(this, arg); - return; - } - } - default: - // fall thru - } - - n.getLeft().accept(this, arg); - printer.print(" "); - switch (n.getOperator()) { - case or: - printer.print("||"); - break; - case and: - printer.print("&&"); - break; - case binOr: - printer.print("|"); - 
break; - case binAnd: - printer.print("&"); - break; - case xor: - printer.print("^"); - break; - case equals: - printer.print("=="); - break; - case notEquals: - printer.print("!="); - break; - case less: - printer.print("<"); - break; - case greater: - printer.print(">"); - break; - case lessEquals: - printer.print("<="); - break; - case greaterEquals: - printer.print(">="); - break; - case lShift: - printer.print("<<"); - break; - case rSignedShift: - printer.print(">>"); - break; - case rUnsignedShift: - printer.print(">>>"); - break; - case plus: - printer.print("+"); - break; - case minus: - printer.print("-"); - break; - case times: - printer.print("*"); - break; - case divide: - printer.print("/"); - break; - case remainder: - printer.print("%"); - break; - } - printer.print(" "); - n.getRight().accept(this, arg); - } - - public void visit(CastExpr n, LocalSymbolTable arg) { - printer.print("("); - n.getType().accept(this, arg); - printer.print(") "); - n.getExpr().accept(this, arg); - } - - public void visit(ClassExpr n, LocalSymbolTable arg) { - n.getType().accept(this, arg); - printer.print(".class"); - } - - public void visit(ConditionalExpr n, LocalSymbolTable arg) { - n.getCondition().accept(this, arg); - printer.print(" ? 
"); - n.getThenExpr().accept(this, arg); - printer.print(" : "); - n.getElseExpr().accept(this, arg); - } - - public void visit(EnclosedExpr n, LocalSymbolTable arg) { - printer.print("("); - n.getInner().accept(this, arg); - printer.print(")"); - } - - public void visit(FieldAccessExpr n, LocalSymbolTable arg) { - Expression scope = n.getScope(); - String field = n.getField(); - if (inConstructorBody && (scope instanceof ThisExpr)) { - printer.print(field); - } else if ("length".equals(field) && !(scope instanceof ThisExpr)) { - scope.accept(this, arg); - printer.print(".length"); - } else if ("MAX_VALUE".equals(field) - && "Integer".equals(scope.toString())) { - printer.print(cppTypes.maxInteger()); - } else { - String clazzName = classNameFromExpression(scope); - if (clazzName == null) { - if ("DocumentMode".equals(scope.toString())) { - // printer.print(cppTypes.documentModeType()); - // printer.print("."); - } else if ("creator".equals(scope.toString()) || "this.creator".equals(scope.toString())) { - scope.accept(this, arg); - printer.print("."); - } else { - scope.accept(this, arg); - printer.print("->"); - } - } else { - printer.print(cppTypes.classPrefix()); - printer.print(clazzName); - printer.print("::"); - if (symbolTable.isAttributeOrElementName(clazzName, field)) { - if ("AttributeName".equals(clazzName)) { - printer.print("ATTR_"); - } else if ("ElementName".equals(clazzName)) { - printer.print("ELT_"); - } - } - } - printer.print(field); - } - } - - public void visit(InstanceOfExpr n, LocalSymbolTable arg) { - n.getExpr().accept(this, arg); - printer.print(" instanceof "); - n.getType().accept(this, arg); - } - - public void visit(CharLiteralExpr n, LocalSymbolTable arg) { - printCharLiteral(n.getValue()); - } - - private void printCharLiteral(String val) { - if (val.length() != 1) { - printer.print("'"); - printer.print(val); - printer.print("'"); - return; - } - char c = val.charAt(0); - switch (c) { - case 0: - printer.print("'\\0'"); - break; - 
case '\n': - printer.print("'\\n'"); - break; - case '\t': - printer.print("'\\t'"); - break; - case 0xB: - printer.print("'\\v'"); - break; - case '\b': - printer.print("'\\b'"); - break; - case '\r': - printer.print("'\\r'"); - break; - case 0xC: - printer.print("'\\f'"); - break; - case 0x7: - printer.print("'\\a'"); - break; - case '\\': - printer.print("'\\\\'"); - break; - case '?': - printer.print("'\\?'"); - break; - case '\'': - printer.print("'\\''"); - break; - case '"': - printer.print("'\\\"'"); - break; - default: - if (c >= 0x20 && c <= 0x7F) { - printer.print("'" + c); - printer.print("'"); - } else { - printer.print("0x"); - printer.print(Integer.toHexString(c)); - } - break; - } - } - - public void visit(DoubleLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(IntegerLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(LongLiteralExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(IntegerLiteralMinValueExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(LongLiteralMinValueExpr n, LocalSymbolTable arg) { - printer.print(n.getValue()); - } - - public void visit(StringLiteralExpr n, LocalSymbolTable arg) { - String val = n.getValue(); - if ("http://www.w3.org/1999/xhtml".equals(val)) { - printer.print(cppTypes.xhtmlNamespaceLiteral()); - } else if ("http://www.w3.org/2000/svg".equals(val)) { - printer.print(cppTypes.svgNamespaceLiteral()); - } else if ("http://www.w3.org/2000/xmlns/".equals(val)) { - printer.print(cppTypes.xmlnsNamespaceLiteral()); - } else if ("http://www.w3.org/XML/1998/namespace".equals(val)) { - printer.print(cppTypes.xmlNamespaceLiteral()); - } else if ("http://www.w3.org/1999/xlink".equals(val)) { - printer.print(cppTypes.xlinkNamespaceLiteral()); - } else if ("http://www.w3.org/1998/Math/MathML".equals(val)) { - printer.print(cppTypes.mathmlNamespaceLiteral()); - } 
else if ("".equals(val) && "AttributeName".equals(javaClassName)) { - printer.print(cppTypes.noNamespaceLiteral()); - } else if (val.startsWith("-/") || val.startsWith("+//") - || val.startsWith("http://") || val.startsWith("XSLT")) { - printer.print(cppTypes.stringForLiteral(val)); - } else if (("hidden".equals(val) || "isindex".equals(val) - || "text/html".equals(val) - || "application/xhtml+xml".equals(val) || "content-type".equals(val)) - && "TreeBuilder".equals(javaClassName)) { - printer.print(cppTypes.stringForLiteral(val)); - } else if ("isQuirky".equals(currentMethod) && "html".equals(val)) { - printer.print(cppTypes.stringForLiteral(val)); - } else { - printer.print(cppTypes.localForLiteral(val)); - } - } - - public void visit(BooleanLiteralExpr n, LocalSymbolTable arg) { - if (n.getValue()) { - printer.print(cppTypes.trueLiteral()); - } else { - printer.print(cppTypes.falseLiteral()); - } - } - - public void visit(NullLiteralExpr n, LocalSymbolTable arg) { - printer.print(cppTypes.nullLiteral()); - } - - public void visit(ThisExpr n, LocalSymbolTable arg) { - if (n.getClassExpr() != null) { - n.getClassExpr().accept(this, arg); - printer.print("."); - } - printer.print("this"); - } - - public void visit(SuperExpr n, LocalSymbolTable arg) { - if (n.getClassExpr() != null) { - n.getClassExpr().accept(this, arg); - printer.print("."); - } - printer.print("super"); - } - - public void visit(MethodCallExpr n, LocalSymbolTable arg) { - if ("releaseArray".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".release()"); - } else if ("releaseString".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".Release()"); - } else if ("deleteArray".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - printer.print("delete[] "); - n.getArgs().get(0).accept(this, arg); - } else if 
("delete".equals(n.getName()) - && "Portability".equals(n.getScope().toString())) { - printer.print("delete "); - n.getArgs().get(0).accept(this, arg); - } else if (("retainElement".equals(n.getName()) || "releaseElement".equals(n.getName())) - && "Portability".equals(n.getScope().toString())) { - // ignore for now - } else if ("transition".equals(n.getName()) - && n.getScope() == null) { - visitTransition(n, arg); - } else if ("arraycopy".equals(n.getName()) - && "System".equals(n.getScope().toString())) { - printer.print(cppTypes.arrayCopy()); - printer.print("("); - if (n.getArgs().get(0).toString().equals( - n.getArgs().get(2).toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(", "); - n.getArgs().get(1).accept(this, arg); - printer.print(", "); - n.getArgs().get(3).accept(this, arg); - printer.print(", "); - n.getArgs().get(4).accept(this, arg); - } else if (n.getArgs().get(1).toString().equals("0") - && n.getArgs().get(3).toString().equals("0")) { - n.getArgs().get(0).accept(this, arg); - printer.print(", "); - n.getArgs().get(2).accept(this, arg); - printer.print(", "); - n.getArgs().get(4).accept(this, arg); - } else { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } else if ("binarySearch".equals(n.getName()) - && "Arrays".equals(n.getScope().toString())) { - n.getArgs().get(0).accept(this, arg); - printer.print(".binarySearch("); - n.getArgs().get(1).accept(this, arg); - printer.print(")"); - } else { - Expression scope = n.getScope(); - if (scope != null) { - if (scope instanceof StringLiteralExpr) { - StringLiteralExpr strLit = (StringLiteralExpr) scope; - String str = strLit.getValue(); - if (!"toCharArray".equals(n.getName())) { - throw new IllegalStateException( - "Unsupported method call on string literal: " - + n.getName()); - } - printer.print("{ "); - for (int i = 0; i < str.length(); 
i++) { - char c = str.charAt(i); - if (i != 0) { - printer.print(", "); - } - printCharLiteral("" + c); - } - printer.print(" }"); - return; - } else { - String clazzName = classNameFromExpression(scope); - if (clazzName == null) { - scope.accept(this, arg); - if ("length".equals(n.getName()) - || "charAt".equals(n.getName()) - || "creator".equals(scope.toString())) { - printer.print("."); - } else { - printer.print("->"); - } - } else { - printer.print(cppTypes.classPrefix()); - printer.print(clazzName); - printer.print("::"); - } - } - } - printTypeArgs(n.getTypeArgs(), arg); - printer.print(n.getName()); - if ("stateLoop".equals(n.getName()) - && "Tokenizer".equals(javaClassName) - && cppTypes.stateLoopPolicies().length > 0) { - printer.print("<"); - printer.print(cppTypes.stateLoopPolicies()[stateLoopCallCount]); - printer.print(">"); - stateLoopCallCount++; - } - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } - } - - public void visit(ObjectCreationExpr n, LocalSymbolTable arg) { - if (n.getScope() != null) { - n.getScope().accept(this, arg); - printer.print("."); - } - - printer.print("new "); - - suppressPointer = true; - printTypeArgs(n.getTypeArgs(), arg); - n.getType().accept(this, arg); - suppressPointer = false; - - if ("AttributeName".equals(n.getType().getName())) { - List args = n.getArgs(); - while (args != null && args.size() > 3) { - args.remove(3); - } - } - - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - if (n.getAnonymousClassBody() != null) { - printer.printLn(" {"); - printer.indent(); - printMembers(n.getAnonymousClassBody(), arg); - printer.unindent(); - 
printer.print("}"); - } - } - - public void visit(UnaryExpr n, LocalSymbolTable arg) { - switch (n.getOperator()) { - case positive: - printer.print("+"); - break; - case negative: - printer.print("-"); - break; - case inverse: - printer.print("~"); - break; - case not: - printer.print("!"); - break; - case preIncrement: - printer.print("++"); - break; - case preDecrement: - printer.print("--"); - break; - } - - n.getExpr().accept(this, arg); - - switch (n.getOperator()) { - case posIncrement: - printer.print("++"); - break; - case posDecrement: - printer.print("--"); - break; - } - } - - public void visit(ConstructorDeclaration n, LocalSymbolTable arg) { - if ("TreeBuilder".equals(javaClassName)) { - return; - } - - arg = new LocalSymbolTable(javaClassName, symbolTable); - - // if (n.getJavaDoc() != null) { - // n.getJavaDoc().accept(this, arg); - // } - currentAnnotations = n.getAnnotations(); - - printModifiers(n.getModifiers()); - - printMethodNamespace(); - printConstructorExplicit(n.getParameters()); - printer.print(className); - currentAnnotations = null; - - printer.print("("); - if (n.getParameters() != null) { - for (Iterator i = n.getParameters().iterator(); i.hasNext();) { - Parameter p = i.next(); - p.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - printConstructorBody(n.getBlock(), arg); - } - - protected void printConstructorExplicit(List params) { - } - - protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { - inConstructorBody = true; - List statements = block.getStmts(); - List nonAssigns = new LinkedList(); - int i = 0; - boolean needOutdent = false; - for (Statement statement : statements) { - if (statement instanceof ExpressionStmt - && ((ExpressionStmt) statement).getExpression() instanceof AssignExpr) { - printer.printLn(); - if (i == 0) { - // : firstMember(arg) - printer.indent(); - printer.print(": "); - needOutdent = true; - } else { - // , secondMember(arg) - 
printer.print(", "); - } - statement.accept(this, arg); - i++; - } else { - nonAssigns.add(statement); - } - } - if (needOutdent) { - printer.unindent(); - } - inConstructorBody = false; - printer.printLn(); - printer.printLn("{"); - printer.indent(); - String boilerplate = cppTypes.constructorBoilerplate(className); - if (boilerplate != null) { - printer.printLn(boilerplate); - } - for (Statement statement : nonAssigns) { - statement.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - printer.printLn("}"); - printer.printLn(); - } - - public void visit(MethodDeclaration n, LocalSymbolTable arg) { - arg = new LocalSymbolTable(javaClassName, symbolTable); - if (isPrintableMethod(n.getModifiers()) - && !(n.getName().equals("endCoalescing") || n.getName().equals( - "startCoalescing"))) { - printMethodDeclaration(n, arg); - } - } - - private boolean isPrintableMethod(int modifiers) { - return !(ModifierSet.isAbstract(modifiers) || (ModifierSet.isProtected(modifiers) && !(ModifierSet.isFinal(modifiers) || "Tokenizer".equals(javaClassName)))); - } - - protected void printMethodDeclaration(MethodDeclaration n, - LocalSymbolTable arg) { - if (n.getName().startsWith("fatal") || n.getName().startsWith("err") - || n.getName().startsWith("warn") - || n.getName().startsWith("maybeErr") - || n.getName().startsWith("maybeWarn") - || n.getName().startsWith("note") - || "releaseArray".equals(n.getName()) - || "releaseString".equals(n.getName()) - || "deleteArray".equals(n.getName()) - || "delete".equals(n.getName())) { - return; - } - - currentMethod = n.getName(); - - destructor = "destructor".equals(currentMethod); - - // if (n.getJavaDoc() != null) { - // n.getJavaDoc().accept(this, arg); - // } - currentAnnotations = n.getAnnotations(); - boolean isInline = inline(); - if (isInline && !inHeader()) { - return; - } - - if (destructor) { - printModifiers(ModifierSet.PUBLIC); - } else { - printModifiers(n.getModifiers()); - } - - if 
("stateLoop".equals(currentMethod) - && "Tokenizer".equals(javaClassName) - && cppTypes.stateLoopPolicies().length > 0) { - printer.print("template"); - if (inHeader()) { - printer.print(" "); - } else { - printer.printLn(); - } - } - - printTypeParameters(n.getTypeParameters(), arg); - if (n.getTypeParameters() != null) { - printer.print(" "); - } - if (!destructor) { - n.getType().accept(this, arg); - printer.print(" "); - } - printMethodNamespace(); - if (destructor) { - printer.print("~"); - printer.print(className); - } else { - printer.print(n.getName()); - } - - printer.print("("); - if (n.getParameters() != null) { - for (Iterator i = n.getParameters().iterator(); i.hasNext();) { - Parameter p = i.next(); - p.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - - for (int i = 0; i < n.getArrayCount(); i++) { - printer.print("[]"); - } - - if (override() && inHeader()) { - printer.print(" override"); - } - - currentAnnotations = null; - - if (inHeader() == isInline) { - printMethodBody(n.getBody(), arg); - } else { - printer.printLn(";"); - } - } - - private void printMethodBody(BlockStmt n, LocalSymbolTable arg) { - if (n == null) { - printer.print(";"); - } else { - printer.printLn(); - printer.printLn("{"); - printer.indent(); - if (destructor) { - String boilerplate = cppTypes.destructorBoilerplate(className); - if (boilerplate != null) { - printer.printLn(boilerplate); - } - } - if (n.getStmts() != null) { - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - } - printer.unindent(); - printer.print("}"); - } - printer.printLn(); - printer.printLn(); - } - - protected void printMethodNamespace() { - printer.printLn(); - printer.print(className); - printer.print("::"); - } - - public void visit(Parameter n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - - arg.putLocalType(n.getId().getName(), convertType(n.getType(), - n.getModifiers())); - - 
n.getType().accept(this, arg); - if (n.isVarArgs()) { - printer.print("..."); - } - printer.print(" "); - n.getId().accept(this, arg); - currentAnnotations = null; - } - - public void visit(ExplicitConstructorInvocationStmt n, LocalSymbolTable arg) { - if (n.isThis()) { - printTypeArgs(n.getTypeArgs(), arg); - printer.print("this"); - } else { - if (n.getExpr() != null) { - n.getExpr().accept(this, arg); - printer.print("."); - } - printTypeArgs(n.getTypeArgs(), arg); - printer.print("super"); - } - printer.print("("); - if (n.getArgs() != null) { - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(");"); - } - - public void visit(VariableDeclarationExpr n, LocalSymbolTable arg) { - currentAnnotations = n.getAnnotations(); - - arg.putLocalType(n.getVars().get(0).toString(), convertType( - n.getType(), n.getModifiers())); - - n.getType().accept(this, arg); - printer.print(" "); - - for (Iterator i = n.getVars().iterator(); i.hasNext();) { - VariableDeclarator v = i.next(); - v.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - currentAnnotations = null; - } - - public void visit(TypeDeclarationStmt n, LocalSymbolTable arg) { - n.getTypeDeclaration().accept(this, arg); - } - - public void visit(AssertStmt n, LocalSymbolTable arg) { - String message = null; - Expression msg = n.getMessage(); - boolean hasCheck = true; - if (msg != null) { - if (msg instanceof StringLiteralExpr) { - StringLiteralExpr sle = (StringLiteralExpr) msg; - message = sle.getValue(); - } else { - throw new RuntimeException("Bad assertion message."); - } - } - String macro = cppTypes.assertionMacro(); - if (message != null && message.startsWith("RELEASE: ")) { - message = message.substring("RELEASE: ".length()); - macro = cppTypes.releaseAssertionMacro(); - Expression check = n.getCheck(); - if (check instanceof BooleanLiteralExpr) { - 
BooleanLiteralExpr expr = (BooleanLiteralExpr) check; - if (!expr.getValue()) { - hasCheck = false; - macro = cppTypes.crashMacro(); - } - } - } - if (macro != null) { - printer.print(macro); - printer.print("("); - if (hasCheck) { - n.getCheck().accept(this, arg); - } - if (message != null) { - if (hasCheck) { - printer.print(", "); - } - printer.print("\""); - for (int i = 0; i < message.length(); i++) { - char c = message.charAt(i); - if (c == '"') { - printer.print("\""); - } else if (c >= ' ' && c <= '~') { - printer.print("" + c); - } else { - throw new RuntimeException("Bad assertion message string."); - } - } - printer.print("\""); - } - printer.print(");"); - } - } - - public void visit(BlockStmt n, LocalSymbolTable arg) { - printer.printLn("{"); - if (n.getStmts() != null) { - printer.indent(); - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - } - printer.print("}"); - - } - - public void visit(LabeledStmt n, LocalSymbolTable arg) { - // Only conditionless for loops are needed and supported - // Not implementing general Java continue semantics in order - // to keep the generated C++ more readable. - Statement stmt = n.getStmt(); - if (stmt instanceof ForStmt) { - ForStmt forLoop = (ForStmt) stmt; - if (!(forLoop.getInit() == null && forLoop.getCompare() == null && forLoop.getUpdate() == null)) { - forLoopsWithCondition.add(n.getLabel()); - } - } else { - throw new IllegalStateException( - "Only for loop supported as labeled statement. 
Line: " - + n.getBeginLine()); - } - String label = n.getLabel(); - if (labels.contains(label)) { - printer.unindent(); - printer.print(label); - printer.indent(); - printer.printLn(":"); - } - stmt.accept(this, arg); - printer.printLn(); - label += "_end"; - if (labels.contains(label)) { - printer.unindent(); - printer.print(label); - printer.indent(); - printer.print(":;"); - } - } - - public void visit(EmptyStmt n, LocalSymbolTable arg) { - printer.print(";"); - } - - public void visit(ExpressionStmt n, LocalSymbolTable arg) { - Expression e = n.getExpression(); - if (isCompletedCharacterReference(e)) { - printer.print(cppTypes.completedCharacterReference()); - printer.print(";"); - return; - } - boolean needsCondition = isTokenizerErrorReportingExpression(e); - if (!needsCondition && isDroppedExpression(e)) { - return; - } - if (needsCondition) { - printer.print("if ("); - printer.print(cppTypes.tokenizerErrorCondition()); - printer.printLn(") {"); - printer.indent(); - } - e.accept(this, arg); - if (!inConstructorBody) { - printer.print(";"); - } - if (needsCondition) { - printer.printLn(); - printer.unindent(); - printer.print("}"); - } - } - - private void visitTransition(MethodCallExpr call, LocalSymbolTable arg) { - List args = call.getArgs(); - if (reportTransitions) { - printer.print(cppTypes.transition()); - printer.print("("); - printer.print(cppTypes.firstTransitionArg()); - printer.print(", "); - args.get(1).accept(this, arg); - printer.print(", "); - args.get(2).accept(this, arg); - printer.print(", "); - args.get(3).accept(this, arg); - printer.print(")"); - } else { - args.get(1).accept(this, arg); - } - } - - private boolean isTokenizerErrorReportingExpression(Expression e) { - if (!reportTransitions) { - return false; - } - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (supportErrorReporting && !name.startsWith("errHtml4") - && 
("stateLoop".equals(currentMethod)) - && (name.startsWith("err") || name.startsWith("maybeErr"))) { - return true; - } - } - return false; - } - - private boolean isCompletedCharacterReference(Expression e) { - if (!reportTransitions) { - return false; - } - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (name.equals("completedNamedCharacterReference")) { - return true; - } - } - return false; - } - - private boolean isDroppedExpression(Expression e) { - if (e instanceof MethodCallExpr) { - MethodCallExpr methodCallExpr = (MethodCallExpr) e; - String name = methodCallExpr.getName(); - if (name.startsWith("fatal") || name.startsWith("note") - || name.startsWith("errHtml4") || name.startsWith("warn") - || name.startsWith("maybeWarn")) { - return true; - } - if (supportErrorReporting - && ("stateLoop".equals(currentMethod) && !reportTransitions) - && (name.startsWith("err") || name.startsWith("maybeErr"))) { - return true; - } - if (name.equals("completedNamedCharacterReference") - && !reportTransitions) { - return true; - } - } - return false; - } - - public void visit(SwitchStmt n, LocalSymbolTable arg) { - printer.print("switch ("); - n.getSelector().accept(this, arg); - printer.printLn(") {"); - if (n.getEntries() != null) { - printer.indent(); - for (SwitchEntryStmt e : n.getEntries()) { - e.accept(this, arg); - } - printer.unindent(); - } - printer.print("}"); - - } - - public void visit(SwitchEntryStmt n, LocalSymbolTable arg) { - if (n.getLabel() != null) { - boolean isMenuitem = n.getLabel().toString().equals("MENUITEM"); - if (isMenuitem) { - printer.printWithoutIndent("#ifdef ENABLE_VOID_MENUITEM\n"); - } - printer.print("case "); - n.getLabel().accept(this, arg); - printer.print(":"); - if (isMenuitem) { - printer.printWithoutIndent("\n#endif"); - } - } else { - printer.print("default:"); - } - if (isNoStatement(n.getStmts())) { - printer.printLn(); - printer.indent(); 
- if (n.getLabel() == null) { - printer.printLn("; // fall through"); - } - printer.unindent(); - } else { - printer.printLn(" {"); - printer.indent(); - for (Statement s : n.getStmts()) { - s.accept(this, arg); - printer.printLn(); - } - printer.unindent(); - printer.printLn("}"); - } - } - - private boolean isNoStatement(List stmts) { - if (stmts == null) { - return true; - } - for (Statement statement : stmts) { - if (!isDroppableStatement(statement)) { - return false; - } - } - return true; - } - - private boolean isDroppableStatement(Statement statement) { - if (statement instanceof AssertStmt) { - return true; - } else if (statement instanceof ExpressionStmt) { - ExpressionStmt es = (ExpressionStmt) statement; - if (isDroppedExpression(es.getExpression())) { - return true; - } - } - return false; - } - - public void visit(BreakStmt n, LocalSymbolTable arg) { - if (n.getId() != null) { - printer.print(cppTypes.breakMacro()); - printer.print("("); - printer.print(n.getId()); - printer.print(")"); - } else { - printer.print("break"); - } - printer.print(";"); - } - - public void visit(ReturnStmt n, LocalSymbolTable arg) { - printer.print("return"); - if (n.getExpr() != null) { - printer.print(" "); - n.getExpr().accept(this, arg); - } - printer.print(";"); - } - - public void visit(EnumDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - printer.print("enum "); - printer.print(n.getName()); - - currentAnnotations = null; - - if (n.getImplements() != null) { - printer.print(" implements "); - for (Iterator i = n.getImplements().iterator(); i.hasNext();) { - ClassOrInterfaceType c = i.next(); - c.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - - 
printer.printLn(" {"); - printer.indent(); - if (n.getEntries() != null) { - printer.printLn(); - for (Iterator i = n.getEntries().iterator(); i.hasNext();) { - EnumConstantDeclaration e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - if (n.getMembers() != null) { - printer.printLn(";"); - printMembers(n.getMembers(), arg); - } else { - if (n.getEntries() != null) { - printer.printLn(); - } - } - printer.unindent(); - printer.print("}"); - } - - public void visit(EnumConstantDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printer.print(n.getName()); - - currentAnnotations = null; - - if (n.getArgs() != null) { - printer.print("("); - for (Iterator i = n.getArgs().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - printer.print(")"); - } - - if (n.getClassBody() != null) { - printer.printLn(" {"); - printer.indent(); - printMembers(n.getClassBody(), arg); - printer.unindent(); - printer.printLn("}"); - } - } - - public void visit(EmptyMemberDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - printer.print(";"); - } - - public void visit(InitializerDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - if (n.isStatic()) { - printer.print("static "); - } - n.getBlock().accept(this, arg); - } - - public void visit(IfStmt n, LocalSymbolTable arg) { - if (TranslatorUtils.isDocumentModeHandlerNullCheck(n.getCondition())) { - Statement then = n.getThenStmt(); - if (then instanceof BlockStmt) { - BlockStmt block = (BlockStmt) then; - List statements = block.getStmts(); - if (statements != 
null && statements.size() == 1) { - statements.get(0).accept(this, arg); - } else { - then.accept(this, arg); - } - } else { - then.accept(this, arg); - } - } else if (!TranslatorUtils.isErrorHandlerIf(n.getCondition(), supportErrorReporting)) { - if (TranslatorUtils.isErrorOnlyBlock(n.getThenStmt(), supportErrorReporting)) { - if (n.getElseStmt() != null - && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { - printer.print("if ("); - if (n.getCondition() instanceof BinaryExpr) { - BinaryExpr binExpr = (BinaryExpr) n.getCondition(); - switch (binExpr.getOperator()) { - case equals: - binExpr.getLeft().accept(this, arg); - printer.print(" != "); - binExpr.getRight().accept(this, arg); - break; - case notEquals: - binExpr.getLeft().accept(this, arg); - printer.print(" == "); - binExpr.getRight().accept(this, arg); - break; - default: - printer.print("!("); - formatCondition(n.getCondition(), arg); - printer.print(")"); - break; - } - } else { - printer.print("!("); - formatCondition(n.getCondition(), arg); - printer.print(")"); - } - printer.print(") "); - n.getElseStmt().accept(this, arg); - } - } else { - boolean unlikely = (currentMethod != null) - && (Arrays.binarySearch( - METHODS_WITH_UNLIKELY_CONDITIONS, - currentMethod) >= 0); - printer.print("if ("); - if (unlikely) { - printer.print(cppTypes.unlikely()); - printer.print("("); - } - formatCondition(n.getCondition(), arg); - if (unlikely) { - printer.print(")"); - } - printer.print(") "); - n.getThenStmt().accept(this, arg); - if (n.getElseStmt() != null - && !TranslatorUtils.isErrorOnlyBlock(n.getElseStmt(), supportErrorReporting)) { - printer.print(" else "); - n.getElseStmt().accept(this, arg); - } - } - } - } - - private void formatCondition(Expression expr, LocalSymbolTable arg) { - if (expr instanceof BinaryExpr) { - BinaryExpr binExpr = (BinaryExpr) expr; - switch (binExpr.getOperator()) { - case notEquals: - if (binExpr.getRight() instanceof NullLiteralExpr) { - 
binExpr.getLeft().accept(this, arg); - return; - } - break; - default: - break; - } - } - expr.accept(this, arg); - } - - - public void visit(WhileStmt n, LocalSymbolTable arg) { - printer.print("while ("); - n.getCondition().accept(this, arg); - printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ContinueStmt n, LocalSymbolTable arg) { - // Not supporting the general Java continue semantics. - // Instead, making the generated code more readable for the - // case at hand. - if (n.getId() != null) { - printer.print(cppTypes.continueMacro()); - printer.print("("); - printer.print(n.getId()); - printer.print(")"); - if (forLoopsWithCondition.contains(n.getId())) { - throw new IllegalStateException( - "Continue attempted with a loop that has a condition. " - + className + " " + n.getId()); - } - } else { - printer.print("continue"); - } - printer.print(";"); - } - - public void visit(DoStmt n, LocalSymbolTable arg) { - printer.print("do "); - n.getBody().accept(this, arg); - printer.print(" while ("); - n.getCondition().accept(this, arg); - printer.print(");"); - } - - public void visit(ForeachStmt n, LocalSymbolTable arg) { - printer.print("for ("); - n.getVariable().accept(this, arg); - printer.print(" : "); - n.getIterable().accept(this, arg); - printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ForStmt n, LocalSymbolTable arg) { - printer.print("for ("); - if (n.getInit() != null) { - for (Iterator i = n.getInit().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(";"); - if (n.getCompare() != null) { - printer.print(" "); - n.getCompare().accept(this, arg); - } - printer.print(";"); - if (n.getUpdate() != null) { - printer.print(" "); - for (Iterator i = n.getUpdate().iterator(); i.hasNext();) { - Expression e = i.next(); - e.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - 
printer.print(") "); - n.getBody().accept(this, arg); - } - - public void visit(ThrowStmt n, LocalSymbolTable arg) { - printer.print("throw "); - n.getExpr().accept(this, arg); - printer.print(";"); - } - - public void visit(SynchronizedStmt n, LocalSymbolTable arg) { - printer.print("synchronized ("); - n.getExpr().accept(this, arg); - printer.print(") "); - n.getBlock().accept(this, arg); - } - - public void visit(TryStmt n, LocalSymbolTable arg) { - printer.print("try "); - n.getTryBlock().accept(this, arg); - if (n.getCatchs() != null) { - for (CatchClause c : n.getCatchs()) { - c.accept(this, arg); - } - } - if (n.getFinallyBlock() != null) { - printer.print(" finally "); - n.getFinallyBlock().accept(this, arg); - } - } - - public void visit(CatchClause n, LocalSymbolTable arg) { - printer.print(" catch ("); - n.getExcept().accept(this, arg); - printer.print(") "); - n.getCatchBlock().accept(this, arg); - - } - - public void visit(AnnotationDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - printer.print("@interface "); - printer.print(n.getName()); - currentAnnotations = null; - printer.printLn(" {"); - printer.indent(); - if (n.getMembers() != null) { - printMembers(n.getMembers(), arg); - } - printer.unindent(); - printer.print("}"); - } - - public void visit(AnnotationMemberDeclaration n, LocalSymbolTable arg) { - if (n.getJavaDoc() != null) { - n.getJavaDoc().accept(this, arg); - } - currentAnnotations = n.getAnnotations(); - // if (annotations != null) { - // for (AnnotationExpr a : annotations) { - // a.accept(this, arg); - // printer.printLn(); - // } - // } - printModifiers(n.getModifiers()); - - n.getType().accept(this, arg); - printer.print(" "); - 
printer.print(n.getName()); - currentAnnotations = null; - printer.print("()"); - if (n.getDefaultValue() != null) { - printer.print(" default "); - n.getDefaultValue().accept(this, arg); - } - printer.print(";"); - } - - public void visit(MarkerAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - } - - public void visit(SingleMemberAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - printer.print("("); - n.getMemberValue().accept(this, arg); - printer.print(")"); - } - - public void visit(NormalAnnotationExpr n, LocalSymbolTable arg) { - printer.print("@"); - n.getName().accept(this, arg); - printer.print("("); - if (n.getPairs() != null) { - for (Iterator i = n.getPairs().iterator(); i.hasNext();) { - MemberValuePair m = i.next(); - m.accept(this, arg); - if (i.hasNext()) { - printer.print(", "); - } - } - } - printer.print(")"); - } - - public void visit(MemberValuePair n, LocalSymbolTable arg) { - printer.print(n.getName()); - printer.print(" = "); - n.getValue().accept(this, arg); - } - - public void visit(LineComment n, LocalSymbolTable arg) { - printer.print("//"); - printer.printLn(n.getContent()); - } - - public void visit(BlockComment n, LocalSymbolTable arg) { - printer.print("/*"); - printer.print(n.getContent()); - printer.printLn("*/"); - } - - public void setLabels(Set labels) { - this.labels = labels; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java b/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java deleted file mode 100644 index 475a793b..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/GkAtomParser.java +++ /dev/null @@ -1,72 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the 
License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class GkAtomParser { - - /* Please note we aren't looking for the following Atom definitions: - PseudoElementAtom or NonInheritingAnonBoxAtom or InheritingAnonBoxAtom */ - private static final Pattern ATOM = Pattern.compile("^Atom\\(\"([^,]+)\",\\s*\"([^\"]*)\"\\).*$"); - - private final BufferedReader reader; - - public GkAtomParser(Reader reader) { - this.reader = new BufferedReader(reader); - } - - public Map parse() throws IOException { - Map map = new HashMap(); - String line; - while((line = reader.readLine()) != null) { - Matcher m = ATOM.matcher(line.trim()); - if (m.matches()) { - map.put(m.group(2), m.group(1)); - } - } - return map; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java deleted file mode 100644 index 6161746d..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/HVisitor.java +++ /dev/null @@ -1,291 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. 
- * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.List; - -import japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.MethodDeclaration; -import japa.parser.ast.body.ModifierSet; -import japa.parser.ast.body.Parameter; -import japa.parser.ast.body.VariableDeclarator; -import japa.parser.ast.stmt.BlockStmt; -import japa.parser.ast.type.PrimitiveType; -import japa.parser.ast.type.ReferenceType; - -public class HVisitor extends CppVisitor { - - private enum Visibility { - NONE, PRIVATE, PUBLIC, PROTECTED, - } - - private Visibility previousVisibility = Visibility.NONE; - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printMethodNamespace() - */ - @Override protected void printMethodNamespace() { - } - - public HVisitor(CppTypes cppTypes, SymbolTable symbolTable) { - super(cppTypes, symbolTable); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#startClassDeclaration() - */ - 
@Override protected void startClassDeclaration() { - printer.print("#ifndef "); - printer.print(className); - printer.printLn("_h"); - printer.print("#define "); - printer.print(className); - printer.printLn("_h"); - - printer.printLn(); - - String[] incs = cppTypes.boilerplateIncludes(javaClassName); - for (int i = 0; i < incs.length; i++) { - String inc = incs[i]; - if (className.equals(inc)) { - continue; - } - printer.print("#include \""); - printer.print(inc); - printer.printLn(".h\""); - } - - printer.printLn(); - - String[] forwDecls = cppTypes.boilerplateForwardDeclarations(); - for (int i = 0; i < forwDecls.length; i++) { - String decl = forwDecls[i]; - printer.print("class "); - printer.print(decl); - printer.printLn(";"); - } - - printer.printLn(); - - for (int i = 0; i < Main.H_LIST.length; i++) { - String klazz = Main.H_LIST[i]; - if (!(klazz.equals(javaClassName) || klazz.equals("StackNode"))) { - printer.print("class "); - printer.print(cppTypes.classPrefix()); - printer.print(klazz); - printer.printLn(";"); - } - } - - printer.printLn(); - - String[] otherDecls = cppTypes.boilerplateDeclarations(javaClassName); - for (int i = 0; i < otherDecls.length; i++) { - String decl = otherDecls[i]; - printer.printLn(decl); - } - - printer.printLn(); - - printer.print("class "); - printer.print(className); - if ("StateSnapshot".equals(javaClassName) || "TreeBuilder".equals(javaClassName)) { - printer.print(" : public "); - printer.print(cppTypes.treeBuilderStateInterface()); - } - printer.printLn(); - printer.printLn("{"); - printer.indent(); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#endClassDeclaration() - */ - @Override protected void endClassDeclaration() { - printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); - printer.printLn("void initializeStatics();"); - printModifiers(ModifierSet.PUBLIC | ModifierSet.STATIC); - printer.printLn("void releaseStatics();"); - - printer.unindent(); - - if 
(cppTypes.hasSupplement(javaClassName)) { - printer.printLn(); - printer.print("#include \""); - printer.print(className); - printer.printLn("HSupplement.h\""); - } - - printer.printLn("};"); - printer.printLn(); - printer.print("#endif"); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printModifiers(int) - */ - @Override protected void printModifiers(int modifiers) { - if (ModifierSet.isPrivate(modifiers)) { - if (previousVisibility != Visibility.PRIVATE) { - printer.unindent(); - printer.printLn("private:"); - printer.indent(); - previousVisibility = Visibility.PRIVATE; - } - } else if (ModifierSet.isProtected(modifiers)) { - if (previousVisibility != Visibility.PROTECTED) { - printer.unindent(); - printer.printLn("protected:"); - printer.indent(); - previousVisibility = Visibility.PROTECTED; - } - } else { - if (previousVisibility != Visibility.PUBLIC) { - printer.unindent(); - printer.printLn("public:"); - printer.indent(); - previousVisibility = Visibility.PUBLIC; - } - } - if (inline()) { - printer.print("inline "); - } - if (virtual()) { - printer.print("virtual "); - } - if (ModifierSet.isStatic(modifiers)) { - printer.print("static "); - } - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#fieldDeclaration(japa.parser.ast.body.FieldDeclaration, java.lang.LocalSymbolTable) - */ - @Override protected void fieldDeclaration(FieldDeclaration n, LocalSymbolTable arg) { - inField = true; - int modifiers = n.getModifiers(); - List variables = n.getVariables(); - VariableDeclarator declarator = variables.get(0); - if (ModifierSet.isStatic(modifiers) && ModifierSet.isFinal(modifiers) - && n.getType() instanceof PrimitiveType) { - PrimitiveType type = (PrimitiveType) n.getType(); - if (type.getType() != PrimitiveType.Primitive.Int) { - throw new IllegalStateException( - "Only int constant #defines supported."); - } - if (variables.size() != 1) { - throw new IllegalStateException( - "More than one variable declared by one 
declarator."); - } - printModifiers(modifiers); - printer.print("const "); - n.getType().accept(this, arg); - printer.print(" "); - declarator.getId().accept(this, arg); - printer.print(" = "); - declarator.getInit().accept(this, arg); - printer.printLn(";"); - printer.printLn(); - symbolTable.addPrimitiveConstant(javaClassName, declarator.getId().toString()); - } else { - if (n.getType() instanceof ReferenceType) { - ReferenceType rt = (ReferenceType) n.getType(); - currentArrayCount = rt.getArrayCount(); - if (currentArrayCount > 0 - && (rt.getType() instanceof PrimitiveType) && declarator.getInit() != null) { - if (!ModifierSet.isStatic(modifiers)) { - throw new IllegalStateException( - "Non-static array case not supported here." + declarator); - } - if (noLength()) { - inPrimitiveNoLengthFieldDeclarator = true; - } - } - } - printModifiers(modifiers); - inStatic = ModifierSet.isStatic(modifiers); - n.getType().accept(this, arg); - printer.print(" "); - if (ModifierSet.isStatic(modifiers)) { - if ("AttributeName".equals(n.getType().toString())) { - printer.print("ATTR_"); - } else if ("ElementName".equals(n.getType().toString())) { - printer.print("ELT_"); - } - } - declarator.getId().accept(this, arg); - printer.printLn(";"); - currentArrayCount = 0; - inStatic = false; - inPrimitiveNoLengthFieldDeclarator = false; - } - inField = false; - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorExplicit(java.util.List) - */ - @Override protected void printConstructorExplicit(List params) { - if (params != null && params.size() == 1) { - printer.print("explicit "); - } - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#printConstructorBody(japa.parser.ast.stmt.BlockStmt, java.lang.LocalSymbolTable) - */ - @Override protected void printConstructorBody(BlockStmt block, LocalSymbolTable arg) { - printer.printLn(";"); - } - - /** - * @see 
nu.validator.htmlparser.cpptranslate.CppVisitor#visit(japa.parser.ast.body.MethodDeclaration, java.lang.LocalSymbolTable) - */ - @Override public void visit(MethodDeclaration n, LocalSymbolTable arg) { - arg = new LocalSymbolTable(javaClassName, symbolTable); - printMethodDeclaration(n, arg); - } - - /** - * @see nu.validator.htmlparser.cpptranslate.CppVisitor#inHeader() - */ - @Override protected boolean inHeader() { - return true; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java deleted file mode 100644 index f27d465a..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LabelVisitor.java +++ /dev/null @@ -1,84 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.stmt.BreakStmt; -import japa.parser.ast.stmt.ContinueStmt; -import japa.parser.ast.visitor.VoidVisitorAdapter; - -import java.util.HashSet; -import java.util.Set; - -public class LabelVisitor extends VoidVisitorAdapter { - - private final Set labels = new HashSet(); - - public LabelVisitor() { - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.BreakStmt, java.lang.Object) - */ - @Override - public void visit(BreakStmt n, Object arg) { - String label = n.getId(); - if (label != null) { - labels.add(label + "_end"); - } - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.stmt.ContinueStmt, java.lang.Object) - */ - @Override - public void visit(ContinueStmt n, Object arg) { - String label = n.getId(); - if (label != null) { - labels.add(label); - } - } - - /** - * Returns the labels. 
- * - * @return the labels - */ - public Set getLabels() { - return labels; - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java b/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java deleted file mode 100644 index e4030f43..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LicenseExtractor.java +++ /dev/null @@ -1,75 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. 
If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; - -public class LicenseExtractor { - - private final Reader reader; - - public LicenseExtractor(File file) throws IOException { - this.reader = new InputStreamReader(new FileInputStream(file), "utf-8"); - } - - public String extract() throws IOException { - boolean prevWasAsterisk = false; - StringBuilder sb = new StringBuilder(); - int c; - while ((c = reader.read()) != -1) { - sb.append((char)c); - switch (c) { - case '*': - prevWasAsterisk = true; - continue; - case '/': - if (prevWasAsterisk) { - return sb.toString(); - } - default: - prevWasAsterisk = false; - continue; - } - } - return ""; - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java b/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java deleted file mode 100644 index a9375e88..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/LocalSymbolTable.java +++ /dev/null @@ -1,89 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. 
- * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.HashMap; -import java.util.Map; - -public class LocalSymbolTable { - - private final Map locals = new HashMap(); - - private final String javaClassName; - - private final SymbolTable delegate; - - /** - * @param javaClassName - * @param delegate - */ - public LocalSymbolTable(String javaClassName, SymbolTable delegate) { - this.javaClassName = javaClassName; - this.delegate = delegate; - } - - public void putLocalType(String name, Type type) { - locals.put(name, type); - } - - /** - * @param klazz - * @param variable - * @return - * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getFieldType(java.lang.String, java.lang.String) - */ - public Type getVariableType(String klazz, String variable) { - if (klazz == null) { - Type type = locals.get(variable); - if (type != null) { - return type; - } - } - return delegate.getFieldType(((klazz == null || "this".equals(klazz)) ? javaClassName : klazz), variable); - } - - /** - * @param klazz may be null or "this" - * @param method - * @return - * @see nu.validator.htmlparser.cpptranslate.SymbolTable#getMethodReturnType(java.lang.String, java.lang.String) - */ - public Type getMethodReturnType(String klazz, String method) { - return delegate.getMethodReturnType(((klazz == null || "this".equals(klazz)) ? javaClassName : klazz), method); - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/Main.java b/translator-src/nu/validator/htmlparser/cpptranslate/Main.java deleted file mode 100644 index 741b7419..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/Main.java +++ /dev/null @@ -1,145 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; - -import japa.parser.JavaParser; -import japa.parser.ParseException; -import japa.parser.ast.CompilationUnit; - -public class Main { - - static final String[] H_LIST = { - "AttributeName", - "ElementName", - "Tokenizer", - "TreeBuilder", - "MetaScanner", - "StackNode", - "UTF16Buffer", - "StateSnapshot", - "Portability", - }; - - private static final String[] CPP_LIST = { - "AttributeName", - "ElementName", - "Tokenizer", - "TreeBuilder", - "MetaScanner", - "StackNode", - "UTF16Buffer", - "StateSnapshot", - }; - - /** - * @param args - * @throws ParseException - * @throws IOException - */ - public static void main(String[] args) throws ParseException, IOException { - CppTypes cppTypes = new CppTypes(new File(args[2]), new File(args[3])); - SymbolTable symbolTable = new SymbolTable(); - - File javaDirectory = new File(args[0]); - File targetDirectory = new File(args[1]); - File cppDirectory = targetDirectory; - File javaCopyDirectory = new File(targetDirectory, "javasrc"); - - for (int i = 0; i < H_LIST.length; i++) { - parseFile(cppTypes, javaDirectory, cppDirectory, H_LIST[i], ".h", new HVisitor(cppTypes, symbolTable)); - copyFile(new File(javaDirectory, H_LIST[i] + ".java"), new File(javaCopyDirectory, H_LIST[i] + ".java")); - } - for (int i = 0; i < CPP_LIST.length; i++) { - parseFile(cppTypes, javaDirectory, cppDirectory, CPP_LIST[i], ".cpp", new CppVisitor(cppTypes, symbolTable)); - } - cppTypes.finished(); - } - - private static void copyFile(File input, File output) throws IOException { - if (input.getCanonicalFile().equals(output.getCanonicalFile())) { - return; // files are the same! 
- } - // This is horribly inefficient, but perf is not really much of a concern here. - FileInputStream in = new FileInputStream(input); - FileOutputStream out = new FileOutputStream(output); - int b; - while ((b = in.read()) != -1) { - out.write(b); - } - out.flush(); - out.close(); - in.close(); - } - - private static void parseFile(CppTypes cppTypes, File javaDirectory, - File cppDirectory, String className, String fne, CppVisitor visitor) - throws FileNotFoundException, UnsupportedEncodingException, - IOException { - File file = null; - try { - file = new File(javaDirectory, className + ".java"); - String license = new LicenseExtractor(file).extract(); - CompilationUnit cu = JavaParser.parse(new NoCppInputStream( - new CppOnlyInputStream(new FileInputStream(file))), "utf-8"); - LabelVisitor labelVisitor = new LabelVisitor(); - cu.accept(labelVisitor, null); - visitor.setLabels(labelVisitor.getLabels()); - cu.accept(visitor, null); - FileOutputStream out = new FileOutputStream(new File(cppDirectory, - cppTypes.classPrefix() + className + fne)); - OutputStreamWriter w = new OutputStreamWriter(out, "utf-8"); - w.write(license); - w.write("\n\n/*\n * THIS IS A GENERATED FILE. 
PLEASE DO NOT EDIT.\n * Please edit " - + className + ".java instead and regenerate.\n */\n\n"); - w.write(visitor.getSource()); - w.close(); - } catch (ParseException e) { - System.err.println(file); - e.printStackTrace(); - } - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java b/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java deleted file mode 100644 index 86f9ae7f..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/NoCppInputStream.java +++ /dev/null @@ -1,86 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. 
If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.IOException; -import java.io.InputStream; - -public class NoCppInputStream extends InputStream { - - private final static char[] START = "[NOCPP[".toCharArray(); - - private final static char[] END = "]NOCPP]".toCharArray(); - - private int state; - - private final InputStream delegate; - - - - /** - * @param delegate - */ - public NoCppInputStream(InputStream delegate) { - this.delegate = delegate; - this.state = 0; - } - - @Override public int read() throws IOException { - int c; - if (state == START.length) { - int endState = 0; - while (endState != END.length) { - c = delegate.read(); - if (END[endState] == c) { - endState++; - } else { - endState = 0; - } - } - state = 0; - } - c = delegate.read(); - if (START[state] == c) { - state++; - } else { - state = 0; - } - return c; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java b/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java deleted file mode 100644 index 305f516a..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/StringLiteralParser.java +++ /dev/null @@ -1,70 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class StringLiteralParser { - - private static final Pattern STRING_DECL = Pattern.compile("^.*\\(([^ ]+) = new nsString\\(\\)\\)->Assign\\(NS_LITERAL_STRING\\(\"([^\"]*)\"\\)\\);.*$"); - - private final BufferedReader reader; - - public StringLiteralParser(Reader reader) { - this.reader = new BufferedReader(reader); - } - - public Map parse() throws IOException { - Map map = new HashMap(); - String line; - while((line = reader.readLine()) != null) { - Matcher m = STRING_DECL.matcher(line); - if (m.matches()) { - map.put(m.group(2), m.group(1)); - } - } - return map; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java b/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java deleted file mode 100644 index e24247f7..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/StringPair.java +++ /dev/null @@ -1,73 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. 
All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -public class StringPair { - - /** - * @param first - * @param second - */ - public StringPair(String first, String second) { - this.first = first; - this.second = second; - } - - private final String first; - - private final String second; - - /** - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override public boolean equals(Object o) { - if (o instanceof StringPair) { - StringPair other = (StringPair) o; - return first.equals(other.first) && second.equals(other.second); - } - return false; - } - - /** - * @see java.lang.Object#hashCode() - */ - @Override public int hashCode() { - return first.hashCode() ^ second.hashCode(); - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java b/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java deleted file mode 100644 index 09ba5a00..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTable.java +++ /dev/null @@ -1,93 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * 
Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -public class SymbolTable { - - private final Set primitiveConstants = new HashSet(); - - private final Map fields = new HashMap(); - - private final Map methodReturns = new HashMap(); - - /** - * This is a sad hack to work around the fact the there's no real symbol - * table yet. - * - * @param field - * @return - */ - public boolean isAttributeOrElementName(String klazz, String field) { - if (isPrimitiveConstant(klazz, field)) { - return false; - } - return !("ATTRIBUTE_HASHES".equals(field) - || "ATTRIBUTE_NAMES".equals(field) - || "ELEMENT_HASHES".equals(field) - || "ELEMENT_NAMES".equals(field) || "ALL_NO_NS".equals(field)); - } - - public void addPrimitiveConstant(String klazz, String field) { - primitiveConstants.add(new StringPair(klazz, field)); - } - - public void putFieldType(String klazz, String field, Type type) { - fields.put(new StringPair(klazz, field), type); - } - - public void putMethodReturnType(String klazz, String method, Type type) { - methodReturns.put(new StringPair(klazz, method), type); - } - - public boolean isPrimitiveConstant(String klazz, String field) { - return primitiveConstants.contains(new StringPair(klazz, field)); - } - - public Type getFieldType(String klazz, String field) { - return fields.get(new StringPair(klazz, field)); - } - - public Type getMethodReturnType(String klazz, String method) { - return methodReturns.get(new StringPair(klazz, method)); - } -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java b/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java deleted file mode 100644 index 00f7c574..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/SymbolTableVisitor.java +++ /dev/null @@ -1,71 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 
1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.body.ClassOrInterfaceDeclaration; -import japa.parser.ast.body.FieldDeclaration; -import japa.parser.ast.body.MethodDeclaration; - -public class SymbolTableVisitor extends AnnotationHelperVisitor { - - private String javaClassName; - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.FieldDeclaration, java.lang.Object) - */ - @Override public void visit(FieldDeclaration n, SymbolTable arg) { - currentAnnotations = n.getAnnotations(); - arg.putFieldType(javaClassName, n.getVariables().get(0).getId().getName(), convertType(n.getType(), n.getModifiers())); - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.MethodDeclaration, java.lang.Object) - */ - @Override public void visit(MethodDeclaration n, SymbolTable arg) { - currentAnnotations = n.getAnnotations(); - arg.putMethodReturnType(javaClassName, n.getName(), convertType(n.getType(), n.getModifiers())); - } - - /** - * @see japa.parser.ast.visitor.VoidVisitorAdapter#visit(japa.parser.ast.body.ClassOrInterfaceDeclaration, java.lang.Object) - */ - @Override public void visit(ClassOrInterfaceDeclaration n, SymbolTable arg) { - javaClassName = n.getName(); - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java b/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java deleted file mode 100644 index 866db093..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/TranslatorUtils.java +++ /dev/null @@ -1,81 +0,0 @@ -package nu.validator.htmlparser.cpptranslate; - -import japa.parser.ast.expr.BinaryExpr; -import japa.parser.ast.expr.BinaryExpr.Operator; -import japa.parser.ast.expr.Expression; -import japa.parser.ast.expr.MethodCallExpr; -import japa.parser.ast.expr.NameExpr; -import japa.parser.ast.expr.NullLiteralExpr; -import japa.parser.ast.stmt.BlockStmt; -import 
japa.parser.ast.stmt.ExpressionStmt; -import japa.parser.ast.stmt.Statement; - -import java.util.List; - -public class TranslatorUtils { - public static boolean isErrorOnlyBlock(Statement elseStmt, boolean supportErrorReporting) { - if (supportErrorReporting) { - return false; - } - if (elseStmt instanceof BlockStmt) { - BlockStmt block = (BlockStmt) elseStmt; - List statements = block.getStmts(); - if (statements == null) { - return false; - } - if (statements.size() != 1) { - return false; - } - Statement statement = statements.get(0); - if (statement instanceof ExpressionStmt) { - ExpressionStmt exprStmt = (ExpressionStmt) statement; - Expression expr = exprStmt.getExpression(); - if (expr instanceof MethodCallExpr) { - MethodCallExpr call = (MethodCallExpr) expr; - if (call.getName().startsWith("err")) { - return true; - } - } - } - } - return false; - } - - public static boolean isErrorHandlerIf(Expression condition, boolean supportErrorReporting) { - if (supportErrorReporting) { - return false; - } - while (condition instanceof BinaryExpr) { - BinaryExpr binex = (BinaryExpr) condition; - condition = binex.getLeft(); - if (condition instanceof NameExpr) { - NameExpr name = (NameExpr) condition; - if ("errorHandler".equals(name.getName())) { - return true; - } - } - } - return false; - } - - public static boolean isDocumentModeHandlerNullCheck(Expression condition) { - if (condition instanceof BinaryExpr) { - BinaryExpr binex = (BinaryExpr) condition; - if (binex.getOperator() != Operator.notEquals) { - return false; - } - if (!(binex.getRight() instanceof NullLiteralExpr)) { - return false; - } - Expression left = binex.getLeft(); - if (left instanceof NameExpr) { - NameExpr name = (NameExpr) left; - if ("documentModeHandler".equals(name.getName())) { - return true; - } - } - } - return false; - } - -} diff --git a/translator-src/nu/validator/htmlparser/cpptranslate/Type.java b/translator-src/nu/validator/htmlparser/cpptranslate/Type.java deleted file mode 
100644 index 783a3bbd..00000000 --- a/translator-src/nu/validator/htmlparser/cpptranslate/Type.java +++ /dev/null @@ -1,99 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. - * Portions created by the Initial Developer are Copyright (C) 2009 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. 
- * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.cpptranslate; - -public class Type { - - /** - * @param type - * @param arrayCount - * @param noLength - * @param modifiers - */ - public Type(String type, int arrayCount, boolean noLength, int modifiers) { - this.type = type; - this.arrayCount = arrayCount; - this.noLength = noLength; - this.modifiers = modifiers; - } - - private final String type; - - private final int arrayCount; - - private final boolean noLength; - - private final int modifiers; - - /** - * Returns the type. - * - * @return the type - */ - public String getType() { - return type; - } - - /** - * Returns the arrayCount. - * - * @return the arrayCount - */ - public int getArrayCount() { - return arrayCount; - } - - /** - * Returns the noLength. - * - * @return the noLength - */ - public boolean isNoLength() { - return noLength; - } - - /** - * Returns the modifiers. - * - * @return the modifiers - */ - public int getModifiers() { - return modifiers; - } - -} diff --git a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java b/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java deleted file mode 100644 index 69ddb318..00000000 --- a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharacters.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2008-2009 Mozilla Foundation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the 
Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -package nu.validator.htmlparser.generator; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class GenerateNamedCharacters { - - private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - - private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); - - private static String toUString(int c) { - String hexString = Integer.toHexString(c); - switch (hexString.length()) { - case 1: - return "\\u000" + hexString; - case 2: - return "\\u00" + hexString; - case 3: - return "\\u0" + hexString; - case 4: - return "\\u" + hexString; - default: - throw new RuntimeException("Unreachable."); - } - } - - private static int charToIndex(char c) { - if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } - throw new IllegalArgumentException("Bad char in named character name: " - + c); - } - - private static boolean allZero(int[] arr) { - for (int i = 0; i < arr.length; i++) { - if (arr[i] != 0) { - return false; - } - } - return true; - } - - /** - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - TreeMap entities = new TreeMap(); - BufferedReader reader = new BufferedReader(new InputStreamReader( - System.in, "utf-8")); - String line; - while ((line = 
reader.readLine()) != null) { - Matcher m = LINE_PATTERN.matcher(line); - while (m.find()) { - String value; - if (m.group(3) != null) { - // two BMP chars - int firstIntVal = Integer.parseInt(m.group(2), 16); - int secondIntVal = Integer.parseInt(m.group(3), 16); - value = ("" + (char)firstIntVal) + (char)secondIntVal; - } else { - // one code point - int intVal = Integer.parseInt(m.group(2), 16); - if (intVal <= 0xFFFF) { - value = "" + (char)intVal; - } else { - int high = (LEAD_OFFSET + (intVal >> 10)); - int low = (0xDC00 + (intVal & 0x3FF)); - value = ("" + (char)high) + (char)low; - } - } - entities.put(m.group(1), value); - } - } - - // Java initializes arrays to zero. Zero is our magic value for no hilo - // value. - int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; - - String firstName = entities.entrySet().iterator().next().getKey(); - int firstKey = charToIndex(firstName.charAt(0)); - int secondKey = firstName.charAt(1); - int row = 0; - int lo = 0; - - System.out.print("static final @NoLength @CharacterName String[] NAMES = {\n"); - for (Map.Entry entity : entities.entrySet()) { - String name = entity.getKey(); - int newFirst = charToIndex(name.charAt(0)); - int newSecond = name.charAt(1); - assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; - if (firstKey != newFirst || secondKey != newSecond) { - hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; - lo = row; - firstKey = newFirst; - secondKey = newSecond; - } - System.out.print("\""); - System.out.print(name.substring(2)); - System.out.print("\",\n"); - row++; - } - System.out.print("};\n"); - - hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; - - System.out.print("static final @NoLength char[][] VALUES = {\n"); - for (Map.Entry entity : entities.entrySet()) { - String value = entity.getValue(); - System.out.print("{"); - if (value.length() == 1) { - char c = value.charAt(0); - if (c == '\'') { - 
System.out.print("\'\\\'\'"); - } else if (c == '\n') { - System.out.print("\'\\n\'"); - } else if (c == '\\') { - System.out.print("\'\\\\\'"); - } else if (c <= 0xFFFF) { - System.out.print("\'"); - System.out.print(toUString(c)); - System.out.print("\'"); - } - } else { - System.out.print("\'"); - System.out.print(toUString(value.charAt(0))); - System.out.print("\', \'"); - System.out.print(toUString(value.charAt(1))); - System.out.print("\'"); - } - System.out.print("},\n"); - } - System.out.print("};\n"); - - System.out.print("static final @NoLength int[][] HILO_ACCEL = {\n"); - for (int i = 0; i < hiLoTable.length; i++) { - if (allZero(hiLoTable[i])) { - System.out.print("null,\n"); - } else { - System.out.print("{"); - for (int j = 0; j < hiLoTable[i].length; j++) { - System.out.print(hiLoTable[i][j]); - System.out.print(", "); - } - System.out.print("},\n"); - } - } - System.out.print("};\n"); - } - -} diff --git a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java b/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java deleted file mode 100644 index ea9d47b1..00000000 --- a/translator-src/nu/validator/htmlparser/generator/GenerateNamedCharactersCpp.java +++ /dev/null @@ -1,579 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is HTML Parser C++ Translator code. - * - * The Initial Developer of the Original Code is - * Mozilla Foundation. 
- * Portions created by the Initial Developer are Copyright (C) 2008 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * Henri Sivonen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -package nu.validator.htmlparser.generator; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.Map; -import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import nu.validator.htmlparser.cpptranslate.CppTypes; - -public class GenerateNamedCharactersCpp { - - /** - * The license for the output of this program except for data files. 
- */ - private static final String OUTPUT_LICENSE = "/*\n" - + " * Copyright (c) 2008-2010 Mozilla Foundation\n" - + " *\n" - + " * Permission is hereby granted, free of charge, to any person obtaining a \n" - + " * copy of this software and associated documentation files (the \"Software\"), \n" - + " * to deal in the Software without restriction, including without limitation \n" - + " * the rights to use, copy, modify, merge, publish, distribute, sublicense, \n" - + " * and/or sell copies of the Software, and to permit persons to whom the \n" - + " * Software is furnished to do so, subject to the following conditions:\n" - + " *\n" - + " * The above copyright notice and this permission notice shall be included in \n" - + " * all copies or substantial portions of the Software.\n" - + " *\n" - + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \n" - + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \n" - + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL \n" - + " * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \n" - + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING \n" - + " * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \n" - + " * DEALINGS IN THE SOFTWARE.\n" + " */\n\n"; - - /** - * The license for the generated data files. 
- */ - private static final String DATA_LICENSE = "/*\n" - + " * Copyright 2004-2010 Apple Computer, Inc., Mozilla Foundation, and Opera \n" - + " * Software ASA.\n" - + " * \n" - + " * You are granted a license to use, reproduce and create derivative works of \n" - + " * this document.\n" + " */\n\n"; - - private static final int LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - - private static final Pattern LINE_PATTERN = Pattern.compile(" ([^<]*) U\\+(\\S*) (?:U\\+(\\S*) )?"); - - private static String toHexString(int c) { - String hexString = Integer.toHexString(c); - switch (hexString.length()) { - case 1: - return "0x000" + hexString; - case 2: - return "0x00" + hexString; - case 3: - return "0x0" + hexString; - case 4: - return "0x" + hexString; - default: - throw new RuntimeException("Unreachable."); - } - } - - /** - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - TreeMap entities = new TreeMap(); - BufferedReader reader = new BufferedReader(new InputStreamReader( - new FileInputStream(args[0]), "utf-8")); - String line; - while ((line = reader.readLine()) != null) { - Matcher m = LINE_PATTERN.matcher(line); - while (m.find()) { - String value; - if (m.group(3) != null) { - // two BMP chars - int firstIntVal = Integer.parseInt(m.group(2), 16); - int secondIntVal = Integer.parseInt(m.group(3), 16); - value = ("" + (char)firstIntVal) + (char)secondIntVal; - } else { - // one code point - int intVal = Integer.parseInt(m.group(2), 16); - if (intVal <= 0xFFFF) { - value = "" + (char)intVal; - } else { - int high = (LEAD_OFFSET + (intVal >> 10)); - int low = (0xDC00 + (intVal & 0x3FF)); - value = ("" + (char)high) + (char)low; - } - } - entities.put(m.group(1), value); - } - } - - CppTypes cppTypes = new CppTypes(null, null); - File targetDirectory = new File(args[1]); - - generateH(targetDirectory, cppTypes, entities); - generateInclude(targetDirectory, cppTypes, entities); - generateCpp(targetDirectory, 
cppTypes, entities); - generateAccelH(targetDirectory, cppTypes, entities); - generateAccelCpp(targetDirectory, cppTypes, entities); - } - - private static void generateAccelCpp(File targetDirectory, - CppTypes cppTypes, TreeMap entities) throws IOException { - String includeFile = cppTypes.classPrefix() - + "NamedCharactersInclude.h"; - File cppFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersAccel.cpp"); - Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), - "utf-8"); - - out.write(DATA_LICENSE); - out.write('\n'); - out.write("#include \"" + cppTypes.classPrefix() - + "NamedCharactersAccel.h\"\n"); - out.write("\n"); - - // Java initializes arrays to zero. Zero is our magic value for no hilo - // value. - int[][] hiLoTable = new int['z' + 1]['Z' - 'A' + 1 + 'z' - 'a' + 1]; - - String firstName = entities.entrySet().iterator().next().getKey(); - int firstKey = charToIndex(firstName.charAt(0)); - int secondKey = firstName.charAt(1); - int row = 0; - int lo = 0; - - for (Map.Entry entity : entities.entrySet()) { - String name = entity.getKey(); - int newFirst = charToIndex(name.charAt(0)); - int newSecond = name.charAt(1); - assert !(newFirst == 0 && newSecond == 0) : "Not prepared for name starting with AA"; - if (firstKey != newFirst || secondKey != newSecond) { - hiLoTable[secondKey][firstKey] = ((row - 1) << 16) | lo; - lo = row; - firstKey = newFirst; - secondKey = newSecond; - } - row++; - } - - hiLoTable[secondKey][firstKey] = ((entities.size() - 1) << 16) | lo; - - for (int i = 0; i < hiLoTable.length; i++) { - if (!allZero(hiLoTable[i])) { - out.write("static " + cppTypes.intType() + " const HILO_ACCEL_" - + i + "[] = {\n"); - for (int j = 0; j < hiLoTable[i].length; j++) { - if (j != 0) { - out.write(", "); - } - out.write("" + hiLoTable[i][j]); - } - out.write("\n};\n\n"); - } - } - - out.write("const int32_t* const " + cppTypes.classPrefix() - + "NamedCharactersAccel::HILO_ACCEL[] = {\n"); - for (int i = 0; i < 
hiLoTable.length; i++) { - if (i != 0) { - out.write(",\n"); - } - if (allZero(hiLoTable[i])) { - out.write(" 0"); - } else { - out.write(" HILO_ACCEL_" + i); - } - } - out.write("\n};\n\n"); - - out.flush(); - out.close(); - } - - private static void generateAccelH(File targetDirectory, CppTypes cppTypes, - TreeMap entities) throws IOException { - File hFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersAccel.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(hFile), - "utf-8"); - out.write(DATA_LICENSE); - out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); - out.write("#define " + cppTypes.classPrefix() + "NamedCharactersAccel_h\n"); - out.write('\n'); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write('\n'); - - out.write("class " + cppTypes.classPrefix() + "NamedCharactersAccel\n"); - out.write("{\n"); - out.write(" public:\n"); - out.write(" static const " + cppTypes.intType() - + "* const HILO_ACCEL[];\n"); - out.write("};\n"); - - out.write("\n#endif // " + cppTypes.classPrefix() - + "NamedCharactersAccel_h\n"); - out.flush(); - out.close(); - } - - private static void generateH(File targetDirectory, CppTypes cppTypes, - Map entities) throws IOException { - File hFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharacters.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(hFile), - "utf-8"); - out.write(OUTPUT_LICENSE); - out.write("#ifndef " + cppTypes.classPrefix() + "NamedCharacters_h\n"); - out.write("#define " + cppTypes.classPrefix() + "NamedCharacters_h\n"); - out.write('\n'); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write("\nstruct "); - 
out.write(cppTypes.characterNameTypeDeclaration()); - out.write(" {\n "); - out.write(cppTypes.unsignedShortType()); - out.write(" nameStart;\n "); - out.write(cppTypes.unsignedShortType()); - out.write(" nameLen;\n #ifdef DEBUG\n "); - out.write(cppTypes.intType()); - out.write(" n;\n #endif\n "); - out.write(cppTypes.intType()); - out.write(" length() const;\n "); - out.write(cppTypes.charType()); - out.write(" charAt("); - out.write(cppTypes.intType()); - out.write(" index) const;\n};\n\n"); - - out.write("class " + cppTypes.classPrefix() + "NamedCharacters\n"); - out.write("{\n"); - out.write(" public:\n"); - out.write(" static const " + cppTypes.characterNameTypeDeclaration() + " NAMES[];\n"); - out.write(" static const " + cppTypes.charType() + " VALUES[][2];\n"); - out.write(" static " + cppTypes.charType() + "** WINDOWS_1252;\n"); - out.write(" static void initializeStatics();\n"); - out.write(" static void releaseStatics();\n"); - out.write("};\n"); - - out.write("\n#endif // " + cppTypes.classPrefix() - + "NamedCharacters_h\n"); - out.flush(); - out.close(); - } - - private static void generateInclude(File targetDirectory, - CppTypes cppTypes, Map entities) throws IOException { - File includeFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharactersInclude.h"); - Writer out = new OutputStreamWriter(new FileOutputStream(includeFile), - "utf-8"); - - out.write(DATA_LICENSE); - out.write("/* Data generated from the table of named character references found at\n"); - out.write(" *\n"); - out.write(" * http://www.whatwg.org/specs/web-apps/current-work/multipage/named-character-references.html#named-character-references\n"); - out.write(" *\n"); - out.write(" * Files that #include this file must #define NAMED_CHARACTER_REFERENCE as a\n"); - out.write(" * macro of four parameters:\n"); - out.write(" *\n"); - out.write(" * 1. a unique integer N identifying the Nth [0,1,..] macro expansion in this file,\n"); - out.write(" * 2. 
a comma-separated sequence of characters comprising the character name,\n"); - out.write(" * without the first two letters or 0 if the sequence would be empty. \n"); - out.write(" * See Tokenizer.java.\n"); - out.write(" * 3. the length of this sequence of characters,\n"); - out.write(" * 4. placeholder flag (0 if argument #is not a placeholder and 1 if it is),\n"); - out.write(" * 5. a comma-separated sequence of char16_t literals corresponding\n"); - out.write(" * to the code-point(s) of the named character.\n"); - out.write(" *\n"); - out.write(" * The macro expansion doesn't have to refer to all or any of these parameters,\n"); - out.write(" * but common sense dictates that it should involve at least one of them.\n"); - out.write(" */\n"); - out.write("\n"); - out.write("// This #define allows the NAMED_CHARACTER_REFERENCE macro to accept comma-\n"); - out.write("// separated sequences as single macro arguments. Using commas directly would\n"); - out.write("// split the sequence into multiple macro arguments.\n"); - out.write("#define _ ,\n"); - out.write("\n"); - - int i = 0; - for (Map.Entry entity : entities.entrySet()) { - out.write("NAMED_CHARACTER_REFERENCE(" + i++ + ", "); - String name = entity.getKey(); - writeNameInitializer(out, name, " _ "); - out.write(", " + (name.length() - 2) + ", "); - out.write((name.length() == 2 ? 
"1" : "0") + ", "); - writeValueInitializer(out, entity.getValue(), " _ "); - out.write(")\n"); - } - - out.write("\n"); - out.write("#undef _\n"); - - out.flush(); - out.close(); - } - - private static void writeNameInitializer(Writer out, - String name, String separator) - throws IOException { - out.write("/* " + name.charAt(0) + " " + name.charAt(1) + " */ "); - if (name.length() == 2) { - out.write("0"); - } else { - for (int i = 2; i < name.length(); i++) { - out.write("'" + name.charAt(i) + "'"); - if (i < name.length() - 1) - out.write(separator); - } - } - } - - private static void writeValueInitializer(Writer out, - String value, String separator) - throws IOException { - if (value.length() == 1) { - out.write(toHexString(value.charAt(0))); - out.write(separator); - out.write("0"); - } else { - out.write(toHexString(value.charAt(0))); - out.write(separator); - out.write(toHexString(value.charAt(1))); - } - } - - private static void defineMacroAndInclude(Writer out, String expansion, - String includeFile) throws IOException { - out.write("#define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + expansion + "\n"); - out.write("#include \"" + includeFile + "\"\n"); - out.write("#undef NAMED_CHARACTER_REFERENCE\n"); - } - - private static void defineMacroAndInclude(Writer out, String expansion, - String debugExpansion, String includeFile) throws IOException { - out.write("#ifdef DEBUG\n"); - out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + debugExpansion + "\n"); - out.write("#else\n"); - out.write(" #define NAMED_CHARACTER_REFERENCE(N, CHARS, LEN, FLAG, VALUE) \\\n" - + expansion + "\n"); - out.write("#endif\n"); - out.write("#include \"" + includeFile + "\"\n"); - out.write("#undef NAMED_CHARACTER_REFERENCE\n"); - } - - private static void writeStaticMemberDeclaration(Writer out, - CppTypes cppTypes, String type, String name) throws IOException { - out.write(type + " " + cppTypes.classPrefix() + 
"NamedCharacters::" - + name + ";\n"); - } - - private static int charToIndex(char c) { - if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } - throw new IllegalArgumentException("Bad char in named character name: " - + c); - } - - private static boolean allZero(int[] arr) { - for (int i = 0; i < arr.length; i++) { - if (arr[i] != 0) { - return false; - } - } - return true; - } - - private static void generateCpp(File targetDirectory, CppTypes cppTypes, - Map entities) throws IOException { - String includeFile = cppTypes.classPrefix() - + "NamedCharactersInclude.h"; - File cppFile = new File(targetDirectory, cppTypes.classPrefix() - + "NamedCharacters.cpp"); - Writer out = new OutputStreamWriter(new FileOutputStream(cppFile), - "utf-8"); - - out.write(OUTPUT_LICENSE); - out.write("#define " + cppTypes.classPrefix() - + "NamedCharacters_cpp_\n"); - - String[] includes = cppTypes.namedCharactersIncludes(); - for (int i = 0; i < includes.length; i++) { - String include = includes[i]; - out.write("#include \"" + include + ".h\"\n"); - } - - out.write('\n'); - out.write("#include \"" + cppTypes.classPrefix() - + "NamedCharacters.h\"\n"); - out.write("\n"); - - out.write("const " + cppTypes.charType() + " " + cppTypes.classPrefix() - + "NamedCharacters::VALUES[][2] = {\n"); - defineMacroAndInclude(out, "{ VALUE },", includeFile); - // The useless terminator entry makes the above macro simpler with - // compilers that whine about a comma after the last item - out.write("{0, 0} };\n\n"); - - String staticMemberType = cppTypes.charType() + "**"; - writeStaticMemberDeclaration(out, cppTypes, staticMemberType, - "WINDOWS_1252"); - - out.write("static " + cppTypes.charType() - + " const WINDOWS_1252_DATA[] = {\n"); - out.write(" 0x20AC,\n"); - out.write(" 0x0081,\n"); - out.write(" 0x201A,\n"); - out.write(" 0x0192,\n"); - out.write(" 0x201E,\n"); - out.write(" 0x2026,\n"); - out.write(" 0x2020,\n"); - out.write(" 
0x2021,\n"); - out.write(" 0x02C6,\n"); - out.write(" 0x2030,\n"); - out.write(" 0x0160,\n"); - out.write(" 0x2039,\n"); - out.write(" 0x0152,\n"); - out.write(" 0x008D,\n"); - out.write(" 0x017D,\n"); - out.write(" 0x008F,\n"); - out.write(" 0x0090,\n"); - out.write(" 0x2018,\n"); - out.write(" 0x2019,\n"); - out.write(" 0x201C,\n"); - out.write(" 0x201D,\n"); - out.write(" 0x2022,\n"); - out.write(" 0x2013,\n"); - out.write(" 0x2014,\n"); - out.write(" 0x02DC,\n"); - out.write(" 0x2122,\n"); - out.write(" 0x0161,\n"); - out.write(" 0x203A,\n"); - out.write(" 0x0153,\n"); - out.write(" 0x009D,\n"); - out.write(" 0x017E,\n"); - out.write(" 0x0178\n"); - out.write("};\n\n"); - - out.write("/**\n"); - out.write(" * To avoid having lots of pointers in the |charData| array, below,\n"); - out.write(" * which would cause us to have to do lots of relocations at library\n"); - out.write(" * load time, store all the string data for the names in one big array.\n"); - out.write(" * Then use tricks with enums to help us build an array that contains\n"); - out.write(" * the positions of each within the big arrays.\n"); - out.write(" */\n\n"); - - out.write("static const " + cppTypes.byteType() + " ALL_NAMES[] = {\n"); - - defineMacroAndInclude(out, "CHARS ,", includeFile); - - out.write("};\n\n"); - - out.write("enum NamePositions {\n"); - out.write(" DUMMY_INITIAL_NAME_POSITION = 0,\n"); - - out.write("/* enums don't take up space, so generate _START and _END */\n"); - defineMacroAndInclude(out, - "NAME_##N##_DUMMY, /* automatically one higher than previous */ \\\n" - + "NAME_##N##_START = NAME_##N##_DUMMY - 1, \\\n" - + "NAME_##N##_END = NAME_##N##_START + LEN + FLAG,", - includeFile); - - out.write(" DUMMY_FINAL_NAME_VALUE\n"); - out.write("};\n\n"); - - String arrayLengthMacro = cppTypes.arrayLengthMacro(); - String staticAssert = cppTypes.staticAssert(); - if (staticAssert != null && arrayLengthMacro != null) { - out.write(staticAssert + "(" + arrayLengthMacro - + 
"(ALL_NAMES) < 0x10000, \"Start positions should fit in 16 bits\");\n\n"); - } - - out.write("const " + cppTypes.characterNameTypeDeclaration() + " " + cppTypes.classPrefix() - + "NamedCharacters::NAMES[] = {\n"); - defineMacroAndInclude(out, "{ NAME_##N##_START, LEN, },", "{ NAME_##N##_START, LEN, N },", includeFile); - out.write("};\n\n"); - - out.write(cppTypes.intType()); - out.write("\n"); - out.write(cppTypes.characterNameTypeDeclaration()); - out.write("::length() const\n{\n return nameLen;\n}\n\n"); - out.write(cppTypes.charType()); - out.write("\n"); - out.write(cppTypes.characterNameTypeDeclaration()); - out.write("::charAt("); - out.write("int32_t"); - out.write(" index) const\n{\n return static_cast<"); - out.write(cppTypes.charType()); - out.write("> (ALL_NAMES[nameStart + index]);\n}\n\n"); - - out.write("void\n"); - out.write(cppTypes.classPrefix() - + "NamedCharacters::initializeStatics()\n"); - out.write("{\n"); - out.write(" WINDOWS_1252 = new " + cppTypes.charType() + "*[32];\n"); - out.write(" for (" + cppTypes.intType() + " i = 0; i < 32; ++i) {\n"); - out.write(" WINDOWS_1252[i] = (" + cppTypes.charType() - + "*)&(WINDOWS_1252_DATA[i]);\n"); - out.write(" }\n"); - out.write("}\n"); - out.write("\n"); - - out.write("void\n"); - out.write(cppTypes.classPrefix() - + "NamedCharacters::releaseStatics()\n"); - out.write("{\n"); - out.write(" delete[] WINDOWS_1252;\n"); - out.write("}\n"); - out.flush(); - out.close(); - } -} diff --git a/xom/pom.xml b/xom/pom.xml new file mode 100644 index 00000000..4f3f6521 --- /dev/null +++ b/xom/pom.xml @@ -0,0 +1,53 @@ + + + 4.0.0 + + + nu.validator.htmlparser + parent + 2.0 + + + xom + + xom + + + + nu.validator.htmlparser + htmlparser + + + xom + xom + 1.3.5 + + + xml-apis + xml-apis + + + + + diff --git a/xom/src/main/java/module-info.java b/xom/src/main/java/module-info.java new file mode 100644 index 00000000..b42cd04f --- /dev/null +++ b/xom/src/main/java/module-info.java @@ -0,0 +1,35 @@ +/* + * Copyright 
(c) 2020 Anthony Vanelverdinghe + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * Provides an implementation of the HTML5 parsing algorithm in Java for applications. + * The parser is designed to work as a drop-in replacement for the XML parser in applications + * that already support XHTML 1.x content with an XML parser and use XOM to interface with the parser. 
+ */ +@SuppressWarnings({"requires-automatic", "requires-transitive-automatic"}) +module nu.validator.htmlparser.xom { + requires transitive java.xml; + requires transitive nu.xom; + requires transitive nu.validator.htmlparser; + + exports nu.validator.htmlparser.xom; +} diff --git a/src/nu/validator/htmlparser/xom/FormPointer.java b/xom/src/main/java/nu/validator/htmlparser/xom/FormPointer.java similarity index 100% rename from src/nu/validator/htmlparser/xom/FormPointer.java rename to xom/src/main/java/nu/validator/htmlparser/xom/FormPointer.java diff --git a/src/nu/validator/htmlparser/xom/FormPtrElement.java b/xom/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java similarity index 100% rename from src/nu/validator/htmlparser/xom/FormPtrElement.java rename to xom/src/main/java/nu/validator/htmlparser/xom/FormPtrElement.java diff --git a/src/nu/validator/htmlparser/xom/HtmlBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java similarity index 96% rename from src/nu/validator/htmlparser/xom/HtmlBuilder.java rename to xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java index d5884723..2dcb1d76 100644 --- a/src/nu/validator/htmlparser/xom/HtmlBuilder.java +++ b/xom/src/main/java/nu/validator/htmlparser/xom/HtmlBuilder.java @@ -233,7 +233,7 @@ private void tokenize(InputSource is) throws ParsingException, IOException, * @param is the InputSource * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ public Document build(InputSource is) throws ParsingException, IOException { lazyInit(); @@ -249,7 +249,7 @@ public Document build(InputSource is) throws ParsingException, IOException { * @param context the name of the context element (HTML namespace assumed) * @return the fragment * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ 
public Nodes buildFragment(InputSource is, String context) throws IOException, ParsingException { @@ -263,10 +263,10 @@ public Nodes buildFragment(InputSource is, String context) * Parse a fragment from SAX InputSource. * @param is the InputSource * @param contextLocal the local name of the context element - * @parem contextNamespace the namespace of the context element + * @param contextNamespace the namespace of the context element * @return the fragment * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong */ public Nodes buildFragment(InputSource is, String contextLocal, String contextNamespace) throws IOException, ParsingException { @@ -281,7 +281,7 @@ public Nodes buildFragment(InputSource is, String contextLocal, String contextNa * @param file the file * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.File) */ @Override @@ -296,7 +296,7 @@ public Document build(File file) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.InputStream, java.lang.String) */ @Override @@ -312,7 +312,7 @@ public Document build(InputStream stream, String uri) * @param stream the stream * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.InputStream) */ @Override @@ -327,7 +327,7 @@ public Document build(InputStream stream) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO 
goes wrong * @see nu.xom.Builder#build(java.io.Reader, java.lang.String) */ @Override @@ -343,7 +343,7 @@ public Document build(Reader stream, String uri) throws ParsingException, * @param stream the reader * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.io.Reader) */ @Override @@ -358,7 +358,7 @@ public Document build(Reader stream) throws ParsingException, * @param uri the base URI * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.lang.String, java.lang.String) */ @Override @@ -372,7 +372,7 @@ public Document build(String content, String uri) throws ParsingException, * @param uri the URI of the document * @return the document * @throws ParsingException in case of an XML violation - * @throws IOException if IO goes wrang + * @throws IOException if IO goes wrong * @see nu.xom.Builder#build(java.lang.String) */ @Override @@ -412,7 +412,7 @@ public void setTransitionHander(TransitionHandler handler) { /** * Indicates whether NFC normalization of source is being checked. * @return true if NFC normalization of source is being checked. - * @see nu.validator.htmlparser.impl.Tokenizer#isCheckingNormalization() + * @see nu.validator.htmlparser.io.Driver#isCheckingNormalization() */ public boolean isCheckingNormalization() { return checkingNormalization; @@ -421,7 +421,7 @@ public boolean isCheckingNormalization() { /** * Toggles the checking of the NFC normalization of source. 
* @param enable true to check normalization - * @see nu.validator.htmlparser.impl.Tokenizer#setCheckingNormalization(boolean) + * @see nu.validator.htmlparser.io.Driver#setCheckingNormalization(boolean) */ public void setCheckingNormalization(boolean enable) { this.checkingNormalization = enable; @@ -646,7 +646,7 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { * Sets the encoding sniffing heuristics. * * @param heuristics the heuristics to set - * @see nu.validator.htmlparser.impl.Tokenizer#setHeuristics(nu.validator.htmlparser.common.Heuristics) + * @see nu.validator.htmlparser.io.Driver#setHeuristics(nu.validator.htmlparser.common.Heuristics) */ public void setHeuristics(Heuristics heuristics) { this.heuristics = heuristics; @@ -687,6 +687,7 @@ public XmlViolationPolicy getNamePolicy() { * Does nothing. * @deprecated */ + @Deprecated public void setBogusXmlnsPolicy( XmlViolationPolicy bogusXmlnsPolicy) { } @@ -696,6 +697,7 @@ public void setBogusXmlnsPolicy( * @deprecated * @return XmlViolationPolicy.ALTER_INFOSET */ + @Deprecated public XmlViolationPolicy getBogusXmlnsPolicy() { return XmlViolationPolicy.ALTER_INFOSET; } diff --git a/src/nu/validator/htmlparser/xom/ModalDocument.java b/xom/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java similarity index 100% rename from src/nu/validator/htmlparser/xom/ModalDocument.java rename to xom/src/main/java/nu/validator/htmlparser/xom/ModalDocument.java diff --git a/src/nu/validator/htmlparser/xom/Mode.java b/xom/src/main/java/nu/validator/htmlparser/xom/Mode.java similarity index 100% rename from src/nu/validator/htmlparser/xom/Mode.java rename to xom/src/main/java/nu/validator/htmlparser/xom/Mode.java diff --git a/src/nu/validator/htmlparser/xom/SimpleNodeFactory.java b/xom/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java similarity index 100% rename from src/nu/validator/htmlparser/xom/SimpleNodeFactory.java rename to 
xom/src/main/java/nu/validator/htmlparser/xom/SimpleNodeFactory.java diff --git a/src/nu/validator/htmlparser/xom/XOMTreeBuilder.java b/xom/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java similarity index 100% rename from src/nu/validator/htmlparser/xom/XOMTreeBuilder.java rename to xom/src/main/java/nu/validator/htmlparser/xom/XOMTreeBuilder.java diff --git a/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java b/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java new file mode 100644 index 00000000..6fde4174 --- /dev/null +++ b/xom/src/main/java/nu/validator/htmlparser/xom/package-info.java @@ -0,0 +1,26 @@ +/* + Copyright (c) 2007 Henri Sivonen + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + */ + +/** + * This package provides an HTML5 parser that exposes the document through the XOM API. 
+ */ +package nu.validator.htmlparser.xom; diff --git a/test-src/nu/validator/htmlparser/test/XomTest.java b/xom/src/test/java/nu/validator/htmlparser/test/XomTest.java similarity index 100% rename from test-src/nu/validator/htmlparser/test/XomTest.java rename to xom/src/test/java/nu/validator/htmlparser/test/XomTest.java