@@ -1946,8 +1946,20 @@ Token DocHtmlRow::parse()
19461946
19471947 // get next token
19481948 Token tok=parser ()->tokenizer .lex ();
1949- // skip whitespace
1950- while (tok.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA)) tok=parser ()->tokenizer .lex ();
1949+ // skip whitespace and any thead/tbody/tfoot tags (start or end); advance tok, the token that is tested right after this loop
1950+ while (tok.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA,TokenRetval::TK_HTMLTAG))
1951+ {
1952+ if (tok.is (TokenRetval::TK_HTMLTAG))
1953+ {
1954+ HtmlTagType tagId=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
1955+ if ((tagId==HtmlTagType::HTML_TBODY) || (tagId==HtmlTagType::HTML_THEAD) || (tagId==HtmlTagType::HTML_TFOOT)) tok=parser ()->tokenizer .lex ();
1956+ else break ; // any other html tag is handled by the code following the loop
1957+ }
1958+ else
1959+ {
1960+ tok=parser ()->tokenizer .lex ();
1961+ }
1962+ }
19511963 // should find a html tag now
19521964 if (tok.is (TokenRetval::TK_HTMLTAG))
19531965 {
@@ -1996,8 +2008,20 @@ Token DocHtmlRow::parse()
19962008 {
19972009 // get next token
19982010 retval=parser ()->tokenizer .lex ();
1999- // skip whitespace after </td> or </th>
2000- while (retval.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA)) retval=parser ()->tokenizer .lex ();
2011+ // skip whitespace and tbody, thead and tfoot tags after </td> or </th>
2012+ while (retval.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA,TokenRetval::TK_HTMLTAG))
2013+ {
2014+ if (retval.is (TokenRetval::TK_HTMLTAG))
2015+ {
2016+ HtmlTagType tagId1=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
2017+ if ((tagId1==HtmlTagType::HTML_TBODY) || (tagId1==HtmlTagType::HTML_THEAD) || (tagId1==HtmlTagType::HTML_TFOOT)) retval=parser ()->tokenizer .lex ();
2018+ else break ;
2019+ }
2020+ else
2021+ {
2022+ retval=parser ()->tokenizer .lex ();
2023+ }
2024+ }
20012025 // printf("DocHtmlRow:retval= next=%s name=%s endTag=%d\n",retval.to_string(),qPrint(parser()->context.token->name),parser()->context.token->endTag);
20022026 HtmlTagType tagId=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
20032027 if (retval.is (TokenRetval::TK_HTMLTAG)) // test the token just lexed into retval above, not the earlier tok
@@ -2135,25 +2159,25 @@ Token DocHtmlTable::parse()
21352159getrow:
21362160 // get next token
21372161 Token tok=parser ()->tokenizer .lex ();
2138- // skip whitespace
2139- while (tok.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA)) tok=parser ()->tokenizer .lex ();
2140- // should find a html tag now
2141- if (tok.is (TokenRetval::TK_HTMLTAG))
2162+ // skip whitespace and any thead/tbody/tfoot tags (start or end); advance tok, the token that is tested right after this loop
2163+ while (tok.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA,TokenRetval::TK_HTMLTAG))
21422164 {
2143- HtmlTagType tagId=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
2144- if (tagId==HtmlTagType::HTML_THEAD && !parser ()->context .token ->endTag ) // found <thead> tag
2145- {
2146- goto getrow;
2147- }
2148- else if (tagId==HtmlTagType::HTML_TBODY && !parser ()->context .token ->endTag ) // found <tbody> tag
2165+ if (tok.is (TokenRetval::TK_HTMLTAG))
21492166 {
2150- goto getrow;
2167+ HtmlTagType tagId1=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
2168+ if ((tagId1==HtmlTagType::HTML_TBODY) || (tagId1==HtmlTagType::HTML_THEAD) || (tagId1==HtmlTagType::HTML_TFOOT)) tok=parser ()->tokenizer .lex ();
2169+ else break ; // any other html tag is handled by the code following the loop
21512170 }
2152- else if (tagId==HtmlTagType::HTML_TFOOT && ! parser ()-> context . token -> endTag ) // found <tfoot> tag
2171+ else
21532172 {
2154- goto getrow ;
2173+ tok=parser ()->tokenizer .lex ();
21552174 }
2156- else if (tagId==HtmlTagType::HTML_TR && !parser ()->context .token ->endTag ) // found <tr> tag
2175+ }
2176+ // should find a html tag now
2177+ if (tok.is (TokenRetval::TK_HTMLTAG))
2178+ {
2179+ HtmlTagType tagId=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
2180+ if (tagId==HtmlTagType::HTML_TR && !parser ()->context .token ->endTag ) // found <tr> tag
21572181 {
21582182 // no caption, just rows
21592183 retval = Token::make_RetVal_TableRow ();
@@ -2202,8 +2226,20 @@ Token DocHtmlTable::parse()
22022226 {
22032227 // get next token
22042228 retval=parser ()->tokenizer .lex ();
2205- // skip whitespace after </td> or </th>
2206- while (retval.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA)) retval=parser ()->tokenizer .lex ();
2229+ // skip whitespace and tbody, thead and tfoot tags after </td> or </th>
2230+ while (retval.is_any_of (TokenRetval::TK_WHITESPACE,TokenRetval::TK_NEWPARA,TokenRetval::TK_HTMLTAG))
2231+ {
2232+ if (retval.is (TokenRetval::TK_HTMLTAG))
2233+ {
2234+ HtmlTagType tagId1=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
2235+ if ((tagId1==HtmlTagType::HTML_TBODY) || (tagId1==HtmlTagType::HTML_THEAD) || (tagId1==HtmlTagType::HTML_TFOOT)) retval=parser ()->tokenizer .lex ();
2236+ else break ;
2237+ }
2238+ else
2239+ {
2240+ retval=parser ()->tokenizer .lex ();
2241+ }
2242+ }
22072243 // printf("DocHtmlTable::retval= next=%s name=%s endTag=%d\n",retval.to_string(),qPrint(parser()->context.token->name),parser()->context.token->endTag);
22082244 HtmlTagType tagId=Mappers::htmlTagMapper->map (parser ()->context .token ->name );
22092245 if (tagId==HtmlTagType::HTML_TR && !parser ()->context .token ->endTag )
@@ -2214,18 +2250,6 @@ Token DocHtmlTable::parse()
22142250 {
22152251 retval = Token::make_RetVal_EndTable ();
22162252 }
2217- else if (tagId==HtmlTagType::HTML_TBODY && parser ()->context .token ->endTag )
2218- {
2219- // for time being ignore </t....> tag
2220- }
2221- else if (tagId==HtmlTagType::HTML_THEAD && parser ()->context .token ->endTag )
2222- {
2223- // for time being ignore </t....> tag
2224- }
2225- else if (tagId==HtmlTagType::HTML_TFOOT && parser ()->context .token ->endTag )
2226- {
2227- // for time being ignore </t....> tag
2228- }
22292253 else // found some other tag
22302254 {
22312255 warn_doc_error (parser ()->context .fileName ,parser ()->tokenizer .getLineNr ()," expected <tr> or </table> tag but "
0 commit comments