Skip to content

Commit da3c12c

Browse files
author
Fedorov Alexandr
committed
Add new parsing logic to css selector parser and enhance tokenizer workflow
DEVSIX-8925
1 parent 8c8a996 commit da3c12c

File tree

13 files changed

+483
-14
lines changed

13 files changed

+483
-14
lines changed

styled-xml-parser/src/main/java/com/itextpdf/styledxmlparser/css/parse/CssDeclarationValueTokenizer.java

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ protected Token getNextToken() {
155155
--functionDepth;
156156
buff.append(curChar);
157157
if (functionDepth == 0) {
158-
return new Token(buff.toString(), TokenType.FUNCTION);
158+
return new Token(buff.toString(), TokenType.FUNCTION, (char) 0, isSpaceNext());
159159
}
160160
} else if (curChar == '"' || curChar == '\'') {
161161
stringQuote = curChar;
@@ -168,7 +168,7 @@ protected Token getNextToken() {
168168
} else if (curChar == ']') {
169169
inString = false;
170170
buff.append(curChar);
171-
return new Token(buff.toString(), TokenType.STRING, stringQuote);
171+
return new Token(buff.toString(), TokenType.STRING, (char) 0, isSpaceNext());
172172
} else if (curChar == ',' && !inString && functionDepth == 0) {
173173
if (buff.length() == 0) {
174174
return new Token(",", TokenType.COMMA);
@@ -181,7 +181,7 @@ protected Token getNextToken() {
181181
buff.append(curChar);
182182
}
183183
if (!inString) {
184-
return new Token(buff.toString(), functionDepth > 0 ? TokenType.FUNCTION : TokenType.UNKNOWN);
184+
return new Token(buff.toString(), functionDepth > 0 ? TokenType.FUNCTION : TokenType.UNKNOWN, (char) 0, true);
185185
}
186186
} else {
187187
buff.append(curChar);
@@ -191,6 +191,10 @@ protected Token getNextToken() {
191191
return new Token(buff.toString(), TokenType.FUNCTION);
192192
}
193193

194+
private boolean isSpaceNext(){
195+
return src.length() - 1 > index && src.charAt(index + 1) == ' ';
196+
}
197+
194198
/**
195199
* Processes a function token.
196200
*
@@ -199,11 +203,11 @@ protected Token getNextToken() {
199203
*/
200204
private void processFunctionToken(Token token, StringBuilder functionBuffer) {
201205
if (token.isString()) {
202-
if (stringQuote != 0) {
206+
if (stringQuote != 0 && token.getStringQuote() != 0 ) {
203207
functionBuffer.append(stringQuote);
204208
}
205209
functionBuffer.append(token.getValue());
206-
if (stringQuote != 0) {
210+
if (stringQuote != 0 && token.getStringQuote() != 0) {
207211
functionBuffer.append(stringQuote);
208212
}
209213
} else {
@@ -234,20 +238,27 @@ public static class Token {
234238

235239
private final char stringQuote;
236240

241+
private final boolean hasSpace;
242+
237243
/**
238244
* Creates a new {@link Token} instance without a string quote and with the whitespace flag unset.
239245
*
240246
* @param value the value
241247
* @param type the type
242248
*/
243249
public Token(String value, TokenType type) {
244-
this(value, type, (char) 0);
250+
this(value, type, (char) 0, false);
245251
}
246252

247253
/**
* Creates a new {@link Token} instance with the whitespace flag unset.
*
* @param value the value
* @param type the type
* @param stringQuote the quote character stored for the token; callers that re-emit the token
*                    treat {@code (char) 0} as "no quote"
*/
Token(String value, TokenType type, char stringQuote) {
254+
this(value, type, stringQuote, false);
255+
}
256+
257+
/**
* Creates a new {@link Token} instance, storing every argument in the matching field.
*
* @param value the value
* @param type the type
* @param stringQuote the quote character stored for the token; callers that re-emit the token
*                    treat {@code (char) 0} as "no quote"
* @param hasSpace the flag later returned by {@link #hasSpace()}
*/
Token(String value, TokenType type, char stringQuote, boolean hasSpace) {
248258
this.value = value;
249259
this.type = type;
250260
this.stringQuote = stringQuote;
261+
this.hasSpace = hasSpace;
251262
}
252263

253264
/**
@@ -277,6 +288,16 @@ public char getStringQuote() {
277288
return stringQuote;
278289
}
279290

291+
292+
/**
293+
* Gets the whitespace flag of the token. The tokenizer sets it when it ends a token at a whitespace character or when the character right after the token is a space; tokens created without the flag report false.
294+
*
295+
* @return true, if whitespace was detected at the end of or right after the token
296+
*/
297+
public boolean hasSpace() {
298+
return hasSpace;
299+
}
300+
280301
/**
281302
* Checks if the token is a string.
282303
*

styled-xml-parser/src/main/java/com/itextpdf/styledxmlparser/css/parse/CssRuleSetParser.java

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,17 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.styledxmlparser.css.parse;
2424

2525
import com.itextpdf.commons.utils.MessageFormatUtil;
26-
import com.itextpdf.styledxmlparser.logs.StyledXmlParserLogMessageConstant;
2726
import com.itextpdf.styledxmlparser.css.CssDeclaration;
2827
import com.itextpdf.styledxmlparser.css.CssRuleSet;
28+
import com.itextpdf.styledxmlparser.css.parse.CssDeclarationValueTokenizer.Token;
2929
import com.itextpdf.styledxmlparser.css.selector.CssSelector;
3030
import com.itextpdf.styledxmlparser.css.util.CssUtils;
31-
32-
import org.slf4j.Logger;
33-
import org.slf4j.LoggerFactory;
31+
import com.itextpdf.styledxmlparser.logs.StyledXmlParserLogMessageConstant;
3432

3533
import java.util.ArrayList;
3634
import java.util.List;
35+
import org.slf4j.Logger;
36+
import org.slf4j.LoggerFactory;
3737

3838
/**
3939
* Utilities class to parse CSS rule sets.
@@ -102,15 +102,19 @@ public static List<CssRuleSet> parseRuleSet(String selectorStr, String propertie
102102
List<CssRuleSet> ruleSets = new ArrayList<>();
103103

104104
//check for rules like p, {…}
105-
String[] selectors = selectorStr.split(",");
105+
106+
String[] selectors = splitByTokens(selectorStr);
107+
106108
for (int i = 0; i < selectors.length; i++) {
107109
selectors[i] = CssUtils.removeDoubleSpacesAndTrim(selectors[i]);
108-
if (selectors[i].length() == 0)
110+
if (selectors[i].isEmpty()) {
109111
return ruleSets;
112+
}
110113
}
114+
111115
for (String currentSelectorStr : selectors) {
112116
try {
113-
ruleSets.add(new CssRuleSet( new CssSelector(currentSelectorStr), declarations));
117+
ruleSets.add(new CssRuleSet(new CssSelector(currentSelectorStr), declarations));
114118
} catch (Exception exc) {
115119
logger.error(MessageFormatUtil.format(StyledXmlParserLogMessageConstant.ERROR_PARSING_CSS_SELECTOR,
116120
currentSelectorStr), exc);
@@ -124,6 +128,40 @@ public static List<CssRuleSet> parseRuleSet(String selectorStr, String propertie
124128
return ruleSets;
125129
}
126130

131+
static String[] splitByTokens(String selectorGroup) {
132+
List<String> selectors = new ArrayList<>();
133+
StringBuilder currentSelector = new StringBuilder();
134+
135+
CssDeclarationValueTokenizer cssDeclarationValueTokenizer = new CssDeclarationValueTokenizer(selectorGroup);
136+
137+
Token nextValidToken = cssDeclarationValueTokenizer.getNextValidToken();
138+
while (nextValidToken != null) {
139+
if (nextValidToken.getValue().equals(",")) {
140+
selectors.add(currentSelector.toString());
141+
currentSelector.setLength(0);
142+
} else {
143+
if (nextValidToken.isString() && nextValidToken.getStringQuote() != 0) {
144+
currentSelector
145+
.append(nextValidToken.getStringQuote())
146+
.append(nextValidToken.getValue())
147+
.append(nextValidToken.getStringQuote());
148+
} else {
149+
currentSelector.append(nextValidToken.getValue());
150+
if (nextValidToken.hasSpace()) {
151+
currentSelector.append(' ');
152+
}
153+
}
154+
}
155+
nextValidToken = cssDeclarationValueTokenizer.getNextValidToken();
156+
}
157+
158+
159+
selectors.add(currentSelector.toString());
160+
161+
162+
return selectors.toArray(new String[0]);
163+
}
164+
127165
/**
128166
* Splits CSS properties into an array of {@link String} values.
129167
*

styled-xml-parser/src/main/java/com/itextpdf/styledxmlparser/css/selector/CssSelector.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,25 @@ public boolean matches(INode element) {
6363
return matches(element, selectorItems.size() - 1);
6464
}
6565

66+
/**
67+
* Checks if the node not matches all the selectors.
68+
*
69+
* @param element the node
70+
* @return true, if node doesn't match all the selectors
71+
*/
72+
public boolean notMatches(INode element) {
73+
int counter = 0;
74+
while (counter != selectorItems.size()) {
75+
boolean matches = matches(element, selectorItems.size() - counter - 1);
76+
if (matches) {
77+
return false;
78+
} else {
79+
counter++;
80+
}
81+
}
82+
return true;
83+
}
84+
6685
/**
6786
* Checks if a node matches the selector.
6887
*

styled-xml-parser/src/main/java/com/itextpdf/styledxmlparser/css/selector/item/CssPseudoClassNotSelectorItem.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ This file is part of the iText (R) project.
2424

2525
import com.itextpdf.styledxmlparser.css.CommonCssConstants;
2626
import com.itextpdf.styledxmlparser.css.parse.CssSelectorParser;
27+
import com.itextpdf.styledxmlparser.css.selector.CssSelector;
2728
import com.itextpdf.styledxmlparser.css.selector.ICssSelector;
2829
import com.itextpdf.styledxmlparser.node.ICustomElementNode;
2930
import com.itextpdf.styledxmlparser.node.IDocumentNode;
@@ -44,11 +45,16 @@ public List<ICssSelectorItem> getArgumentsSelector() {
4445
return CssSelectorParser.parseSelectorItems(arguments);
4546
}
4647

48+
// TODO DEVSIX-9069 Add notMatches to ICssSelector interface
4749
@Override
4850
public boolean matches(INode node) {
4951
if (!(node instanceof IElementNode) || node instanceof ICustomElementNode || node instanceof IDocumentNode) {
5052
return false;
5153
}
54+
if (argumentsSelector instanceof CssSelector){
55+
return ((CssSelector) argumentsSelector).notMatches(node);
56+
}
57+
5258
return !argumentsSelector.matches(node);
5359
}
5460
}

0 commit comments

Comments
 (0)