Skip to content

Commit 94e0828

Browse files
authored
Merge pull request github#12793 from aibaars/js-yaml-extractor
JavaScript: switch to shared YamlPopulator
2 parents aa8291e + dcca0e0 commit 94e0828

File tree

1 file changed

+7
-263
lines changed

1 file changed

+7
-263
lines changed
Lines changed: 7 additions & 263 deletions
Original file line number | Diff line number | Diff line change
@@ -1,284 +1,28 @@
11
package com.semmle.js.extractor;
22

3-
import com.semmle.util.data.StringUtil;
4-
import com.semmle.util.exception.CatastrophicError;
5-
import com.semmle.util.exception.UserError;
6-
import com.semmle.util.locations.LineTable;
7-
import com.semmle.util.trap.TrapWriter;
8-
import com.semmle.util.trap.TrapWriter.Label;
9-
import com.semmle.util.trap.TrapWriter.Table;
10-
113
import java.util.Collections;
124

13-
import org.yaml.snakeyaml.composer.Composer;
14-
import org.yaml.snakeyaml.error.Mark;
15-
import org.yaml.snakeyaml.error.MarkedYAMLException;
16-
import org.yaml.snakeyaml.events.AliasEvent;
17-
import org.yaml.snakeyaml.events.Event;
18-
import org.yaml.snakeyaml.events.MappingStartEvent;
19-
import org.yaml.snakeyaml.events.NodeEvent;
20-
import org.yaml.snakeyaml.events.ScalarEvent;
21-
import org.yaml.snakeyaml.events.SequenceStartEvent;
22-
import org.yaml.snakeyaml.nodes.NodeId;
23-
import org.yaml.snakeyaml.parser.Parser;
24-
import org.yaml.snakeyaml.parser.ParserImpl;
25-
import org.yaml.snakeyaml.reader.ReaderException;
26-
import org.yaml.snakeyaml.reader.StreamReader;
27-
import org.yaml.snakeyaml.resolver.Resolver;
5+
import com.semmle.extractor.yaml.YamlPopulator;
286

297
/**
308
* Extractor for populating YAML files.
319
*
32-
* <p>The extractor uses <a href="http://www.snakeyaml.org/">SnakeYAML</a> to parse YAML.
10+
* <p>
11+
* The extractor uses <a href="http://www.snakeyaml.org/">SnakeYAML</a> to parse
12+
* YAML.
3313
*/
3414
public class YAMLExtractor implements IExtractor {
35-
/** The tables constituting the YAML dbscheme. */
36-
private static enum YAMLTables implements Table {
37-
YAML(6), // yaml (id: @yaml_node, kind: int ref, parent: @yaml_node_parent ref,
38-
// idx: int ref, tag: string ref, tostring: string ref)
39-
YAML_ANCHORS(2), // yaml_anchors (node: @yaml_node ref, anchor: string ref)
40-
YAML_ALIASES(2), // yaml_aliases (alias: @yaml_alias_node ref, target: string ref)
41-
YAML_SCALARS(
42-
3), // yaml_scalars (scalar: @yaml_scalar_node ref, style: int ref, value: string ref)
43-
YAML_ERRORS(2); // yaml_errors (id: @yaml_error, message: string ref)
44-
45-
private final int arity;
46-
47-
private YAMLTables(int arity) {
48-
this.arity = arity;
49-
}
50-
51-
@Override
52-
public String getName() {
53-
return StringUtil.lc(name());
54-
}
55-
56-
@Override
57-
public int getArity() {
58-
return arity;
59-
}
60-
61-
@Override
62-
public boolean validate(Object... values) {
63-
return true;
64-
}
65-
}
66-
67-
/*
68-
* case @yaml_node.kind of
69-
* 0 = @yaml_scalar_node
70-
* | 1 = @yaml_mapping_node
71-
* | 2 = @yaml_sequence_node
72-
* | 3 = @yaml_alias_node
73-
*/
74-
private static enum NodeKind {
75-
SCALAR,
76-
MAPPING,
77-
SEQUENCE,
78-
ALIAS
79-
};
80-
8115
private final boolean tolerateParseErrors;
8216

83-
private TextualExtractor textualExtractor;
84-
private LocationManager locationManager;
85-
private TrapWriter trapWriter;
86-
private LineTable lineTable;
87-
88-
/**
89-
* The underlying SnakeYAML parser; we use the relatively low-level {@linkplain Parser} instead of
90-
* the more high-level {@linkplain Composer}, since our dbscheme represents YAML documents in AST
91-
* form, with aliases left unresolved.
92-
*/
93-
private Parser parser;
94-
95-
/** The resolver used for resolving type tags. */
96-
private Resolver resolver;
97-
9817
public YAMLExtractor(ExtractorConfig config) {
9918
this.tolerateParseErrors = config.isTolerateParseErrors();
10019
}
10120

102-
private LineTable getLineTable() {
103-
if (lineTable == null) {
104-
lineTable = new LineTable(this.textualExtractor.getSource());
105-
}
106-
return lineTable;
107-
}
108-
10921
@Override
11022
public ParseResultInfo extract(TextualExtractor textualExtractor) {
111-
this.textualExtractor = textualExtractor;
112-
locationManager = textualExtractor.getLocationManager();
113-
trapWriter = textualExtractor.getTrapwriter();
114-
115-
Label fileLabel = locationManager.getFileLabel();
116-
locationManager.setHasLocationTable("yaml_locations");
117-
try {
118-
parser = new ParserImpl(new StreamReader(textualExtractor.getSource()));
119-
resolver = new Resolver();
120-
int idx = 0;
121-
while (!atStreamEnd())
122-
extractDocument(fileLabel, idx++, textualExtractor.getSource().codePoints().toArray());
123-
} catch (MarkedYAMLException e) {
124-
int line = e.getProblemMark().getLine() + 1;
125-
int column = e.getProblemMark().getColumn() + 1;
126-
if (!this.tolerateParseErrors)
127-
throw new UserError(e.getProblem() + ": " + line + ":" + column);
128-
Label lbl = trapWriter.freshLabel();
129-
trapWriter.addTuple(YAMLTables.YAML_ERRORS, lbl, e.getProblem());
130-
locationManager.emitSnippetLocation(lbl, line, column, line, column);
131-
} catch (ReaderException e) {
132-
if (!this.tolerateParseErrors) throw new UserError(e.toString());
133-
int c = e.getCodePoint();
134-
String s = String.valueOf(Character.toChars(c));
135-
trapWriter.addTuple(
136-
YAMLTables.YAML_ERRORS,
137-
trapWriter.freshLabel(),
138-
"Unexpected character " + s + "(" + c + ")");
139-
// unfortunately, SnakeYAML does not provide structured location information for
140-
// ReaderExceptions
141-
}
142-
23+
new YamlPopulator(textualExtractor.getExtractedFile(), textualExtractor.getSource(),
24+
textualExtractor.getTrapwriter(),
25+
this.tolerateParseErrors).extract();
14326
return new ParseResultInfo(0, 0, Collections.emptyList());
14427
}
145-
146-
/** Check whether the parser has encountered the end of the YAML input stream. */
147-
private boolean atStreamEnd() {
148-
if (parser.checkEvent(Event.ID.StreamStart)) parser.getEvent();
149-
return parser.checkEvent(Event.ID.StreamEnd);
150-
}
151-
152-
/** Extract a complete YAML document; cf. {@link Composer#getNode}. */
153-
private void extractDocument(Label parent, int idx, int[] codepoints) {
154-
// Drop the DOCUMENT-START event
155-
parser.getEvent();
156-
extractNode(parent, idx, codepoints);
157-
// Drop the DOCUMENT-END event
158-
parser.getEvent();
159-
}
160-
161-
/** Extract a single YAML node; cf. {@link Composer#composeNode}. */
162-
private void extractNode(Label parent, int idx, int[] codepoints) {
163-
Label label = trapWriter.freshLabel();
164-
NodeKind kind;
165-
String tag = "";
166-
Event start = parser.getEvent(), end = start;
167-
168-
if (start.is(Event.ID.Alias)) {
169-
kind = NodeKind.ALIAS;
170-
trapWriter.addTuple(YAMLTables.YAML_ALIASES, label, ((AliasEvent) start).getAnchor());
171-
} else {
172-
String anchor = start instanceof NodeEvent ? ((NodeEvent) start).getAnchor() : null;
173-
if (anchor != null) trapWriter.addTuple(YAMLTables.YAML_ANCHORS, label, anchor);
174-
175-
if (start.is(Event.ID.Scalar)) {
176-
kind = NodeKind.SCALAR;
177-
ScalarEvent scalar = (ScalarEvent) start;
178-
tag =
179-
getTag(
180-
scalar.getTag(),
181-
NodeId.scalar,
182-
scalar.getValue(),
183-
scalar.getImplicit().canOmitTagInPlainScalar());
184-
Character style = scalar.getStyle();
185-
int styleCode = style == null ? 0 : (int) style;
186-
trapWriter.addTuple(YAMLTables.YAML_SCALARS, label, styleCode, scalar.getValue());
187-
} else if (start.is(Event.ID.SequenceStart)) {
188-
kind = NodeKind.SEQUENCE;
189-
SequenceStartEvent sequenceStart = (SequenceStartEvent) start;
190-
tag = getTag(sequenceStart.getTag(), NodeId.sequence, null, sequenceStart.getImplicit());
191-
192-
int childIdx = 0;
193-
while (!parser.checkEvent(Event.ID.SequenceEnd)) extractNode(label, childIdx++, codepoints);
194-
195-
end = parser.getEvent();
196-
} else if (start.is(Event.ID.MappingStart)) {
197-
kind = NodeKind.MAPPING;
198-
MappingStartEvent mappingStart = (MappingStartEvent) start;
199-
tag = getTag(mappingStart.getTag(), NodeId.mapping, null, mappingStart.getImplicit());
200-
201-
int childIdx = 1;
202-
while (!parser.checkEvent(Event.ID.MappingEnd)) {
203-
extractNode(label, childIdx, codepoints);
204-
extractNode(label, -childIdx, codepoints);
205-
++childIdx;
206-
}
207-
208-
end = parser.getEvent();
209-
} else {
210-
throw new CatastrophicError("Unexpected YAML parser event: " + start);
211-
}
212-
}
213-
214-
trapWriter.addTuple(
215-
YAMLTables.YAML,
216-
label,
217-
kind.ordinal(),
218-
parent,
219-
idx,
220-
tag,
221-
mkToString(start.getStartMark(), end.getEndMark(), codepoints));
222-
extractLocation(label, start.getStartMark(), end.getEndMark());
223-
}
224-
225-
/** Determine the type tag of a node. */
226-
private String getTag(String explicitTag, NodeId kind, String value, boolean implicit) {
227-
if (explicitTag == null || "!".equals(explicitTag))
228-
return resolver.resolve(kind, value, implicit).getValue();
229-
return explicitTag;
230-
}
231-
232-
private static boolean isNewLine(int codePoint) {
233-
switch (codePoint) {
234-
case '\n':
235-
case '\r':
236-
case '\u0085':
237-
case '\u2028':
238-
case '\u2029':
239-
return true;
240-
default:
241-
return false;
242-
}
243-
}
244-
245-
/**
246-
* SnakeYAML doesn't directly expose the source text of nodes, but we also take the file contents
247-
* as an array of Unicode code points. The start and end marks each contain an index into the code
248-
* point stream (the end is exclusive), so we can reconstruct the snippet. For readability, we
249-
* stop at the first encountered newline.
250-
*/
251-
private static String mkToString(Mark startMark, Mark endMark, int[] codepoints) {
252-
StringBuilder b = new StringBuilder();
253-
for (int i = startMark.getIndex(); i < endMark.getIndex() && !isNewLine(codepoints[i]); i++)
254-
b.appendCodePoint(codepoints[i]);
255-
return TextualExtractor.sanitiseToString(b.toString());
256-
}
257-
258-
/** Emit a source location for a YAML node. */
259-
private void extractLocation(Label label, Mark startMark, Mark endMark) {
260-
int startLine, startColumn, endLine, endColumn;
261-
262-
// SnakeYAML uses 0-based indexing for both lines and columns, so need to +1
263-
startLine = startMark.getLine() + 1;
264-
startColumn = startMark.getColumn() + 1;
265-
266-
// SnakeYAML's end positions are exclusive, so only need to +1 for the line
267-
endLine = endMark.getLine() + 1;
268-
endColumn = endMark.getColumn();
269-
270-
// Avoid emitting column zero for non-empty locations
271-
if (endColumn == 0 && !(startLine == endLine && startColumn == endColumn)) {
272-
String source = textualExtractor.getSource();
273-
int offset = getLineTable().getOffsetFromPoint(endMark.getLine(), endMark.getColumn()) - 1;
274-
while (offset > 0 && isNewLine((int)source.charAt(offset))) {
275-
--offset;
276-
}
277-
com.semmle.util.locations.Position adjustedEndPos = getLineTable().getEndPositionFromOffset(offset);
278-
endLine = adjustedEndPos.getLine();
279-
endColumn = adjustedEndPos.getColumn();
280-
}
281-
282-
locationManager.emitSnippetLocation(label, startLine, startColumn, endLine, endColumn);
283-
}
28428
}

0 commit comments

Comments
 (0)