Skip to content

Commit bbe7f2c

Browse files
authored
fix: overhaul template literal parsing to rely on token positions (#186)
The previous code mostly relied on AST nodes and manual string matching, which was fragile and did not provide a clean way to allow empty string interpolations. The new approach walks the template literal while keeping track of the coffee-lex tokens, and defines the quasi ranges in terms of those tokens. As a result, the first and last quasi no longer bleed into the quotation marks, and several other small location data issues are fixed. Empty string interpolations still aren't 100% working, but this commit gets most of the way there.
1 parent 1bc9002 commit bbe7f2c

File tree

27 files changed

+584
-415
lines changed

27 files changed

+584
-415
lines changed

src/nodes.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ export class String extends Node {
204204
end: number,
205205
raw: string,
206206
readonly quasis: Array<Quasi>,
207-
readonly expressions: Array<Node>,
207+
readonly expressions: Array<Node | null>,
208208
) {
209209
super('String', line, column, start, end, raw);
210210
}
@@ -452,7 +452,7 @@ export class Heregex extends Node {
452452
end: number,
453453
raw: string,
454454
readonly quasis: Array<Quasi>,
455-
readonly expressions: Array<Node>,
455+
readonly expressions: Array<Node | null>,
456456
readonly flags: RegexFlags,
457457
) {
458458
super('Heregex', line, column, start, end, raw);
@@ -1594,7 +1594,7 @@ export function makeRealNode(context: ParseContext, type: string, loc: LocationD
15941594
result[key] = value;
15951595
if (value && result.range) {
15961596
(Array.isArray(value) ? value : [value]).forEach(node => {
1597-
if (node.range) {
1597+
if (node && node.range) {
15981598
// Expand the range to contain all the children.
15991599
if (result.range[0] > node.range[0]) {
16001600
result.range[0] = node.range[0];
@@ -1649,3 +1649,28 @@ export default function makeNode(context: ParseContext, type: string, loc: Locat
16491649

16501650
return makeRealNode(context, type, loc, attrs);
16511651
}
1652+
1653+
/**
 * Build a node of the given type whose location is defined purely by a source
 * index range rather than by CoffeeScript location data.
 *
 * The resulting object carries `type`, 1-based `line`/`column` for `start`,
 * `range`, every own enumerable property of `attrs`, and finally `start`,
 * `end` and `raw`. The `range`, `start`, `end` and `raw` fields always reflect
 * the `start`/`end` arguments, even if `attrs` happens to contain keys with
 * the same names, so the positional fields can never disagree with each other.
 *
 * @param context parse context providing `source` and `linesAndColumns`
 * @param type value for the node's `type` field
 * @param start source index where the node begins
 * @param end source index where the node ends (used as the slice end for `raw`)
 * @param attrs extra properties to copy onto the node
 * @throws Error if no line/column location can be found for `start`
 */
// tslint:disable-next-line:no-any
export function makeNodeFromSourceRange(context: ParseContext, type: string, start: number, end: number, attrs: any = {}): Node {
  const startLoc = context.linesAndColumns.locationForIndex(start);
  if (!startLoc) {
    throw new Error(`Unable to determine source location for index ${start}.`);
  }

  // tslint:disable-next-line:no-any
  const result: any = { type };
  result.line = startLoc.line + 1;
  result.column = startLoc.column + 1;
  result.range = [start, end];

  // Object.keys only yields own enumerable keys and also works for objects
  // created without a prototype; `|| {}` keeps an explicit null a no-op.
  Object.keys(attrs || {}).forEach(key => {
    result[key] = attrs[key];
  });

  // Re-assert the range after copying attrs and slice from the arguments
  // directly, so range/start/end/raw are guaranteed to be consistent.
  result.range = [start, end];
  result.start = start;
  result.end = end;
  result.raw = context.source.slice(start, end);
  return result;
}

src/parser.js

Lines changed: 13 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ import lex, { SourceType } from 'coffee-lex';
1212
import locationsEqual from './util/locationsEqual';
1313
import locationContainingNodes from './util/locationContainingNodes';
1414
import locationWithLastPosition from './util/locationWithLastPosition';
15-
import makeNode, { RegexFlags } from './nodes';
15+
import makeNode, { makeNodeFromSourceRange, RegexFlags } from './nodes';
1616
import mapAny from './mappers/mapAny';
1717
import mapAnyWithFallback from './mappers/mapAnyWithFallback';
18+
import getTemplateLiteralComponents from './util/getTemplateLiteralComponents';
1819
import mergeLocations from './util/mergeLocations';
1920
import parseString from './util/parseString';
2021
import rangeOfBracketTokensForIndexNode from './util/rangeOfBracketTokensForIndexNode';
@@ -336,15 +337,18 @@ function convert(context: ParseContext, map: (context: ParseContext, node: Base,
336337
return map(context, node, () => {
337338
if (isHeregexTemplateNode(node, context)) {
338339
let firstArgOp = convertOperator(node.args[0].base.body.expressions[0]);
339-
let heregexResult = createTemplateLiteral(firstArgOp, 'Heregex');
340+
let { quasis, expressions, start, end } = getTemplateLiteralComponents(context, firstArgOp);
340341
let flags;
341342
if (node.args.length > 1) {
342343
flags = parseString(node.args[1].base.value);
343344
} else {
344345
flags = '';
345346
}
346-
heregexResult.flags = RegexFlags.parse(flags);
347-
return heregexResult;
347+
return makeNodeFromSourceRange(context, 'Heregex', start, end, {
348+
quasis,
349+
expressions,
350+
flags: RegexFlags.parse(flags),
351+
})
348352
}
349353

350354
if (node.isNew) {
@@ -398,7 +402,11 @@ function convert(context: ParseContext, map: (context: ParseContext, node: Base,
398402
return map(context, node, () => {
399403
const op = convertOperator(node);
400404
if (isImplicitPlusOp(op, context) && isInterpolatedString(node, ancestors, context)) {
401-
return createTemplateLiteral(op, 'String');
405+
let { quasis, expressions, start, end } = getTemplateLiteralComponents(context, op);
406+
return makeNodeFromSourceRange(context, 'String', start, end, {
407+
quasis,
408+
expressions,
409+
})
402410
}
403411
if (isChainedComparison(node)) {
404412
let operands = unwindChainedComparison(node).map(convertChild);
@@ -830,139 +838,6 @@ function convert(context: ParseContext, map: (context: ParseContext, node: Base,
830838
}
831839
}
832840

833-
function createTemplateLiteral(op, nodeType) {
834-
let tokens = context.sourceTokens;
835-
let startTokenIndex = tokens.indexOfTokenContainingSourceIndex(op.range[0]);
836-
let interpolatedStringTokenRange = tokens.rangeOfInterpolatedStringTokensContainingTokenIndex(startTokenIndex);
837-
if (!interpolatedStringTokenRange) {
838-
throw new Error('cannot find interpolation end for node');
839-
}
840-
let firstToken = tokens.tokenAtIndex(interpolatedStringTokenRange[0]);
841-
let lastToken = tokens.tokenAtIndex(interpolatedStringTokenRange[1].previous());
842-
op.type = nodeType;
843-
op.range = [firstToken.start, lastToken.end];
844-
op.raw = source.slice(...op.range);
845-
846-
let elements = [];
847-
848-
function addElements({ left, right }) {
849-
if (isImplicitPlusOp(left, context)) {
850-
addElements(left);
851-
} else {
852-
elements.push(left);
853-
}
854-
elements.push(right);
855-
}
856-
addElements(op);
857-
858-
let quasis = [];
859-
let expressions = [];
860-
let quote = op.raw.slice(0, 3) === '"""' ? '"""' : '"';
861-
862-
function findNextToken(position, tokenType) {
863-
let tokens = context.sourceTokens;
864-
let startTokenIndex = tokens.indexOfTokenNearSourceIndex(position);
865-
let tokenIndex = tokens.indexOfTokenMatchingPredicate(
866-
token => token.type === tokenType, startTokenIndex);
867-
return tokens.tokenAtIndex(tokenIndex);
868-
}
869-
870-
function findPrevToken(position, tokenType) {
871-
let tokens = context.sourceTokens;
872-
let startTokenIndex = tokens.indexOfTokenNearSourceIndex(position);
873-
let tokenIndex = tokens.lastIndexOfTokenMatchingPredicate(
874-
token => token.type === tokenType, startTokenIndex);
875-
return tokens.tokenAtIndex(tokenIndex);
876-
}
877-
878-
function buildFirstQuasi() {
879-
// Find the start of the first interpolation, i.e. "#{a}".
880-
// ^
881-
let interpolationStart = findNextToken(op.range[0], SourceType.INTERPOLATION_START);
882-
let range = [op.range[0], interpolationStart.start];
883-
return buildQuasi(range);
884-
}
885-
886-
function buildLastQuasi() {
887-
// Find the close of the last interpolation, i.e. "a#{b}".
888-
// ^
889-
let interpolationEnd = findPrevToken(op.range[1] - 1, SourceType.INTERPOLATION_END);
890-
return buildQuasi([interpolationEnd.end, op.range[1]]);
891-
}
892-
893-
function buildQuasi(range) {
894-
let loc = linesAndColumns.locationForIndex(range[0]);
895-
return {
896-
type: 'Quasi',
897-
data: '',
898-
raw: source.slice(...range),
899-
line: loc.line + 1,
900-
column: loc.column + 1,
901-
range
902-
};
903-
}
904-
905-
function buildQuasiWithString(range, raw){
906-
let loc = linesAndColumns.locationForIndex(range[0]);
907-
return {
908-
type: 'Quasi',
909-
data: raw,
910-
raw: source.slice(...range),
911-
line: loc.line + 1,
912-
column: loc.column ,
913-
range
914-
};
915-
}
916-
917-
elements.forEach((element, i) => {
918-
if (i === 0) {
919-
if (element.type === 'String') {
920-
if (element.range[0] === op.range[0]) {
921-
// This string is not interpolated, it's part of the string interpolation.
922-
if (element.data === '' && element.raw.length > quote.length) {
923-
// CoffeeScript includes the `#` in the raw value of a leading
924-
// empty quasi string, but it shouldn't be there.
925-
element = buildFirstQuasi();
926-
}
927-
quasis.push(element);
928-
return;
929-
}
930-
}
931-
}
932-
933-
if (element.type === 'Quasi') {
934-
quasis.push(element);
935-
} else {
936-
if (quasis.length === 0) {
937-
// This element is interpolated and is first, i.e. "#{a}".
938-
quasis.push(buildFirstQuasi());
939-
expressions.push(element);
940-
} else if (/^"(.*?)"$/.test(element.data)) {
941-
quasis.push(buildQuasiWithString(element.range, element.raw));
942-
} else if (quasis.length < expressions.length + 1) {
943-
let lastInterpolationEnd = findPrevToken(element.range[0], SourceType.INTERPOLATION_END);
944-
let lastInterpolationStart = findPrevToken(element.range[0], SourceType.INTERPOLATION_START);
945-
quasis.push(buildQuasi([lastInterpolationEnd.end, lastInterpolationStart.start]));
946-
expressions.push(element);
947-
} else {
948-
expressions.push(element);
949-
}
950-
}
951-
952-
953-
});
954-
955-
if (quasis.length < expressions.length + 1) {
956-
quasis.push(buildLastQuasi());
957-
}
958-
959-
op.quasis = quasis;
960-
op.expressions = expressions;
961-
delete op.left;
962-
delete op.right;
963-
return op;
964-
}
965-
966841
/**
967842
* @param expression converted base
968843
* @param prop CS node to convertNode
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import SourceToken from 'coffee-lex/dist/SourceToken';
2+
import SourceTokenList from 'coffee-lex/dist/SourceTokenList';
3+
import SourceType from 'coffee-lex/dist/SourceType';
4+
import { Node, PlusOp, Quasi } from '../nodes';
5+
import isImplicitPlusOp from './isImplicitPlusOp';
6+
import ParseContext from './ParseContext';
7+
8+
/**
 * Rebuild the pieces of a template literal (interpolated string or heregex)
 * from the coffee-lex token stream combined with the CoffeeScript AST.
 *
 * The CoffeeScript AST models an interpolated string as a chain of `+`
 * operations, so its source locations are generally unreliable; the token
 * positions are used to delimit every quasi and expression instead.
 *
 * Tokens are scanned from the literal's start token. Each outermost
 * interpolation start closes a quasi, each outermost interpolation end closes
 * an expression (null when no AST element lies inside it), and the literal's
 * end token closes the final quasi and stops the scan.
 *
 * @returns the quasis and expressions found, plus the start index of the
 *   start token and the end index of the last token that was consumed.
 */
export default function getTemplateLiteralComponents(context: ParseContext, node: Node) {
  const tokens = context.sourceTokens;
  const quasis: Array<Node> = [];
  const expressions: Array<Node | null> = [];

  const elements = getElements(node, context);
  const { startTokenIndex, startToken } = getStartToken(node.range[0], tokens);

  // Nesting level of interpolations; only level-0/1 transitions matter here.
  let nesting = 0;
  // Most recent boundary token, i.e. the left edge of the next component.
  let boundary = startToken;

  for (const token of tokens.slice(startTokenIndex, tokens.endIndex).toArray()) {
    if (token.type === SourceType.INTERPOLATION_START) {
      nesting += 1;
      if (nesting === 1) {
        quasis.push(findQuasi(boundary, token, context, elements));
        boundary = token;
      }
      continue;
    }

    if (token.type === SourceType.INTERPOLATION_END) {
      nesting -= 1;
      if (nesting === 0) {
        expressions.push(findExpression(boundary, token, elements));
        boundary = token;
      }
      continue;
    }

    if (nesting !== 0 || !isTemplateLiteralEnd(token)) {
      continue;
    }

    // Closing delimiter of the literal itself: emit the trailing quasi.
    quasis.push(findQuasi(boundary, token, context, elements));
    boundary = token;
    break;
  }

  return {
    quasis,
    expressions,
    start: startToken.start,
    end: boundary.end,
  };
}
51+
52+
/**
 * Flatten a tree of implicit `+` operations into its leaf operands, in
 * left-to-right order. Any node that is not an implicit PlusOp is returned as
 * a single-element list.
 */
function getElements(node: Node, context: ParseContext): Array<Node> {
  const isImplicitConcat = node.type === 'PlusOp' && isImplicitPlusOp(node as PlusOp, context);
  if (!isImplicitConcat) {
    return [node];
  }

  const plusOp = node as PlusOp;
  const leftElements = getElements(plusOp.left, context);
  const rightElements = getElements(plusOp.right, context);
  return leftElements.concat(rightElements);
}
59+
60+
/**
 * Locate the token that opens the template literal.
 *
 * The search begins at the token near the given source index and steps
 * backwards, inspecting at most three tokens in total. Per the original
 * author's note, the opening token normally sits at the AST node's start
 * index, but when the literal begins with an interpolation it is two tokens
 * earlier.
 *
 * @throws Error if a token or its predecessor is missing during the search,
 *   or if none of the inspected tokens is a template literal start.
 */
function getStartToken(start: number, tokens: SourceTokenList) {
  let candidateIndex = tokens.indexOfTokenNearSourceIndex(start);
  let attemptsLeft = 3;

  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    const candidate = tokens.tokenAtIndex(candidateIndex);
    if (!candidate) {
      throw new Error('Expected to find a start token in a template literal.');
    }
    if (isTemplateLiteralStart(candidate)) {
      return { startToken: candidate, startTokenIndex: candidateIndex };
    }

    const earlierIndex = candidateIndex.previous();
    if (!earlierIndex) {
      throw new Error('Expected a previous token when searching for a template start.');
    }
    candidateIndex = earlierIndex;
  }

  throw new Error('Expected a template literal start token.');
}
83+
84+
/**
 * Create the Quasi that sits between two boundary tokens.
 *
 * The quasi spans from the end of `leftToken` to the start of `rightToken`.
 * Its `data` is taken from the single AST element lying within the span from
 * `leftToken.start` to `rightToken.end`, or is the empty string when no
 * element lies there.
 *
 * @throws Error if no location exists for the quasi start, if the matching
 *   element is not a Quasi, or if more than one element matches.
 */
function findQuasi(leftToken: SourceToken, rightToken: SourceToken, context: ParseContext, elements: Array<Node>): Node {
  const candidates = elements.filter(candidate =>
    candidate.range[0] >= leftToken.start && candidate.range[1] <= rightToken.end);

  const quasiStart = leftToken.end;
  const quasiEnd = rightToken.start;

  const location = context.linesAndColumns.locationForIndex(leftToken.end);
  if (!location) {
    throw new Error(`Expected to find a location for index ${leftToken.end}.`);
  }
  const rawText = context.source.slice(quasiStart, quasiEnd);

  if (candidates.length > 1) {
    throw new Error('Unexpectedly found multiple elements in string interpolation.');
  }

  // With no matching element the quasi is empty; otherwise reuse its data.
  let data = '';
  if (candidates.length === 1) {
    const only = candidates[0];
    if (only.type !== 'Quasi') {
      throw new Error('Expected matching element to be a quasi.');
    }
    data = (only as Quasi).data;
  }

  return new Quasi(location.line + 1, location.column + 1, quasiStart, quasiEnd, rawText, data);
}
108+
109+
/**
 * Find the AST element for the interpolation delimited by two tokens.
 *
 * An element matches when its range lies within the span from
 * `leftToken.start` to `rightToken.end` (both bounds inclusive).
 *
 * @returns the single matching element, or null when nothing matches (an
 *   empty interpolation).
 * @throws Error if more than one element matches.
 */
function findExpression(leftToken: SourceToken, rightToken: SourceToken, elements: Array<Node>): Node | null {
  const lowerBound = leftToken.start;
  const upperBound = rightToken.end;

  const candidates: Array<Node> = [];
  for (const candidate of elements) {
    if (candidate.range[0] >= lowerBound && candidate.range[1] <= upperBound) {
      candidates.push(candidate);
    }
  }

  if (candidates.length > 1) {
    throw new Error('Unexpectedly found multiple elements in string interpolation.');
  }
  return candidates.length === 1 ? candidates[0] : null;
}
120+
121+
/**
 * Report whether the token's type is one of the opening delimiter types
 * accepted as the start of a template literal: the double-quoted,
 * single-quoted, triple-double-quoted, triple-single-quoted string starts and
 * the heregex start.
 */
function isTemplateLiteralStart(token: SourceToken): boolean {
  switch (token.type) {
    case SourceType.DSTRING_START:
    case SourceType.SSTRING_START:
    case SourceType.TDSTRING_START:
    case SourceType.TSSTRING_START:
    case SourceType.HEREGEXP_START:
      return true;
    default:
      return false;
  }
}
130+
131+
/**
 * Report whether the token's type is one of the closing delimiter types
 * accepted as the end of a template literal: the double-quoted,
 * single-quoted, triple-double-quoted, triple-single-quoted string ends and
 * the heregex end.
 */
function isTemplateLiteralEnd(token: SourceToken): boolean {
  switch (token.type) {
    case SourceType.DSTRING_END:
    case SourceType.SSTRING_END:
    case SourceType.TDSTRING_END:
    case SourceType.TSSTRING_END:
    case SourceType.HEREGEXP_END:
      return true;
    default:
      return false;
  }
}

src/util/isImplicitPlusOp.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export type Op = {
1414
/**
1515
* Determine if the operator is a fake + operator for string interpolation.
1616
*/
17-
export default function isImplicitPlusOp(op: Op, context: ParseContext) {
17+
export default function isImplicitPlusOp(op: Op, context: ParseContext): boolean {
1818
if (op.type !== 'PlusOp') {
1919
return false;
2020
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
///a#{}b///

0 commit comments

Comments
 (0)