Skip to content

Commit f866627

Browse files
committed
bring parsing and prettying in-house
1 parent bb1a6bd commit f866627

File tree

1 file changed

+251
-79
lines changed

1 file changed

+251
-79
lines changed
Lines changed: 251 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,273 @@
1-
import type { Options, Plugin } from 'prettier';
2-
import type { builders } from 'prettier/doc';
3-
import * as html from 'prettier/plugins/html';
4-
import { format } from 'prettier/standalone';
5-
6-
interface HtmlNode {
7-
type: 'element' | 'text' | 'ieConditionalComment';
8-
name?: string;
9-
sourceSpan: {
10-
start: { file: unknown[]; offset: number; line: number; col: number };
11-
end: { file: unknown[]; offset: number; line: number; col: number };
12-
details: null;
13-
};
14-
parent?: HtmlNode;
1+
/**
 * A single attribute read from an opening tag.
 */
interface HtmlTagProperty {
2+
/** Attribute name: the text that precedes `=` in the markup. */
name: string;
3+
/**
 * Raw attribute value. The parser in this file stores it together with its
 * surrounding quotes, and the printer emits `name=value` verbatim.
 */
value: string;
154
}
165

17-
function recursivelyMapDoc(
18-
doc: builders.Doc,
19-
callback: (innerDoc: string | builders.DocCommand) => builders.Doc,
20-
): builders.Doc {
21-
if (Array.isArray(doc)) {
22-
return doc.map((innerDoc) => recursivelyMapDoc(innerDoc, callback));
23-
}
6+
/**
 * An element node: its tag name, its attributes and the nodes nested inside it.
 */
interface HtmlTag {
7+
/** Discriminant of the `HtmlNode` union. */
type: 'tag';
8+
/** Tag name as written in the markup. */
name: string;
9+
/**
10+
* Whether the html tag is self-closing, or a void element in spec nomenclature.
11+
*/
12+
void: boolean;
13+
/** Attributes in the order they were parsed. */
properties: HtmlTagProperty[];
14+
/** Nodes found between this tag's opening and closing tags. */
children: HtmlNode[];
15+
}
2416

25-
if (typeof doc === 'object') {
26-
if (doc.type === 'group') {
27-
return {
28-
...doc,
29-
contents: recursivelyMapDoc(doc.contents, callback),
30-
expandedStates: recursivelyMapDoc(
31-
doc.expandedStates,
32-
callback,
33-
) as builders.Doc[],
34-
};
35-
}
17+
/**
18+
* Something like the DOCTYPE for the document, or comments.
19+
*/
20+
interface HtmlDeclaration {
21+
/** Discriminant of the `HtmlNode` union. */
type: 'declaration';
22+
/** The full source text of the declaration, starting at its `<!`. */
content: string;
23+
}
24+
25+
/**
 * A run of text that sits outside of any `<...>` construct.
 */
interface HtmlText {
26+
/** Discriminant of the `HtmlNode` union. */
type: 'text';
27+
/** The text exactly as sliced from the input. */
content: string;
28+
}
29+
30+
/** Any node produced by the parser; narrow on the `type` field. */
type HtmlNode = HtmlTag | HtmlDeclaration | HtmlText;
31+
32+
/**
 * Names of the HTML void elements. They never have a closing tag, so they must
 * not become the parent of whatever follows them, even when written without a
 * trailing slash (`<br>`, `<img src="...">`).
 */
const HTML_VOID_ELEMENT_NAMES: ReadonlySet<string> = new Set([
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'source',
  'track',
  'wbr',
]);

/** Whether `character` counts as whitespace between the parts of a tag. */
const isHtmlWhitespace = (character: string): boolean =>
  character === ' ' ||
  character === '\n' ||
  character === '\t' ||
  character === '\r' ||
  character === '\f';

/** Whether `character` ends a tag name or an attribute name. */
const isHtmlNameTerminator = (character: string): boolean =>
  character === '>' || character === '/' || isHtmlWhitespace(character);

/**
 * Parses an HTML string into a tree of nodes without ever throwing.
 *
 * The parser is intentionally forgiving:
 * - text outside of `<...>` becomes `text` nodes, verbatim;
 * - anything starting with `<!` becomes a `declaration` node; comments run up
 *   to their `-->`, other declarations up to the next `>`;
 * - a closing tag pops the current parent only when the names match, any other
 *   closing tag is ignored;
 * - attribute values keep their quotes; attributes without a value are stored
 *   with the value `""`;
 * - input that ends in the middle of a construct keeps what was read so far.
 *
 * @param html - The markup to parse.
 * @returns The top-level nodes, in document order.
 */
export const lenientParse = (html: string): HtmlNode[] => {
  const result: HtmlNode[] = [];
  const stack: HtmlTag[] = []; // Open tags, innermost last

  const addToTree = (node: HtmlNode): void => {
    const currentParent =
      stack.length > 0 ? stack[stack.length - 1] : undefined;
    if (currentParent) {
      currentParent.children.push(node);
    } else {
      result.push(node);
    }
  };

  /** Index of the first non-whitespace character at or after `from`. */
  const skipWhitespace = (from: number): number => {
    let position = from;
    while (position < html.length && isHtmlWhitespace(html.charAt(position))) {
      position++;
    }
    return position;
  };

  /** Index of the first character at or after `from` matching `isEnd`, or the input length. */
  const readUntil = (
    from: number,
    isEnd: (character: string) => boolean,
  ): number => {
    let position = from;
    while (position < html.length && !isEnd(html.charAt(position))) {
      position++;
    }
    return position;
  };

  let index = 0;
  while (index < html.length) {
    const htmlObjectStart = html.indexOf('<', index);
    if (htmlObjectStart === -1) {
      // No more markup: the remainder is plain text.
      addToTree({ type: 'text', content: html.slice(index) });
      break;
    }
    if (htmlObjectStart > index) {
      addToTree({ type: 'text', content: html.slice(index, htmlObjectStart) });
      index = htmlObjectStart;
    }

    if (html.startsWith('<!', index)) {
      // A comment may legitimately contain `>`, so it only ends at `-->`.
      const terminator = html.startsWith('<!--', index) ? '-->' : '>';
      const terminatorStart = html.indexOf(terminator, index + 2);
      if (terminatorStart === -1) {
        // Assumes the rest of the document is part of this declaration
        addToTree({ type: 'declaration', content: html.slice(index) });
        break;
      }

      const declarationEnd = terminatorStart + terminator.length;
      addToTree({
        type: 'declaration',
        content: html.slice(index, declarationEnd),
      });
      index = declarationEnd;
      continue;
    }

    if (html.startsWith('</', index)) {
      const bracketEnd = html.indexOf('>', index + 2);
      if (bracketEnd === -1) {
        // Unterminated closing tag at the end of the input: nothing left to parse.
        break;
      }

      const tagName = html.slice(index + 2, bracketEnd).trim();
      const openTag = stack.length > 0 ? stack[stack.length - 1] : undefined;
      if (openTag && openTag.name === tagName) {
        stack.pop();
      }
      // A closing tag that does not match the innermost open tag is ignored.
      index = bracketEnd + 1;
      continue;
    }

    const tag: HtmlTag = {
      type: 'tag',
      name: '',
      void: false,
      properties: [],
      children: [],
    };

    // Skip the `<` and any whitespace in front of the tag name.
    index = skipWhitespace(index + 1);
    const tagNameEnd = readUntil(index, isHtmlNameTerminator);
    tag.name = html.slice(index, tagNameEnd);
    index = tagNameEnd;

    // Every iteration below consumes at least one character, so the loop
    // always terminates, even on truncated input.
    while (index < html.length) {
      const character = html.charAt(index);

      if (isHtmlWhitespace(character)) {
        index++;
        continue;
      }
      if (character === '>') {
        index++;
        break;
      }
      if (character === '/') {
        if (html.charAt(index + 1) === '>') {
          tag.void = true;
          index += 2;
          break;
        }
        // A stray slash inside a tag carries no meaning.
        index++;
        continue;
      }

      const propertyNameEnd = readUntil(
        index,
        (candidate) => candidate === '=' || isHtmlNameTerminator(candidate),
      );
      const propertyName = html.slice(index, propertyNameEnd);

      const equalsIndex = skipWhitespace(propertyNameEnd);
      if (html.charAt(equalsIndex) !== '=') {
        // Attribute without a value, e.g. `hidden`. `name=""` is equivalent markup.
        tag.properties.push({ name: propertyName, value: '""' });
        index = propertyNameEnd;
        continue;
      }

      index = skipWhitespace(equalsIndex + 1);
      const quote = html.charAt(index);
      let propertyValue: string;
      if (quote === '"' || quote === "'") {
        const closingQuoteIndex = html.indexOf(quote, index + 1);
        // An unterminated quote swallows the rest of the input.
        const valueEnd =
          closingQuoteIndex === -1 ? html.length : closingQuoteIndex + 1;
        propertyValue = html.slice(index, valueEnd);
        index = valueEnd;
      } else {
        const valueEnd = readUntil(
          index,
          (candidate) => candidate === '>' || isHtmlWhitespace(candidate),
        );
        propertyValue = valueEnd > index ? html.slice(index, valueEnd) : '""';
        index = valueEnd;
      }

      tag.properties.push({ name: propertyName, value: propertyValue });
    }

    if (HTML_VOID_ELEMENT_NAMES.has(tag.name.toLowerCase())) {
      tag.void = true;
    }

    // The tag must be attached before it is pushed, otherwise it would become
    // its own parent.
    addToTree(tag);
    if (!tag.void) {
      stack.push(tag);
    }
  }

  return result;
};
148+
149+
/**
 * Settings accepted by the pretty printer in this file.
 */
interface Options {
150+
/**
151+
* Disables the word wrapping we do to ensure the maximum line length is kept.
152+
*
153+
* @default false
154+
*/
155+
preserveLinebreaks?: boolean;
156+
/**
157+
* The maximum line length before wrapping some piece of the document.
158+
*
159+
* @default 80
160+
*/
161+
maxLineLength?: number;
162+
163+
/**
 * The line terminator written between output lines. Required: the printer
 * reads it without applying a default.
 */
lineBreak: '\n' | '\r\n';
86164
}
87165

88-
const defaults: Options = {
89-
endOfLine: 'lf',
90-
tabWidth: 2,
91-
plugins: [modifiedHtml],
92-
bracketSameLine: true,
93-
parser: 'html',
166+
/**
 * Returns the leading whitespace of `line`.
 *
 * @param line - The line to inspect.
 * @returns The run of whitespace characters at the start of `line`, or an
 * empty string when the line does not start with whitespace.
 */
export const getIndentationOfLine = (line: string): string =>
  /^\s+/.exec(line)?.[0] ?? '';
171+
172+
/**
 * Pretty-prints an HTML string.
 *
 * The markup is first turned into a node tree by `lenientParse`, then that
 * tree is rendered back to text by `prettyNodes` using `options`.
 */
export const pretty = (html: string, options: Options) =>
  prettyNodes(lenientParse(html), options);
177+
178+
/**
 * Word-wraps `text` at spaces and prefixes every resulting line with `linePrefix`.
 *
 * A line is broken at the last space that still fits; when a single word is
 * longer than the limit, the break happens at the first space after that word.
 * The space at which a line is broken is replaced by `lineBreak + linePrefix`.
 * Text that contains no further space is left as one (overlong) line.
 *
 * Note that the length of the first line is measured including `linePrefix`,
 * while continuation lines are measured from the first character after it.
 *
 * @param text - The text to wrap.
 * @param linePrefix - Inserted at the start of every line, typically indentation.
 * @param maxLineLength - The length a line may have before it gets broken.
 * @param lineBreak - The line terminator to insert.
 * @returns The wrapped text, without a trailing line break.
 */
export const wrapText = (
  text: string,
  linePrefix: string,
  maxLineLength: number,
  lineBreak: string,
): string => {
  let wrappedText = linePrefix + text;
  // Where the length of the current line is measured from.
  let lineStartIndex = 0;
  // Breaking before this index would split the prefix itself, which usually
  // consists of spaces.
  let firstBreakableIndex = linePrefix.length;

  while (wrappedText.length - lineStartIndex > maxLineLength) {
    const lastFittingIndex = lineStartIndex + maxLineLength - 1;

    let breakIndex =
      lastFittingIndex >= firstBreakableIndex
        ? wrappedText.lastIndexOf(' ', lastFittingIndex)
        : -1;
    if (breakIndex < firstBreakableIndex) {
      // No space fits on this line: let the word overflow and break right after it.
      breakIndex = wrappedText.indexOf(
        ' ',
        Math.max(lastFittingIndex + 1, firstBreakableIndex),
      );
      if (breakIndex === -1) {
        // The remainder cannot be broken anywhere.
        break;
      }
    }

    wrappedText =
      wrappedText.slice(0, breakIndex) +
      lineBreak +
      linePrefix +
      wrappedText.slice(breakIndex + 1);
    lineStartIndex = breakIndex + lineBreak.length + linePrefix.length;
    firstBreakableIndex = lineStartIndex;
  }

  return wrappedText;
};
95216

96-
export const pretty = (str: string, options: Options = {}) => {
97-
return format(str.replaceAll('\0', ''), {
98-
...defaults,
99-
...options,
100-
});
217+
/**
 * Renders a list of sibling nodes as formatted HTML.
 *
 * - Text is emitted verbatim when `preserveLinebreaks` is set; otherwise its
 *   line breaks (and the whitespace following them) are removed and the text
 *   is re-wrapped by `wrapText` with the current indentation as line prefix.
 * - A tag is printed on one line when its opening tag fits in `maxLineLength`,
 *   otherwise with one attribute per line. Children are rendered recursively,
 *   two spaces deeper.
 * - Declarations are printed verbatim on their own line.
 *
 * @param nodes - The sibling nodes to render.
 * @param options - Formatting options.
 * @param currentIndentationSize - The number of spaces these nodes are indented by.
 * @returns The formatted markup for `nodes`.
 */
const prettyNodes = (
  nodes: HtmlNode[],
  options: Options,
  currentIndentationSize = 0,
): string => {
  const { preserveLinebreaks = false, maxLineLength = 80, lineBreak } = options;
  const indentation = ' '.repeat(currentIndentationSize);

  let formatted = '';
  for (const node of nodes) {
    if (node.type === 'text') {
      if (preserveLinebreaks) {
        formatted += node.content;
      } else {
        const rawText = node.content.replace(/(\r|\n|\r\n)\s*/g, '');
        formatted += wrapText(
          rawText,
          indentation,
          // Deep nesting must not push the available width to zero or below.
          Math.max(1, maxLineLength - currentIndentationSize),
          lineBreak,
        );
      }
    } else if (node.type === 'tag') {
      const propertiesRawString = node.properties
        .map((property) => ` ${property.name}=${property.value}`)
        .join('');
      const tagEnd = `${node.void ? '/' : ''}>`;

      const rawTagStart = `${indentation}<${node.name}${propertiesRawString}${tagEnd}`;
      if (rawTagStart.length > maxLineLength) {
        // Too long for one line: the tag name, every attribute and the closing
        // bracket each get a line of their own.
        let tagStart = `${indentation}<${node.name}${lineBreak}`;
        for (const property of node.properties) {
          tagStart += `${indentation}  ${property.name}=${property.value}${lineBreak}`;
        }
        tagStart += `${indentation}${tagEnd}`;
        formatted += tagStart;
      } else {
        formatted += rawTagStart;
      }

      if (!node.void) {
        if (node.children.length > 0) {
          formatted += `${lineBreak}${prettyNodes(
            node.children,
            options,
            currentIndentationSize + 2,
          )}`;
          formatted += `${lineBreak}${indentation}`;
        }

        formatted += `</${node.name}>${lineBreak}`;
      }
    } else if (node.type === 'declaration') {
      // Append: assigning here would throw away everything printed before
      // this declaration.
      formatted += `${indentation}${node.content}${lineBreak}`;
    }
  }
  return formatted;
};

0 commit comments

Comments
 (0)