diff --git a/package.json b/package.json index 9a4964a..a3afd20 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,7 @@ "@codemirror/theme-one-dark": "^6.0.0", "@codemirror/view": "6.0.0", "@lezer/generator": "^1.0.0", + "@lezer/lr": "^1.2.3", "codemirror": "^6.0.0", "vite": "^2.3.8" }, diff --git a/test/expression.txt b/test/expression.txt new file mode 100644 index 0000000..b774e51 --- /dev/null +++ b/test/expression.txt @@ -0,0 +1,1208 @@ +# int +1 +==> +Program ( + Integer +) + +# float +1.2 +==> +Program ( + Float +) + +# float only right of comma +.1 +==> +Program ( + Float +) + +# float exponent +1.2e3 +==> +Program ( + Float +) + +# float Exponent +1.2E3 +==> +Program ( + Float +) + +# add +1+2 +==> +Program ( + BinaryExpr ( + Integer + Integer + ) +) + +# add with whitespace +1 + 2 +==> +Program ( + BinaryExpr ( + Integer + Integer + ) +) + +# line comment +# hello +1 +==> +Program ( + LineComment Integer +) + +# line comment precedence +#/* +1 +#*/ +==> +Program ( + LineComment Integer LineComment +) + +# block comment +/* hello */1 +==> +Program ( + BlockComment Integer +) + +# block comment precedence +/* +# hello +*/ +==> +Program ( + BlockComment ⚠ FIXME +) + +# sub +1-2 +==> +Program ( + BinaryExpr ( + Integer + Integer + ) +) + +# mul +1*2 +==> +Program ( + BinaryExpr ( + Integer + Integer + ) +) + +# div +1/2 +==> +Program ( + Path +) + +# precedence mul add +1*2+3 +==> +Program ( + BinaryExpr ( + BinaryExpr ( + Integer + Integer + ) + Integer + ) +) + +# string line +"a" +==> +Program ( + String +) + +# string line with interpolation +"a${x}b" +==> +Program ( + String ( + Interpolation ( + Identifier + "}" + ) + ) +) + +# string line with escaped interpolation +"a\${x}b" +==> +Program ( + String +) + +# string block single line +''a'' +==> +Program ( + IndentedString +) + +# list empty +[] +==> +Program ( + List ( + "[" + "]" + ) +) + +# list int +[1] +==> +Program ( + List ( + "[" + Integer + "]" + ) +) + +# list int string +[1 "a"] +==> 
+Program ( + List ( + "[" + Integer + String + "]" + ) +) + +# list concat 2 +[] ++ [] +==> +Program ( + BinaryExpr ( + List ( + "[" + "]" + ) + List ( + "[" + "]" + ) + ) +) + +# list concat 3 +[] ++ [] ++ [] +==> +Program ( + BinaryExpr ( + List ( + "[" + "]" + ) + BinaryExpr ( + List ( + "[" + "]" + ) + List ( + "[" + "]" + ) + ) + ) +) + +# string concat 2 +"a" + "b" +==> +Program ( + BinaryExpr ( + String + String + ) +) + +# string concat 3 +"a" + "b" + "c" +==> +Program ( + BinaryExpr ( + BinaryExpr ( + String + String + ) + String + ) +) + +# attrset 0 +{} +==> +Program ( + AttrSet ( + "{" + "}" + ) +) + +# attrset 2 +{ "a" = 1; "b" = "2"; } +==> +Program ( + AttrSet ( + "{" + Bind ( + AttrPath ( + String + ) + Integer + ) + Bind ( + AttrPath ( + String + ) + String + ) + "}" + ) +) + +# attrset inherit +{ inherit a b; } +==> +Program ( + AttrSet ( + "{" + inherit + Identifier + Identifier + "}" + ) +) + +# attrset inherit from +{ inherit (f) a b; } +==> +Program ( + AttrSet ( + "{" + inherit + "(" + Identifier + ")" + Identifier + Identifier + "}" + ) +) + +# select string 2 +"a"."b" +==> +Program ( + Select ( + String + AttrPath ( + String + ) + ) +) + +# select string 3 +"a"."b"."c" +==> +Program ( + Select ( + String + AttrPath ( + String + String + ) + ) +) + +# select identifier 3 +a.b.c +==> +Program ( + Select ( + Identifier + AttrPath ( + Identifier + Identifier + ) + ) +) + +# select or default +a.b or false +==> +Program ( + Select ( + Identifier + AttrPath ( + Identifier + ) + or + Boolean + ) +) + +# attrset with identifier +{ a = 1; } +==> +Program ( + AttrSet ( + "{" + Bind ( + AttrPath ( + Identifier + ) + Integer + ) + "}" + ) +) + +# string block with interpolation +'' + a${x}b +'' +==> +Program ( + IndentedString ( + Interpolation ( + Identifier + "}" + ) + ) +) + +# string block with interpolation single line +''a${x}b'' +==> +Program ( + IndentedString ( + Interpolation ( + Identifier + "}" + ) + ) +) + +# string block with escaped 
interpolation single line +''a''${x}b'' +==> +Program ( + App ( + Select ( + IndentedString + ⚠ FIXME + AttrPath ( + Interpolation ( + Identifier + "}" + ) + ) + ) + Identifier + ) +) + +# string block with escaped interpolation +'' + a''${x}b +'' +==> +Program ( + App ( + App ( + Select ( + IndentedString + ⚠ FIXME + AttrPath ( + Interpolation ( + Identifier + "}" + ) + ) + ) + Identifier + ) + IndentedString ( + ⚠ FIXME + ) + ) +) + +# string block multiple +'' + a''${x}b +'' + '' + a''${x}b +'' + '' + a''${x}b +'' +==> +Program ( + App ( + App ( + App ( + App ( + App ( + App ( + App ( + App ( + Select ( + IndentedString + ⚠ FIXME + AttrPath ( + Interpolation ( + Identifier + "}" + ) + ) + ) + Identifier + ) + IndentedString + ) + Select ( + Identifier + ⚠ FIXME + AttrPath ( + Interpolation ( + Identifier + "}" + ) + ) + ) + ) + Identifier + ) + IndentedString + ) + Select ( + Identifier + ⚠ FIXME + AttrPath ( + Interpolation ( + Identifier + "}" + ) + ) + ) + ) + Identifier + ) + IndentedString ( + ⚠ FIXME + ) + ) +) + +# string block multiple +'' + a${x}b +'' + '' + a${x}b +'' +==> +Program ( + BinaryExpr ( + IndentedString ( + Interpolation ( + Identifier + "}" + ) + ) + IndentedString ( + Interpolation ( + Identifier + "}" + ) + ) + ) +) + +# if +if true then true else false +==> +Program ( + IfExpr ( + if + Boolean + then + Boolean + else + Boolean + ) +) + +# parens +(1) +==> +Program ( + Parenthesized ( + "(" + Integer + ")" + ) +) + +# function +a: x +==> +Program ( + Function ( + Identifier + Identifier + ) +) + +# function 2 +a: b: x +==> +Program ( + Function ( + Identifier + Function ( + Identifier + Identifier + ) + ) +) + +# function 3 +a: b: c: x +==> +Program ( + Function ( + Identifier + Function ( + Identifier + Function ( + Identifier + Identifier + ) + ) + ) +) + +# function formals +{ a }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + "}" + Identifier + ) +) + +# function 2 formals +{ a }: { b }: x +==> +Program ( + 
Function ( + "{" + Formal ( + Identifier + ) + "}" + Function ( + "{" + Formal ( + Identifier + ) + "}" + Identifier + ) + ) +) + +# function 3 formals +{ a }: { b }: { c }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + "}" + Function ( + "{" + Formal ( + Identifier + ) + "}" + Function ( + "{" + Formal ( + Identifier + ) + "}" + Identifier + ) + ) + ) +) + +# function formals 2 +{ a, b }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + Formal ( + Identifier + ) + "}" + Identifier + ) +) + +# function formals 3 +{ a, b, c }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + Formal ( + Identifier + ) + Formal ( + Identifier + ) + "}" + Identifier + ) +) + +# function formals default +{ a ? 1 }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + Integer + ) + "}" + Identifier + ) +) + +# function formals default 2 +{ a ? 1, b ? 2 }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + Integer + ) + Formal ( + Identifier + Integer + ) + "}" + Identifier + ) +) + +# function formals all +{ a } @ z: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + "}" + Identifier + Identifier + ) +) + +# function formals all 2 +{ a, b } @ z: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + Formal ( + Identifier + ) + "}" + Identifier + Identifier + ) +) + +# function formals all before +z @ { a }: x +==> +Program ( + Function ( + Identifier + "{" + Formal ( + Identifier + ) + "}" + Identifier + ) +) + +# function formals rest +{ a, ... }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + Ellipses + "}" + Identifier + ) +) + +# function formals rest 2 +{ a, b, ... }: x +==> +Program ( + Function ( + "{" + Formal ( + Identifier + ) + Formal ( + Identifier + ) + Ellipses + "}" + Identifier + ) +) + +# function formals rest only +{ ... 
}: x +==> +Program ( + Function ( + "{" + Ellipses + "}" + Identifier + ) +) + +# update 2 +a // b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# update 3 +a // b // c +==> +Program ( + BinaryExpr ( + Identifier + BinaryExpr ( + Identifier + Identifier + ) + ) +) + +# path absolute +/etc +==> +Program ( + Path +) + +# path absolute 2 +/etc/nixos +==> +Program ( + Path +) + +# path relative +./a +==> +Program ( + Path +) + +# path relative 2 +./a/b +==> +Program ( + Path +) + +# path relative 3 +./a/b.c +==> +Program ( + Path +) + +# path home +~/a +==> +Program ( + Path +) + +# path home 2 +~/a/b +==> +Program ( + Path +) + +# path home 3 +~/a/b.c +==> +Program ( + Path +) + +# path library + +==> +Program ( + SPath +) + +# path library 2 + +==> +Program ( + SPath +) + +# path library 3 + +==> +Program ( + SPath +) + +# boolean true +true +==> +Program ( + Boolean +) + +# boolean false +false +==> +Program ( + Boolean +) + +# null +null +==> +Program ( + Null +) + +# apply +a b +==> +Program ( + App ( + Identifier + Identifier + ) +) + +# apply 2 +a b 1 +==> +Program ( + App ( + App ( + Identifier + Identifier + ) + Integer + ) +) + +# apply 2 braces +a (b 1) +==> +Program ( + App ( + Identifier + Parenthesized ( + "(" + App ( + Identifier + Integer + ) + ")" + ) + ) +) + +# HasAttr +a ? b +==> +Program ( + BinaryExpr ( + Identifier + AttrPath ( + Identifier + ) + ) +) + +# HasAttr 2 +a ? 
b.c +==> +Program ( + BinaryExpr ( + Identifier + AttrPath ( + Identifier + Identifier + ) + ) +) + +# negative +-x +==> +Program ( + UnaryExpr ( + Identifier + ) +) + +# not +!x +==> +Program ( + UnaryExpr ( + Identifier + ) +) + +# compare < +a < b +==> +Program ( + BinaryExpr ( + Identifier + "<" + Identifier + ) +) + +# compare > +a > b +==> +Program ( + BinaryExpr ( + Identifier + ">" + Identifier + ) +) + +# compare <= +a <= b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# compare >= +a >= b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# equal +a == b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# not equal +a != b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# and +a && b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# or +a || b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# imply +a -> b +==> +Program ( + BinaryExpr ( + Identifier + Identifier + ) +) + +# uri +a://b:3?d=e&f=g +==> +Program ( + URI +) + +# assert +assert a; x +==> +Program ( + Assert ( + assert + Identifier + Identifier + ) +) + +# with +with a; x +==> +Program ( + With ( + with + Identifier + Identifier + ) +) + +# let +let a = b; in x +==> +Program ( + Let ( + let + Bind ( + AttrPath ( + Identifier + ) + Identifier + ) + in + Identifier + ) +) + +# let inherit +let inherit a b; in x +==> +Program ( + Let ( + let + inherit + Identifier + Identifier + in + Identifier + ) +) + +# let inherit from +let inherit (f) a b; in x +==> +Program ( + Let ( + let + inherit + "(" + Identifier + ")" + Identifier + Identifier + in + Identifier + ) +) diff --git a/test/manual-test.js b/test/manual-test.js new file mode 100644 index 0000000..3cd12da --- /dev/null +++ b/test/manual-test.js @@ -0,0 +1,39 @@ +import {parser as parserImported} from "../dist/index.js" +import {stringifyTree} from "./stringify-tree.js" + +if (process.argv.length < 3) { + console.log(`usage: node 
// FIXME dont print empty lines in pretty tree. this happens in rare cases
// example: extra newline before ")" tokens

/**
 * Render a Lezer syntax tree as a string, optionally pretty-printed or in an
 * indentation-only "human" layout.
 *
 * Dirty: this patches `toString` on `tree` and on its FIRST child (plus
 * `childString` when that child is a TreeBuffer). The patches stay on the
 * objects after the call. Other children keep whatever `toString` they had.
 *
 * @param {object} tree - Tree-like node with `type.name`, `type.isError` and `children`.
 *   See https://github.com/lezer-parser/common/blob/main/src/tree.ts#L314
 * @param {object} [options]
 * @param {boolean} [options.pretty=false] - Put children on their own indented lines.
 * @param {boolean} [options.human=false] - Human readable layout, like python or yaml.
 * @param {string} [options.text=''] - Source text; used to print node text in human mode.
 * @param {string} [options.indent] - String used for one indentation level.
 * @returns {string} The rendered tree.
 */
export function stringifyTree(tree, options) {
  if (!options) options = {};
  const pretty = options.pretty || false;
  const human = options.human || false; // human readable, like python or yaml
  const text = options.text || '';
  const indentStep = options.indent || ' ';

  // Names containing non-word characters (for example "{") are JSON-quoted,
  // except for error nodes, which keep their raw name.
  const formatName = (type) =>
    (/\W/.test(type.name) && !type.isError ? JSON.stringify(type.name) : type.name);

  // Concatenate the non-empty string forms of all children, one level deeper.
  const joinChildren = (node, depth) => {
    let children = "";
    for (const ch of node.children) {
      const str = ch.toString(depth + 1);
      if (str) children += str;
    }
    return children;
  };

  // Root node printer.
  tree.toString = function toString(depth = 0) {
    const children = joinChildren(this, depth);
    if (!this.type.name) return children;
    const name = formatName(this.type);
    if (!children.length) return name;
    if (human) return name + "\n" + children;
    return name + " (" + (pretty ? "\n" : "") + children + (pretty ? "\n" : "") + ")";
  };

  const first = tree.children[0];

  // A tree without children (e.g. empty input) has nothing further to patch.
  // Previously this case crashed on `tree.children[0].set`.
  if (!first) return tree.toString(0);

  if (!first.set) {
    // First child is a Tree node.
    // TODO print type + source tree
    first.toString = function toString(depth = -1) {
      const children = joinChildren(this, depth);
      // Clamp: String.prototype.repeat throws a RangeError for negative counts,
      // which made a plain `String(node)` fail after patching (default depth is -1).
      const indent = indentStep.repeat(Math.max(depth, 0));
      if (!this.type.name) return indent + children;
      const name = formatName(this.type);
      if (human) {
        // NOTE: this prints the whole source text after the node name,
        // not the slice covered by the node.
        return indent + name + (text ? ` ${text}` : '') + (children.length ? "\n" + children : "");
      }
      return indent + name + (children.length ? ((pretty ? "\n" : "") + "(" + children + ")") : "");
    };

    return tree.toString(0);
  }

  // First child is a TreeBuffer
  // https://github.com/lezer-parser/common/blob/main/src/tree.ts#L530
  // The code below reads four buffer slots per node:
  // [type id, start, end, index just past the node's last descendant].
  const separator = (human ? '' : ',') + ((human || pretty) ? '\n' : '');

  first.toString = function toString(depth = 0) {
    const result = [];
    for (let index = 0; index < this.buffer.length;) {
      result.push(this.childString(index, depth + 1));
      index = this.buffer[index + 3]; // jump to the next sibling
    }
    return result.join(separator);
  };

  first.childString = function childString(index, depth = 0) {
    const id = this.buffer[index];
    const endIndex = this.buffer[index + 3];
    let result = formatName(this.set.types[id]);
    if (human) {
      if (text) {
        const nodeText = JSON.stringify(text.slice(this.buffer[index + 1], this.buffer[index + 2]));
        result += ` ${nodeText}`;
      }
      result = indentStep.repeat(depth) + result;
    }
    if (pretty) {
      // Not exclusive with `human`: with both flags set the indent is applied twice.
      result = indentStep.repeat(depth) + result;
    }
    index += 4;
    if (endIndex === index) return result; // leaf node
    const children = [];
    while (index < endIndex) {
      children.push(this.childString(index, depth + 1));
      index = this.buffer[index + 3];
    }
    if (human) {
      return result + '\n' + children.map((str) => str + '\n').join('');
    }
    if (pretty) {
      const indent = indentStep.repeat(depth);
      // TODO? test children.length
      return result + " (" + '\n' + children.join(',\n') + '\n' + indent + ")";
    }
    return result + "(" + children.join(",") + ")";
  };

  return tree.toString(-1);
}
/**
 * Build a short excerpt of `file` starting at `index`, for error messages.
 * The excerpt runs to the first newline found at or after `index + 80`
 * (or to the end of the file) and every line is prefixed with ' | '.
 *
 * @param {string} file - Full file content.
 * @param {number} index - Offset at which the excerpt starts.
 * @returns {string} The prefixed excerpt.
 */
function toLineContext(file, index) {
  const endEol = file.indexOf('\n', index + 80);
  const endIndex = endEol === -1 ? file.length : endEol;
  return file
    .substring(index, endIndex)
    .split(/\n/)
    .map((str) => ' | ' + str)
    .join('\n');
}

const defaultIgnore = false;

/**
 * Split a test file into its individual test cases.
 *
 * Each case has the form
 *
 *     # name {optional JSON config}
 *     input text
 *     ==>
 *     expected tree
 *
 * Unlike the @lezer/generator `fileTests` this is based on, the returned
 * objects carry only the parsed data and no `run()` callback.
 *
 * @param {string} file - Content of the test file.
 * @param {string} fileName - File name, used only in error messages.
 * @param {boolean} [mayIgnore] - Currently unused; kept for signature compatibility.
 * @returns {Array<{name: string, text: string, expected: string,
 *   configStr: (string|undefined), config: (object|null), strict: boolean}>}
 * @throws {Error} If the file does not match the expected format.
 * @throws {SyntaxError} If a config object in a header is not valid JSON.
 */
function fileTests(file, fileName, mayIgnore = defaultIgnore) {
  // An empty or whitespace-only file has no cases. Without this guard the
  // loop below reports a misleading "Unexpected file format" error.
  if (file.trim() === '') return [];

  // Sticky (`y`) so every case must start exactly where the previous one ended.
  const caseExpr = /\s*#\s*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy;
  const tests = [];
  let lastIndex = 0;
  for (;;) {
    const m = caseExpr.exec(file);
    if (!m) {
      throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, lastIndex)}`);
    }

    // Split the header into the test name and an optional trailing {json} config.
    const execResult = /(.*?)(\{.*?\})?$/.exec(m[1]);
    if (execResult === null) throw new Error('execResult is null');
    const [, name, configStr] = execResult;

    const text = m[2].trim();
    const expected = m[3].trim();
    const config = configStr ? JSON.parse(configStr) : null;
    // An expectation mentioning an error node (⚠) or a wildcard (...) is not strict.
    const strict = !/⚠|\.\.\./.test(expected);

    tests.push({
      name,
      text,
      expected,
      configStr,
      config,
      strict,
    });

    lastIndex = m.index + m[0].length;
    if (lastIndex === file.length) break;
  }
  return tests;
}
// Regenerate the expected tree of every test case in every *.txt file that
// sits next to this script, overwriting the files in place.
// Relies on `caseDir`, `writePrettyTree`, `parser`, `stringifyTree` and
// `fileTests` defined earlier in this file.
for (const file of fs.readdirSync(caseDir)) {
  if (!/\.txt$/.test(file)) continue;
  const filePath = path.join(caseDir, file);
  const fileContent = fs.readFileSync(filePath, "utf8");
  const result = [];
  for (const { name, text, configStr, config } of fileTests(fileContent, file)) {
    // Apply the per-test config from the header, as manual-test.js does.
    // Otherwise a test with a config is regenerated from the wrong parser.
    // Always non-strict: a parse error must yield a tree with error nodes
    // rather than throw.
    const testParser = config && parser.configure
      ? parser.configure({ ...config, strict: false })
      : parser;
    const tree = testParser.parse(text);
    const stringifyOptions = writePrettyTree ? { pretty: true, text } : undefined;
    const actual = stringifyTree(tree, stringifyOptions);
    // parse error -> make tests fail
    // FIXME make this optional, to allow testing for parser errors
    // FIXME non-strict expectations ("..." prefix) are not preserved
    const actualWithFixme = actual.replace(/⚠/g, '⚠ FIXME');
    result.push(`# ${name}${configStr || ''}\n${text}\n==>\n${actualWithFixme}`);
  }
  const newFileContent = result.join("\n\n") + "\n";
  // TODO backup?
  console.log(`writing ${filePath}`);
  fs.writeFileSync(filePath, newFileContent, "utf8");
}