test.fileTests: also return text, expected, config #7

milahu · 2022-09-11T12:55:24Z

Make the `fileTests` function useful for parsing test files.

`expected` has leading and trailing whitespace (`\n`).
Maybe we should use `expected = expected.trim()`?

marijnh · 2022-09-11T13:40:42Z

What are you planning to use this for?

milahu · 2022-09-11T13:50:28Z

updating the expected strings in test files
for example after renaming tokens

jest --updateSnapshot

tree-sitter has this feature too

# Update all syntax trees in corpus files with current parser output
tree-sitter test --update

sample code:
update test files for lezer-parser

used in
replit/codemirror-lang-nix#3
https://github.com/milahu/lezer-parser-nix

test/update-expressions.js

// test/update-expressions.js

// based on test-parser.js
// based on manual-test.js

import {parser} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// use a patched version of fileTests to parse test files
// https://github.com/lezer-parser/generator/pull/7
// https://github.com/lezer-parser/generator/blob/main/src/test.ts
//import {fileTests} from "@lezer/generator/dist/test"
/**
 * Build a short, quoted excerpt of `file` for use in error messages.
 *
 * The excerpt starts at `index` and runs up to the first newline found at or
 * after `index + 80` (or to the end of `file` when there is none). Every line
 * of the excerpt is prefixed with `'  | '`.
 *
 * @param {string} file - Full text to take the excerpt from.
 * @param {number} index - Offset in `file` where the excerpt starts.
 * @returns {string} The prefixed excerpt, lines joined with `\n`.
 */
function toLineContext(file, index) {
  const newlineAt = file.indexOf('\n', index + 80);
  const stop = newlineAt === -1 ? file.length : newlineAt;

  const quoted = [];
  for (const line of file.substring(index, stop).split(/\n/)) {
    quoted.push('  | ' + line);
  }
  return quoted.join('\n');
}
const defaultIgnore = false

/**
 * Split the text of a test file into its individual test cases.
 *
 * Each case has the shape
 *
 *     # name {optional JSON config}
 *     input text
 *     ==>
 *     expected tree
 *
 * Unlike a runner, this variant only returns plain data for every case; no
 * `run` method is attached to the entries.
 *
 * @param {string} file - Content of the test file.
 * @param {string} fileName - Name of the file, used only in the error message.
 * @param {*} [mayIgnore] - Accepted for signature compatibility; not read by this variant.
 * @returns {Array<{name: string, text: string, expected: string,
 *   configStr: (string|undefined), config: (*|null), strict: boolean}>}
 *   One entry per case. `text` and `expected` are trimmed, `config` is the
 *   parsed `configStr` (or `null` when the header has none), and `strict` is
 *   false when `expected` contains `⚠` or `...`.
 * @throws {Error} When the file content does not match the case format.
 */
function fileTests(file, fileName, mayIgnore = defaultIgnore) {
  // Global + sticky: each exec() has to match exactly where the previous match ended.
  const caseExpr = /\s*#\s*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy
  // Splits the header line into the name and an optional trailing `{...}` config.
  const headerExpr = /(.*?)(\{.*?\})?$/
  const tests = []
  let lastIndex = 0

  do {
    const caseMatch = caseExpr.exec(file)
    if (!caseMatch) {
      throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, lastIndex)}`)
    }

    const header = headerExpr.exec(caseMatch[1])
    if (header === null) throw Error('execResult is null')
    const name = header[1]
    const configStr = header[2]

    const text = caseMatch[2].trim()
    const expected = caseMatch[3].trim()
    const config = configStr ? JSON.parse(configStr) : null
    const strict = !/⚠|\.\.\./.test(expected)

    tests.push({ name, text, expected, configStr, config, strict })

    lastIndex = caseMatch.index + caseMatch[0].length
  } while (lastIndex !== file.length)

  return tests
}

import * as fs from "fs"
import * as path from "path"
import { fileURLToPath } from 'url';
// Directory that contains this script. The *.txt test files are read from,
// and written back to, this same directory.
const caseDir = path.dirname(fileURLToPath(import.meta.url));

// When true, trees are written in the indented multi-line ("pretty") format.
const writePrettyTree = true;

// Regenerate the expected tree of every test case in every *.txt file.
for (const file of fs.readdirSync(caseDir)) {
  if (!/\.txt$/.test(file)) continue;
  const filePath = path.join(caseDir, file);
  const fileContent = fs.readFileSync(filePath, "utf8");
  const result = [];
  for (const { name, text, configStr, config, strict } of fileTests(fileContent, file)) {
    // A case whose previous expectation contained ⚠ or ... is non-strict;
    // keep that marker in the regenerated expectation.
    const strictStr = strict ? '' : '... '; // prefer ascii ... over unicode ⚠
    // Honor the per-test config from the case header. Without it, a case that
    // declares a config would be re-parsed with the default parser and the
    // wrong tree would be written back as the new expectation.
    const testParser = config && parser.configure ? parser.configure(config) : parser;
    const tree = testParser.parse(text);
    const stringifyOptions = writePrettyTree && { pretty: true, text };
    const actual = stringifyTree(tree, stringifyOptions);
    // parse error -> make tests fail
    // FIXME make this optional, to allow testing for parser errors
    const actualWithFixme = actual.replace(/⚠/g, '⚠ FIXME');
    result.push(`# ${name}${(configStr || '')}\n${text}\n==>\n${strictStr}${actualWithFixme}`);
  }
  const newFileContent = result.join("\n\n") + "\n";
  // TODO backup?
  console.log(`writing ${filePath}`);
  fs.writeFileSync(filePath, newFileContent, "utf8");
}

test/manual-test.js

// test/manual-test.js

import {parser as parserImported} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// The input text is taken from the first command-line argument; without it,
// print a usage line and exit with status 1.
if (process.argv.length < 3) {
  const scriptName = process.argv[1].split('/').pop();
  console.log(`usage: node ${scriptName} "input text"`);
  process.exit(1);
}

let text = process.argv[2];
let parser = parserImported; // separate binding so it can be replaced by a configured parser

// based on https://github.com/lezer-parser/generator/blob/main/src/test.ts#L161

let config = null;
let strict = true;
const wantsConfigure = strict || config;
if (parser.configure && wantsConfigure) {
  parser = parser.configure({strict, ...config});
}

let actual;
try {
  actual = parser.parse(text);
}
catch (e) {
  // https://github.com/lezer-parser/lr/blob/main/src/parse.ts#L300
  // For "No parse at <pos>" errors, append the input and a caret under <pos>.
  const noParsePrefix = "No parse at ";
  if (e.message.startsWith(noParsePrefix)) {
    const pos = parseInt(e.message.slice(noParsePrefix.length));
    const caretLine = " ".repeat(pos) + "^";
    e.message += `\n      ${text}\n      ${caretLine}`;
  }
  throw e;
}

// Print the tree twice: compact first, then pretty-printed, separated by a blank line.
const compactTree = stringifyTree(actual);
console.log(compactTree);
console.log();
const prettyTree = stringifyTree(actual, {pretty: true, text });
console.log(prettyTree);

test/stringify-tree.js

// test/stringify-tree.js

// FIXME: don't print empty lines in the pretty tree. This happens in rare cases,
// for example when there is an extra newline before ")" tokens.

// dirty: this will patch the tree's toString methods
/**
 * Render a syntax tree as a string.
 *
 * This works by overwriting `toString` on `tree` and on its first child
 * (plus `childString` when that child is buffer-backed), so the objects
 * passed in are modified. Children other than the first keep whatever
 * `toString` they already had.
 *
 * @param {object} tree - Root node; must provide `type.name` and `children`.
 * @param {object|false} [options] - Any falsy value means "all defaults".
 * @param {boolean} [options.pretty=false] - Put child nodes on separate, indented lines.
 * @param {boolean} [options.human=false] - Indented output without parentheses.
 * @param {string} [options.text=''] - Source text; in `human` mode it is used to
 *   append node text to each line.
 * @param {string} [options.indent='  '] - String repeated once per nesting level.
 * @returns {string} The rendered tree. A tree without children renders as
 *   just its (possibly quoted) type name.
 */
export function stringifyTree(tree, options) {

  const opts = options || {};
  const pretty = opts.pretty || false;
  const human = opts.human || false; // human readable, like python or yaml
  const text = opts.text || '';
  const indentStep = opts.indent || '  ';

  // Names containing non-word characters are JSON-quoted, except for error nodes.
  const typeLabel = (type) =>
    (/\W/.test(type.name) && !type.isError ? JSON.stringify(type.name) : type.name);

  // Tree https://github.com/lezer-parser/common/blob/main/src/tree.ts#L314
  tree.toString = function toString(depth = 0) {
    let children = ""
    for (let ch of this.children) {
      let str = ch.toString(depth + 1)
      if (str) {
        children += str
      }
    }
    // Anonymous nodes contribute only their children.
    if (!this.type.name) return children
    let suffix
    if (human) {
      suffix = children.length ? "\n" + children : ""
    } else {
      suffix = children.length ? (" (" + (pretty ? "\n" : "") + children + (pretty ? "\n" : "") + ")") : ""
    }
    return typeLabel(this.type) + suffix
  }

  const first = tree.children[0]

  // A tree without children (e.g. from empty input) has no first child to
  // patch; reading `.set` on it would throw, so render the root on its own.
  if (!first) return tree.toString(0)

  if (!first.set) {
    // Tree
    // TODO print type + source tree

    first.toString = function toString(depth = -1) {
      let children = ""
      for (let ch of this.children) {
        let str = ch.toString(depth + 1)
        if (str) {
          children += str
        }
      }
      let indent = indentStep.repeat(depth)
      if (human) {
        // Note: this appends the whole `text` option, not a slice for this node.
        return indent + (!this.type.name ? children :
          typeLabel(this.type) +
          (text ? ` ${text}` : '') +
          (children.length ? "\n" + children : ""))
      }
      return indent + (!this.type.name ? children :
        typeLabel(this.type) +
        (children.length ? ((pretty ? "\n" : "") + "(" + children + ")") : ""))
    }

    return tree.toString(0);
  }

  // TreeBuffer https://github.com/lezer-parser/common/blob/main/src/tree.ts#L530
  // monkeypatch: print type + source tree
  // As read below, each node occupies four buffer slots:
  // [type id, text start, text end, index just past this node's subtree].
  first.toString = function toString(depth = 0) {
    let result = []
    for (let index = 0; index < this.buffer.length;) {
      result.push(this.childString(index, depth + 1))
      index = this.buffer[index + 3]
    }
    return result.join('')
  }
  first.childString = function childString(index, depth = 0) {
    let id = this.buffer[index], endIndex = this.buffer[index + 3]
    let type = this.set.types[id], result = type.name // TODO add source to result
    if (/\W/.test(result) && !type.isError) result = JSON.stringify(result)
    if (human) {
      if (text) {
        let nodeText = text.slice(
          this.buffer[index + 1],
          this.buffer[index + 2],
        )
        // Keep one node per line: quote text that spans lines.
        if (/[\r\n]/.test(nodeText)) nodeText = JSON.stringify(nodeText)
        result += ` ${nodeText}`
      }
      result = indentStep.repeat(depth) + result
    }
    if (pretty) {
      result = indentStep.repeat(depth) + result
    }
    index += 4
    // Leaf node: its subtree ends right after its own four slots.
    if (endIndex === index) return result
    let children = []
    while (index < endIndex) {
      children.push(this.childString(index, depth + 1))
      index = this.buffer[index + 3]
    }
    if (human) {
      return result + '\n' + children.map(str => str + '\n').join('')
    }
    if (pretty) {
      const indent = indentStep.repeat(depth);
      // TODO? test children.length
      return result + " (" + '\n' + children.map(str => str + '\n').join('') + indent + ")"
    }
    return result + "(" + children.join(",") + ")"
  }

  return tree.toString(-1);
}

marijnh · 2022-09-11T13:59:53Z

All right, seems harmless enough.

milahu force-pushed the patch-1 branch from 681ba18 to 5660ad4 Compare September 11, 2022 13:01

milahu changed the title ~~test.fileTests: also return text, expected~~ test.fileTests: also return text, expected, config Sep 11, 2022

milahu force-pushed the patch-1 branch from 5660ad4 to c2b2397 Compare September 11, 2022 13:08

test.fileTests: also return text, expected, config

ba98ec7

milahu force-pushed the patch-1 branch from c2b2397 to ba98ec7 Compare September 11, 2022 13:35

marijnh merged commit f5cdf9f into lezer-parser:main Sep 11, 2022

milahu deleted the patch-1 branch September 11, 2022 14:00

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

test.fileTests: also return text, expected, config #7

test.fileTests: also return text, expected, config #7

Uh oh!

milahu commented Sep 11, 2022 •

edited

Loading

Uh oh!

marijnh commented Sep 11, 2022

Uh oh!

milahu commented Sep 11, 2022 •

edited

Loading

Uh oh!

marijnh commented Sep 11, 2022

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

test.fileTests: also return text, expected, config #7

test.fileTests: also return text, expected, config #7

Uh oh!

Conversation

milahu commented Sep 11, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

marijnh commented Sep 11, 2022

Uh oh!

milahu commented Sep 11, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

marijnh commented Sep 11, 2022

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

milahu commented Sep 11, 2022 •

edited

Loading

milahu commented Sep 11, 2022 •

edited

Loading