Skip to content

Conversation

@milahu
Copy link
Contributor

@milahu milahu commented Sep 11, 2022

make the fileTests function useful for parsing test files

expected has leading + trailing whitespace (\n)
maybe we should expected = expected.trim()?

@milahu milahu changed the title from "test.fileTests: also return text, expected" to "test.fileTests: also return text, expected, config" Sep 11, 2022
@marijnh
Copy link
Collaborator

marijnh commented Sep 11, 2022

What are you planning to use this for?

@milahu
Copy link
Contributor Author

milahu commented Sep 11, 2022

updating the expected strings in test files
for example after renaming tokens

similar to updating jest snapshot tests

jest --updateSnapshot

tree-sitter has this feature too

# Update all syntax trees in corpus files with current parser output
tree-sitter test --update

sample code:
update test files for lezer-parser

used in
replit/codemirror-lang-nix#3
https://github.com/milahu/lezer-parser-nix

test/update-expressions.js
// test/update-expressions.js

// based on test-parser.js
// based on manual-test.js

import {parser} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// use a patched version of fileTests to parse test files
// https://github.com/lezer-parser/generator/pull/7
// https://github.com/lezer-parser/generator/blob/main/src/test.ts
//import {fileTests} from "@lezer/generator/dist/test"
/**
 * Build an excerpt of `file` for error messages.
 *
 * The excerpt starts at `index` and runs to the first newline found at or
 * after `index + 80` (or to the end of `file` when there is none). Each line
 * of the excerpt is prefixed with "  | ".
 *
 * @param {string} file - full file content
 * @param {number} index - offset at which the excerpt starts
 * @returns {string} the prefixed excerpt, lines joined with "\n"
 */
function toLineContext(file, index) {
  const newlinePos = file.indexOf('\n', index + 80);

  let stop = file.length;
  if (newlinePos !== -1) stop = newlinePos;

  const prefixed = [];
  for (const line of file.substring(index, stop).split(/\n/)) {
    prefixed.push('  | ' + line);
  }
  return prefixed.join('\n');
}
const defaultIgnore = false

/**
 * Parse the content of a test file into a list of test-case records.
 *
 * Each case in the file has the shape
 *
 *     # name {optional JSON config}
 *     input text
 *     ==>
 *     expected tree
 *
 * and cases follow each other, each starting with a `#` header line.
 *
 * @param {string} file - content of the test file
 * @param {string} fileName - only used in the error message
 * @param {*} [mayIgnore=defaultIgnore] - kept for interface compatibility;
 *   not used here because the returned records carry no `run` method
 * @returns {Array<{name: string, text: string, expected: string,
 *   configStr: (string|undefined), config: (object|null), strict: boolean}>}
 *   `name` is the header text before the optional `{...}` config (not trimmed),
 *   `text` and `expected` are trimmed, `config` is the parsed `configStr`.
 * @throws {Error} when the file does not match the expected format
 * @throws {SyntaxError} when a header's `{...}` config is not valid JSON
 */
function fileTests(file, fileName, mayIgnore = defaultIgnore) {
  // Sticky + global: every exec() must match exactly where the previous case ended.
  const caseExpr = /\s*#\s*(.*)(?:\r\n|\r|\n)([^]*?)==+>([^]*?)(?:$|(?:\r\n|\r|\n)+(?=#))/gy
  const tests = []
  let lastIndex = 0
  for (;;) {
    const m = caseExpr.exec(file)
    if (!m) throw new Error(`Unexpected file format in ${fileName} around\n\n${toLineContext(file, lastIndex)}`)

    // Split the header line into the case name and an optional trailing {...} config.
    const headerMatch = /(.*?)(\{.*?\})?$/.exec(m[1])
    if (headerMatch === null) throw new Error('execResult is null')
    const [, name, configStr] = headerMatch

    const text = m[2].trim()
    const expected = m[3].trim()
    const config = configStr ? JSON.parse(configStr) : null
    // A case is non-strict when its expected tree mentions an error node (U+26A0)
    // or contains "...". The marker is written as an escape so it cannot be lost
    // to an encoding mishap, which would leave an empty alternative that matches
    // every string and makes `strict` always false.
    const strict = !/\u26A0|\.\.\./.test(expected)

    tests.push({
      name,
      text,
      expected,
      configStr,
      config,
      strict,
    })
    lastIndex = m.index + m[0].length
    if (lastIndex === file.length) break
  }
  return tests
}

import * as fs from "fs"
import * as path from "path"
import { fileURLToPath } from 'url';
// Directory that contains this script; every *.txt file in it is rewritten.
const caseDir = path.dirname(fileURLToPath(import.meta.url))

// When true, the tree is written with { pretty: true, text } options.
const writePrettyTree = true

// Rewrite every *.txt test file in place: keep each case's name, config and
// input text, and replace the expected tree with the current parser output.
for (const file of fs.readdirSync(caseDir)) {
  if (!/\.txt$/.test(file)) continue
  const filePath = path.join(caseDir, file)
  const fileContent = fs.readFileSync(filePath, "utf8")
  const result = []
  for (const testData of fileTests(fileContent, file)) {
    const { name, text, configStr, strict } = testData;
    const strictStr = strict ? '' : '... ' // prefer ascii ... over unicode ⚠
    const tree = parser.parse(text);
    // stringifyTree treats a falsy options value as "no options".
    const stringifyOptions = writePrettyTree && { pretty: true, text };
    const actual = stringifyTree(tree, stringifyOptions);
    // parse error -> make tests fail
    // FIXME make this optional, to allow testing for parser errors
    // The error marker (U+26A0) is written as an escape: an empty regex literal
    // would be read as the start of a line comment and break the file.
    const actualWithFixme = actual.replace(/\u26A0/g, '⚠ FIXME');
    result.push(`# ${name}${(configStr || '')}\n${text}\n==>\n${strictStr}${actualWithFixme}`)
  }
  const newFileContent = result.join("\n\n") + "\n";
  // TODO backup?
  console.log(`writing ${filePath}`);
  fs.writeFileSync(filePath, newFileContent, "utf8");
}
test/manual-test.js
// test/manual-test.js

import {parser as parserImported} from "../dist/index.js"
import {stringifyTree} from "./stringify-tree.js"

// The input text is required as the first command-line argument.
if (process.argv.length < 3) {
  console.log(`usage: node ${process.argv[1].split('/').pop()} "input text"`);
  process.exit(1);
}

const text = process.argv[2];

// based on https://github.com/lezer-parser/generator/blob/main/src/test.ts#L161

const config = null;
const strict = true;
// Use a configured parser when the imported parser offers configure().
const shouldConfigure = parserImported.configure && (strict || config);
const parser = shouldConfigure
  ? parserImported.configure({strict, ...config})
  : parserImported;

const noParsePrefix = "No parse at ";
let actual;
try {
  actual = parser.parse(text);
}
catch (err) {
  // https://github.com/lezer-parser/lr/blob/main/src/parse.ts#L300
  // Extend the message with the input and a caret under the reported position.
  if (err.message.startsWith(noParsePrefix)) {
    const pos = parseInt(err.message.slice(noParsePrefix.length));
    const caretLine = `${" ".repeat(pos)}^`;
    err.message += `\n      ${text}\n      ${caretLine}`;
  }
  throw err;
}

// Print the compact form, an empty line, then the pretty form.
const compactTree = stringifyTree(actual);
console.log(compactTree);
console.log();
const prettyTree = stringifyTree(actual, {pretty: true, text });
console.log(prettyTree);
test/stringify-tree.js
// test/stringify-tree.js

// FIXME: don't print empty lines in the pretty tree. this happens in rare cases
// example: extra newline before ")" tokens

/**
 * Render a parse tree as a string.
 *
 * dirty: this patches `toString` on `tree` and on `tree.children[0]` (plus
 * `childString` when that child is a buffer) in place. Only the first child is
 * patched; any further children are rendered by whatever `toString` they
 * already have.
 *
 * @param {object} tree - node with `type` ({name, isError}) and `children`.
 *   The first child is treated as a buffer when it has a truthy `set`
 *   property (`buffer` of 4-number records plus `set.types`), otherwise as a
 *   nested tree node with its own `type` and `children`.
 * @param {object|false} [options] - any falsy value means "no options"
 * @param {boolean} [options.pretty=false] - put children on separate, indented lines
 * @param {boolean} [options.human=false] - human readable, like python or yaml
 * @param {string} [options.text=''] - source text, used in human mode to show node text
 * @param {string} [options.indent='  '] - one indentation step
 * @returns {string} the rendered tree
 */
export function stringifyTree(tree, options) {

  const opts = options || {};
  const pretty = opts.pretty || false;
  const human = opts.human || false; // human readable, like python or yaml
  const text = opts.text || '';
  const indentStep = opts.indent || '  ';

  // Node names containing non-word characters are JSON-quoted, except error nodes.
  const nodeLabel = (type) =>
    (/\W/.test(type.name) && !type.isError ? JSON.stringify(type.name) : type.name);

  // Concatenate the renderings of all children, skipping empty ones.
  const joinChildren = (node, depth) => {
    let children = ""
    for (const ch of node.children) {
      const str = ch.toString(depth + 1)
      if (str) children += str
    }
    return children
  }

  // Tree https://github.com/lezer-parser/common/blob/main/src/tree.ts#L314
  tree.toString = function toString(depth = 0) {
    const children = joinChildren(this, depth)
    return !this.type.name ? children :
      nodeLabel(this.type) + (
        human
        ? (children.length ? "\n" + children : "")
        : (children.length ? (" (" + (pretty ? "\n" : "") + children + (pretty ? "\n" : "") + ")") : "")
      )
  }

  // A tree without children has no first child to inspect or patch;
  // reading tree.children[0].set would throw a TypeError.
  if (tree.children.length === 0) return tree.toString(0);

  const firstChild = tree.children[0];

  if (!firstChild.set) {
    // Tree
    // TODO print type + source tree

    firstChild.toString = function toString(depth = -1) {
      const children = joinChildren(this, depth)
      // NOTE(review): this is the whole source text, not the text of this node.
      const nodeText = text
      const indent = indentStep.repeat(depth)
      if (human) {
        return indent + (!this.type.name ? children :
          nodeLabel(this.type) +
          (text ? ` ${nodeText}` : '') +
          (children.length ? "\n" + children : ""))
      }
      return indent + (!this.type.name ? children :
        nodeLabel(this.type) +
        (children.length ? ((pretty ? "\n" : "") + "(" + children + ")") : ""))
    }

    return tree.toString(0);
  }

  // TreeBuffer https://github.com/lezer-parser/common/blob/main/src/tree.ts#L530
  // monkeypatch: print type + source tree
  firstChild.toString = function toString(depth = 0) {
    const result = []
    for (let index = 0; index < this.buffer.length;) {
      result.push(this.childString(index, depth + 1))
      // slot 3 of a record is the buffer index just past this node
      index = this.buffer[index + 3]
    }
    return result.join('')
  }
  firstChild.childString = function childString(index, depth = 0) {
    // Record layout as read here: [type id, text start, text end, end index].
    const id = this.buffer[index]
    const endIndex = this.buffer[index + 3]
    let result = nodeLabel(this.set.types[id]) // TODO add source to result
    if (human) {
      if (text) {
        let nodeText = text.slice(
          this.buffer[index + 1],
          this.buffer[index + 2],
        )
        // keep multi-line node text on one output line
        if (/[\r\n]/.test(nodeText)) nodeText = JSON.stringify(nodeText)
        result += ` ${nodeText}`
      }
      result = indentStep.repeat(depth) + result
    }
    if (pretty) {
      result = indentStep.repeat(depth) + result
    }
    index += 4
    // no child records between this record and endIndex: leaf node
    if (endIndex == index) return result
    const children = []
    while (index < endIndex) {
      children.push(this.childString(index, depth + 1))
      index = this.buffer[index + 3]
    }
    if (human) {
      return result + '\n' + children.map(str => str + '\n').join('')
    }
    if (pretty) {
      const indent = indentStep.repeat(depth);
      // TODO? test children.length
      return result + " (" + '\n' + children.map(str => str + '\n').join('') + indent + ")"
    }
    return result + "(" + children.join(",") + ")"
  }

  return tree.toString(-1);
}

@marijnh marijnh merged commit f5cdf9f into lezer-parser:main Sep 11, 2022
@marijnh
Copy link
Collaborator

marijnh commented Sep 11, 2022

All right, seems harmless enough.

@milahu milahu deleted the patch-1 branch September 11, 2022 14:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants