diff --git a/.github/workflows/JuliaSyntaxCI.yml b/.github/workflows/JuliaSyntaxCI.yml new file mode 100644 index 0000000000000..bfa6003d3bac3 --- /dev/null +++ b/.github/workflows/JuliaSyntaxCI.yml @@ -0,0 +1,72 @@ +name: JuliaSyntax +on: + push: + branches: + - master + - release-* + paths: + - 'JuliaSyntax/**' + tags: '*' + pull_request: + paths: + - 'JuliaSyntax/**' +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + runs-on: ${{ matrix.github-runner }} + timeout-minutes: 60 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + julia-version: + - '1.0' + - '1.6' + - '1.10' + - '1' + github-runner: + - ubuntu-latest + - macos-13 + - macos-latest + - windows-latest + julia-wordsize: + # The value here only affects the version of Julia binary that we download. + # It does not affect the architecture of the GitHub Runner (virtual machine) that + # we run on. + - '32' # 32-bit Julia. Only available on x86_64. Not available on aarch64. + - '64' # 64-bit Julia. + exclude: + # For Intel macOS we don't have 32-bit builds of Julia + - github-runner: macos-13 + julia-wordsize: '32' + # For Apple Silicon macOS we don't have 32-bit builds of Julia, nor + # builds for older Julia versions. + - github-runner: macos-latest + julia-wordsize: '32' + - github-runner: macos-latest + julia-version: '1.0' + - github-runner: macos-latest + julia-version: '1.6' + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + persist-credentials: false + - uses: julia-actions/setup-julia@5c9647d97b78a5debe5164e9eec09d653d29bd71 # v2.6.1 + with: + version: ${{ matrix.julia-version }} + # If `julia-wordsize` is 32, then we set `arch` to `x86`, because we know that + # 32-bit builds of Julia are only available for x86. 
+ # + # If `julia-wordsize` is 64, then we set `arch` to `${{ runner.arch }}`, which + # GitHub will automatically expand to the correct value (`x86_64` or `aarch64`) + # based on the architecture of the underlying GitHub Runner (virtual machine). + arch: ${{ matrix.julia-wordsize == '32' && 'x86' || runner.arch }} + - uses: julia-actions/julia-buildpkg@e3eb439fad4f9aba7da2667e7510e4a46ebc46e1 # v1.7.0 + - uses: julia-actions/julia-runtest@678da69444cd5f13d7e674a90cb4f534639a14f9 # v1.11.2 + with: + project: JuliaSyntax diff --git a/JuliaLowering/.github/workflows/CI.yml b/JuliaLowering/.github/workflows/CI.yml new file mode 100644 index 0000000000000..7ed1133e01315 --- /dev/null +++ b/JuliaLowering/.github/workflows/CI.yml @@ -0,0 +1,35 @@ +name: CI +on: + push: + branches: + - main + tags: ['*'] + pull_request: + workflow_dispatch: +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 diff --git a/JuliaLowering/.github/workflows/CompatHelper.yml b/JuliaLowering/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000000000..cba9134c670f0 --- /dev/null +++ b/JuliaLowering/.github/workflows/CompatHelper.yml @@ -0,0 +1,16 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: 
Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaLowering/.github/workflows/TagBot.yml b/JuliaLowering/.github/workflows/TagBot.yml new file mode 100644 index 0000000000000..2bacdb87e004b --- /dev/null +++ b/JuliaLowering/.github/workflows/TagBot.yml @@ -0,0 +1,31 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: + inputs: + lookback: + default: 3 +permissions: + actions: read + checks: read + contents: write + deployments: read + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: read +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/JuliaLowering/.gitignore b/JuliaLowering/.gitignore new file mode 100644 index 0000000000000..b067eddee4ee0 --- /dev/null +++ b/JuliaLowering/.gitignore @@ -0,0 +1 @@ +/Manifest.toml diff --git a/JuliaLowering/LICENSE b/JuliaLowering/LICENSE new file mode 100644 index 0000000000000..62a00d053a428 --- /dev/null +++ b/JuliaLowering/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 JuliaHub and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above 
copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/JuliaLowering/Project.toml b/JuliaLowering/Project.toml new file mode 100644 index 0000000000000..2b01366509e8b --- /dev/null +++ b/JuliaLowering/Project.toml @@ -0,0 +1,22 @@ +name = "JuliaLowering" +uuid = "f3c80556-a63f-4383-b822-37d64f81a311" +authors = ["Claire Foster and contributors"] +version = "1.0.0-DEV" + +[deps] +JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4" + +[sources] +JuliaSyntax = {rev = "99e975a7", url = "https://github.com/JuliaLang/JuliaSyntax.jl"} + +[compat] +julia = "1" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[targets] +test = ["Test", "FileWatching", "Markdown", "REPL"] diff --git a/JuliaLowering/README.md b/JuliaLowering/README.md new file mode 100644 index 0000000000000..5ebe02b95688d --- /dev/null +++ b/JuliaLowering/README.md @@ -0,0 +1,954 @@ +# JuliaLowering + +[![Build Status](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/c42f/JuliaLowering.jl/actions/workflows/CI.yml?query=branch%3Amain) + +JuliaLowering.jl is an experimental port of Julia's code lowering compiler +passes, written in Julia itself. 
"Code lowering" is the set of compiler passes +which *symbolically* transform and simplify Julia's syntax prior to type +inference. + +## Goals + +This work is intended to +* Bring precise code provenance to Julia's lowered form (and eventually + downstream in type inference, stack traces, etc). This has many benefits + - Talk to users precisely about their code via character-precise error and + diagnostic messages from lowering + - Greatly simplify the implementation of critical tools like Revise.jl + which rely on analyzing how the user's source maps to the compiler's data + structures + - Allow tools like JuliaInterpreter to use type-inferred and optimized + code, with the potential for huge speed improvements. +* Bring improvements for macro authors + - Prototype "automatic hygiene" (no more need for `esc()`!) + - Precise author-defined error reporting from macros + - Sketch better interfaces for syntax trees (hopefully!) + +## Trying it out + +Note this is a work in progress; many types of syntax are not yet handled. + +1. You need a 1.13.0-DEV build of Julia: At least 1.13.0-DEV.880. Commit `5ebc5b463ea` is currently known to work. Note that JuliaLowering relies on Julia internals and may be broken on the latest Julia dev version from time to time. +2. Use commit `e02f29f` of [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) +3. Get the latest version of [JuliaSyntaxFormatter](https://github.com/c42f/JuliaSyntaxFormatter.jl) +4. Run the demo `include("test/demo.jl")` + +# Design notes + +## Syntax trees + +Want something something better than `JuliaSyntax.SyntaxNode`! `SyntaxTree` and +`SyntaxGraph` provide this. Some future version of these should end up in +`JuliaSyntax`. + +We want to allow arbitrary attributes to be attached to tree nodes by analysis +passes. This separates the analysis pass implementation from the data +structure, allowing passes which don't know about each other to act on a shared +data structure. 
+ +Design and implementation inspiration comes in several analogies: + +Analogy 1: the ECS (Entity-Component-System) pattern for computer game design. +This pattern is highly successful because it separates game logic (systems) +from game objects (entities) by providing flexible storage +* Compiler passes are "systems" +* AST tree nodes are "entities" +* Node attributes are "components" + +Analogy 2: The AoS to SoA transformation. But here we've got a kind of +tree-of-structs-with-optional-attributes to struct-of-Dicts transformation. +The data alignment / packing efficiency and concrete type safe storage benefits +are similar. + +Analogy 3: Graph algorithms which represent graphs as a compact array of node +ids and edges with integer indices, rather than using a linked data structure. + +### References + +Sander Mertens, the author of the Flecs ECS has a blog post series discussing +ECS data structures and the many things that may be done with them. We may want +to use some of these tricks to make `SyntaxTree` faster, eventually. See, for +example, +[Building Games in ECS with Entity Relationships](https://ajmmertens.medium.com/building-games-in-ecs-with-entity-relationships-657275ba2c6c) + +### Structural assertions / checking validity of syntax trees + +Syntax trees in Julia `Expr` form are very close to lisp lists: a symbol at the +`head` of the list which specifies the syntactic form, and a sequence of +children in the syntax tree. This is a representation which `JuliaSyntax` and +`JuliaLowering` follow but it does come with certain disadvantages. One of the +most problematic is that the number of children affects the validity (and +sometimes semantics) of an AST node, as much as the `head` symbol does. + +In `JuliaSyntax` we've greatly reduced the overloading of `head` in order to +simplify the interpretation of child structures in the tree. 
For example, +broadcast calls like `f.(x,y)` use the `K"dotcall"` kind rather than being a +node with `head == Symbol(".")` and a tuple as children. + +However, there's still many ways for lowering to encounter invalid expressions +of type `SyntaxTree` and these must be checked. In JuliaSyntax we have several +levels of effort corresponding to the type of errors conditions we desire to +check and report: + +* For invalid syntax which is accepted by the `JuliaSyntax` + parser but is invalid in lowering we use manual `if` blocks followed by + throwing a `LoweringError`. This is more programming effort but allows for + the highest quality error messages for the typical end user. +* For invalid syntax which can only be produced by macros (ie, not by the + parser) we mostly use the `@chk` macro. This is a quick tool for validating + input but gives lesser quality error messages. +* For JuliaLowering's internal invariants we just use `@assert` - these should + never be hit and can be compiled out in principle. + +## Provenance tracking + +Expression provenance is tracked through lowering by attaching provenance +information in the `source` attribute to every expression as it is generated. +For example when parsing a source file we have + +```julia +julia> ex = parsestmt(SyntaxTree, "a + b", filename="foo.jl") +SyntaxTree with attributes kind,value,name_val,syntax_flags,source +[call-i] │ + a │ + + │ + b │ + +julia> ex[3].source +a + b +# ╙ ── these are the bytes you're looking for 😊 +``` + +The `provenance` function should be used to look up the `source` attribute and +the `showprov` function used to inspect the content (this is preferred because +the encoding of `source` is an implementation detail). For example: + +```julia +julia> showprov(ex[3]) +a + b +# ╙ ── in source +# @ foo.jl:1 +``` + +During macro expansion and lowering provenance gets more complicated because an +expression can arise from multiple sources. 
For example, we want to keep track +of the entire stack of macro expansions an expression was generated by, while +also recording where it occurred in the original source file. + +For this, we use a tree data structure. Let's look at the following pair of +macros + +```julia +julia> JuliaLowering.include_string(Main, raw""" + module M + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + end + """, "some_macros.jl") +``` + +The tree which arises from macro expanding this is pretty simple: + +```julia +julia> expanded = JuliaLowering.macroexpand(Main, parsestmt(SyntaxTree, "M.@outer()")) +SyntaxTree with attributes scope_layer,kind,value,var_id,name_val,syntax_flags,source +[tuple-p] │ + 1 │ + 2 │ +``` + +but the provenance information recorded for the second element `2` of this +tuple is not trivial; it includes the macro call expressions for `@inner` and +`@outer`. We can show this in tree form: + +```julia +julia> showprov(expanded[2], tree=true) +2 +├─ 2 +│ └─ @ some_macros.jl:3 +└─ (macrocall @inner) + ├─ (macrocall @inner) + │ └─ @ some_macros.jl:7 + └─ (macrocall-p (. M @outer)) + └─ @ foo.jl:1 +``` + +or as a more human readable flattened list highlighting of source ranges: + +```julia +module M + macro inner() + :(2) +# ╙ ── in source + end + +# @ some_macros.jl:3 + + + macro outer() + :((1, @inner)) +# └────┘ ── in macro expansion + end +end +# @ some_macros.jl:7 + +M.@outer() +└────────┘ ── in macro expansion +# @ foo.jl:1 +``` + +## Problems with Hygiene in Julia's exiting macro system + +To write correct hygienic macros in Julia (as of 2024), macro authors must use +`esc()` on any any syntax passed to the macro so that passed identifiers escape +to the macro caller scope. However + +* This is not automatic and the correct use of `esc()` is one of the things + that new macro authors find most confusing. (My impression, based on various + people complaining about how confusing `esc()` is.) 
+* `esc()` wraps expressions in `Expr(:escape)`, but this doesn't work well when + macros pass such escaped syntax to an inner macro call. As discussed in + [Julia issue #37691](https://github.com/JuliaLang/julia/issues/37691), macros + in Julia's existing system are not composable by default. Writing + composable macros in the existing system would require preserving the escape + nesting depth when recursing into any macro argument nested expressions. + Almost no macro author knows how to do this and is prepared to pay for the + complexity of getting it right. + +The requirement to use `esc()` stems from Julia's pervasive use of the simple +`Expr` data structure which represents a unadorned AST in which names are plain +symbols. For example, a macro call `@foo x` gets passed the symbol `:x` +which is just a name without any information attached to indicate that it came +from the scope where `@foo` was called. + +### Hygiene References + +* [Toward Fearless Macros](https://lambdaland.org/posts/2023-10-17_fearless_macros) - + a blog post by Ashton Wiersdorf +* [Towards the Essence of Hygiene](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) - a paper by Michael Adams +* [Bindings as sets of scopes](https://www-old.cs.utah.edu/plt/scope-sets/) - a description of Racket's scope set mechanism by Matthew Flatt + +# Overview of lowering passes + +JuliaLowering uses six symbolic transformation passes: + +1. Macro expansion - expanding user-defined syntactic constructs by running the + user's macros. This pass also includes a small amount of other symbolic + simplification. +2. Syntax desugaring - simplifying Julia's rich surface syntax down to a small + number of syntactic forms. +3. Scope analysis - analyzing identifier names used in the code to discover + local variables, closure captures, and associate global variables to the + appropriate module. 
Transform all names (kind `K"Identifier"`) into binding + IDs (kind `K"BindingId"`) which can be looked up in a table of bindings. +4. Closure conversion - convert closures to types and deal with captured + variables efficiently where possible. +5. Flattening to untyped IR - convert code in hierarchical tree form to a + flat array of statements; convert control flow into gotos. +6. Convert untyped IR to `CodeInfo` form for integration with the Julia runtime. + +## Pass 1: Macro expansion + +This pass expands macros and quoted syntax, and does some very light conversion +of a few syntax `Kind`s in preparation for syntax desugaring. + +### Hygiene in JuliaLowering + +In JuliaLowering we make hygiene automatic and remove `esc()` by combining names +with scope information. In the language of the paper [*Towards the Essence of +Hygiene*](https://michaeldadams.org/papers/hygiene/hygiene-2015-popl-authors-copy.pdf) +by Michael Adams, this combination is called a "syntax object". In +JuliaLowering our representation is the tuple `(name,scope_layer)`, also called +`VarId` in the scope resolution pass. + +JuliaLowering's macro expander attaches a unique *scope layer* to each +identifier in a piece of syntax. A "scope layer" is an integer identifier +combined with the module in which the syntax was created. + +When expanding macros, + +* Any identifiers passed to the macro are tagged with the scope layer they were + defined within. +* A new unique scope layer is generated for the macro invocation, and any names + in the syntax produced by the macro are tagged with this layer. + +Subsequently, the `(name,scope_layer)` pairs are used when resolving bindings. +This ensures that, by default, we satisfy the basic rules for hygienic macros +discussed in Adams' paper: + +1. A macro can't insert a binding that can capture references other than those + inserted by the macro. +2. A macro can't insert a reference that can be captured by bindings other than + those inserted by the macro. 
+ +TODO: Write more here... + + +### Compatibility with `Expr` macros + +In order to have compatibility with old-style macros which expect an `Expr`-based +data structure as input, we convert `SyntaxTree` to `Expr`, call the old-style +macro, then convert `SyntaxTree` back to `Expr` and continue with the expansion +process. This involves some loss of provenance precision but allows full +interoperability in the package ecosystem without a need to make breaking +changes. + +Let's look at an example. Suppose a manually escaped old-style macro +`@oldstyle` is implemented as + +```julia +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +``` + +along with two correctly escaped new-style macros: + +```julia +macro call_oldstyle_macro(y) + quote + x = "x in call_oldstyle_macro" + @oldstyle $y x + end +end + +macro newstyle(x, y, z) + quote + x = "x in @newstyle" + ($x, $y, $z, x) + end +end +``` + +Then want some code like the following to "just work" with respect to hygiene + +```julia +let + x = "x in outer ctx" + @call_oldstyle_macro x +end +``` + +When calling `@oldstyle`, we must convert `SyntaxTree` into `Expr`, but we need +to preserve the scope layer of the `x` from the outer context as it is passed +into `@oldstyle` as a macro argument. To do this, we use `Expr(:scope_layer, +:x, outer_layer_id)`. (In the old system, this would be `Expr(:escape, :x)` +instead, presuming that `@call_oldstyle_macro` was implemented using `esc()`.) + +When receiving output from old style macro invocations, we preserve the escape +handling of the existing system for any symbols which aren't tagged with a +scope layer. + +## Pass 2: Syntax desugaring + +This pass recursively converts many special surface syntax forms to a smaller +set of syntax `Kind`s, following the AST's hierarchical tree structure. Some +such as `K"scope_block"` are internal to lowering and removed during later +passes. 
See `kinds.jl` for a list of these internal forms. + +This pass is implemented in `desugaring.jl`. It's quite large because Julia has +many special syntax features. + +### Desugaring of function definitions + +Desugaring of function definitions is particularly complex because of the cross +product of features which need to work together consistently: + +* Positional arguments (with and without defaults, with and without types) +* Keyword arguments (with and without defaults, with and without types) +* Type parameters with `where` syntax +* Argument slurping syntax with `...` +* Fancy arguments (argument destructuring) + +The combination of positional arguments with defaults and keyword arguments is +particularly complex. Here's an example. Suppose we're given the function +definition + +```julia +function f(a::A=a_default, b::B=b_default; x::X=x_default,y::Y=y_default) + body +end +``` + +This generates +* One method of `f` for each number of positional arguments which can be + called when `f` is called without keyword args +* One overload of `Core.kwcall(kws, ::typeof(f), ...)` for each number of + positional arguments (when called with a nonzero number of keyword args; the + tuple `kws` being constructed by the caller) +* One internal method for the body of the function (we can call it `f_kw` + though it will be named something like `#f#18`) + +First, partially expanding the kw definitions this roughly looks like + +```julia +function f_kw(x::X, y::X, f_self::typeof(f), a::A, b::B) + body +end + +function f(a::A=a_default, b::B=b_default) + f_kw(x_default, y_default, var"#self#", a, b) +end + +function Core.kwcall(kws::NamedTuple, self::typeof(f), a::A=a_default, b::B=b_default) + if Core.isdefined(kws, :x) + x_tmp = Core.getfield(kws, :x) + if x_tmp isa X + nothing + else + Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :x, X, x_tmp))) + end + x = x_tmp + else + x = 1 + end + if Core.isdefined(kws, :y) + y_tmp = Core.getfield(kws, :y) + if 
y_tmp isa Y + nothing + else + Core.throw($(Expr(:new, Core.TypeError, Symbol("keyword argument"), :y, Y, y_tmp))) + end + y = y_tmp + else + y = 2 + end + if Base.isempty(Base.diff_names(Base.keys(kws), (:x, :y))) + nothing + else + # Else unsupported kws + Base.kwerr(kws, self, a, b) + end + f_kw(x, y, self, a, b) +end +``` + +We can then pass this to function expansion for default arguments which expands +each of the above into three more methods. For example, for the first +definition we conceptually expand `f(a::A=a_default, b::B=b_default)` into the +methods + +```julia +# The body +function f(a::A, b::B) + f_kw(x_default, y_default, var"#self#", a, b) +end + +# And two methods for the different numbers of default args +function f(a::A) + var"#self#"(a, b_default) +end + +function f() + var"#self#"(a_default, b_default) +end +``` + +In total, this expands a single "function definition" into seven methods. + +Note that the above is only a sketch! There's more fiddly details when `where` +syntax comes in + +### Desugaring of generated functions + +A brief description of how this works. Let's consider the generated function + +```julia +function gen(x::NTuple{N}, y) where {N,T} + shared = :shared + # Unnecessary use of @generated, but it shows what's going on. + if @generated + quote + maybe_gen = ($x, $N) + end + else + maybe_gen = (typeof(x), N) + end + (shared, maybe_gen) +end +``` + +This is desugared into the following two function definitions. First, a code +generator which will generate code for the body of the function, given the +static parameters `N`, `T` and the positional arguments `x`, `y`. +(`var"#self#"::Type{typeof(gen)}` is also provided by the Julia runtime to +complete the full signature of `gen`, though the user won't normally use this.) 
+ +```julia +function var"#gen@generator#0"(__context__::JuilaSyntax.MacroContext, N, T, var"#self#", x, y) + gen_stuff = quote + maybe_gen = ($x, $N) + end + quote + shared = :shared + $gen_stuff + (shared, maybe_gen) + end +end +``` + +Second, the non-generated version, using the `if @generated` else branches, and +containing mostly normal code. + +```julia +function gen(x::NTuple{N}, y) where {N,T} + $(Expr(:meta, :generated, + Expr(:call, JuliaLowering.GeneratedFunctionStub, + :var"#gen@generator#0", sourceref_of_gen, + :(Core.svec(:var"#self", :x, :y)) + :(Core.svec(:N, :T))))) + shared = :shared + maybe_gen = (typeof(x), N) + (shared, maybe_gen) +end +``` + +The one extra thing added here is the `Expr(:meta, :generated)` which is an +expression creating a callable wrapper for the user's generator, to be +evaluated at top level. This wrapper will then be invoked by the runtime +whenever the user calls `gen` with a new signature and it's expected that a +`CodeInfo` be returned from it. `JuliaLowering.GeneratedFunctionStub` differs +from `Core.GeneratedFunctionStub` in that it contains extra provenance +information (the `sourcref_of_gen`) and expects a `SyntaxTree` to be returned +by the user's generator code. + +## Pass 3: Scope analysis / binding resolution + +This pass replaces variables with bindings of kind `K"BindingId"`, +disambiguating variables when the same name is used in different scopes. It +also fills in the list of non-global bindings within each lambda and metadata +about such bindings as will be used later during closure conversion. + +Scopes are documented in the Juila documentation on +[Scope of Variables](https://docs.julialang.org/en/v1/manual/variables-and-scoping/) + +During scope resolution, we maintain a stack of `ScopeInfo` data structures. + +When a new `lambda` or `scope_block` is discovered, we create a new `ScopeInfo` by +1. Find all identifiers bound or used within a scope. 
New *bindings* may be + introduced by one of the `local`, `global` keywords, implicitly by + assignment, as function arguments to a `lambda`, or as type arguments in a + method ("static parameters"). Identifiers are *used* when they are + referenced. +2. Infer which bindings are newly introduced local or global variables (and + thus require a distinct identity from names already in the stack) +3. Assign a `BindingId` (unique integer) to each new binding + +We then push this `ScopeInfo` onto the stack and traverse the expressions +within the scope translating each `K"Identifier"` into the associated +`K"BindingId"`. While we're doing this we also resolve some special forms like +`islocal` by making use of the scope stack. + +The detailed rules for whether assignment introduces a new variable depend on +the `scope_block`'s `scope_type` attribute when we are processing top-level +code. +* `scope_type == :hard` (as for bindings inside a `let` block) means an + assignment always introduces a new binding +* `scope_type == :neutral` - inherit soft or hard scope from the parent scope. +* `scope_type == :soft` - assignments are to globals if the variable + exists in global module scope. Soft scope doesn't have surface syntax and is + introduced for top-level code by REPL-like environments. + +## Pass 4: Closure conversion / lower bindings + +The main goal of this pass is closure conversion, but it's also used for +lowering typed bindings and global assignments. Roughly, this is passes 3 and 4 +in the original `julia-syntax.scm`. In JuliaLowering it also comes in two steps: + +The first step (part of `scope_resolution.jl`) is to compute metadata related +to bindings, both per-binding and per-binding-per-closure-scope. + +Properties which are computed per-binding which can help with symbolic +optimizations include: +* Type is declared (`x::T` syntax in a statement): type conversions must be + inserted at every assignment of `x`. 
+* Never undefined: value is always assigned to the binding before being read + hence this binding doesn't require the use of `Core.NewvarNode`. +* Single assignment: (TODO how is this defined, what is it for and does it go + here or below?) + +Properties of non-globals which are computed per-binding-per-closure include: +* Read: the value of the binding is used. +* Write: the binding is assigned to. +* Captured: Bindings defined outside the closure which are either Read or Write + within the closure are "captured" and need to be one of the closure's fields. +* Called: the binding is called as a function, ie, `x()`. (TODO - what is this + for?) + +The second step uses this metadata to +* Convert closures into `struct` types +* Lower bindings captured by closures into references to boxes as necessary +* Deal with typed bindings (`K"decl"`) and their assignments +* Lower const and non-const global assignments +* TODO: probably more here. + + +### Q&A + +#### When does `function` introduce a closure? + +Closures are just functions where the name of the function is *local* in scope. +How does the function name become a local? The `function` keyword acts like an +assignment to the function name for the purposes of scope resolution. Thus +`function f() body end` is rather like `f = ()->body` and may result in the +symbol `f` being either `local` or `global`. Like other assignments, `f` may be +declared global or local explicitly, but if not `f` is subject to the usual +rules for assignments inside scopes. For example, inside a `let` scope +`function f() ...` would result in the symbol `f` being local. + +Examples: + +```julia +begin + # f is global because `begin ... end` does not introduce a scope + function f() + body + end + + # g is a closure because `g` is explicitly declared local + local g + function g() + body + end +end + +let + # f is local so this is a closure because `let ... 
end` introduces a scope + function f() + body + end + + # g is not a closure because `g` is declared global + global g + function g() + body + end +end +``` + +#### How do captures work with non-closures? + +Yes it's true, you can capture local variables into global methods. For example: + +```julia +begin + local x = 1 + function f(y) + x + y + end + x = 2 +end +``` + +The way this works is to put `x` in a `Box` and interpolate it into the AST of +`f` (the `Box` can be eliminated in some cases, but not here). Essentially this +lowers to code which is almost-equivalent to the following: + +```julia +begin + local x = Core.Box(1) + @eval function f(y) + $(x.contents) + y + end + x.contents = 2 +end +``` + +#### How do captures work with closures with multiple methods? + +Sometimes you might want a closure with multiple methods, but those methods +might capture different local variables. For example, + +```julia +let + x = 1 + y = 1.5 + function f(xx::Int) + xx + x + end + function f(yy::Float64) + yy + y + end + + f(42) +end +``` + +In this case, the closure type must capture both `x` and `y` and the generated +code looks rather like this: + +```julia +struct TheClosureType + x + y +end + +let + x = 1 + y = 1.5 + f = TheClosureType(x,y) + function (self::TheClosureType)(xx::Int) + xx + self.x + end + function (self::TheClosureType)(yy::Int) + yy + self.y + end + + f(42) +end +``` + +#### When are `method` defs lifted to top level? + +Closure method definitions must be lifted to top level whenever the definitions +appear inside a function. This is allow efficient compilation and avoid world +age issues. + +Conversely, when method defs appear in top level code, they are executed +inline. + +## Pass 5: Convert to untyped IR + +This pass is implemented in `linear_ir.jl`. + +### Untyped IR (JuliaLowering form) + +JuliaLowering's untyped IR is very close to the runtime's `CodeInfo` form (see +below), but is more concretely typed as `JuliaLowering.SyntaxTree`. 
+ +Metadata is generally represented differently: +* The statements retain full code provenance information as `SyntaxTree` + objects. See `kinds.jl` for a list of which `Kind`s occur in the output IR + but not in surface syntax. +* The list of slots is `Vector{Slot}`, including `@nospecialize` metadata + +### Lowering of exception handlers + +Exception handling involves a careful interplay between lowering and the Julia +runtime. The forms `enter`, `leave` and `pop_exception` dynamically modify the +exception-related state on the `Task`; lowering and the runtime work together +to maintain correct invariants for this state. + +Lowering of exception handling must ensure that + +* Each `enter` is matched with a `leave` on every possible non-exceptional + program path (including implicit returns generated in tail position). +* Each `catch` block which is entered and handles the exception - by exiting + via a non-exceptional program path - is matched with a `pop_exception` +* Each `finally` block runs, regardless of the way it's entered - either by + normal program flow, an exception, early `return` or a jump out of an inner + context via `break`/`continue`/`goto` etc. + +The following special forms are emitted into the IR: + +* `(= tok (enter catch_label dynscope))` - + push exception handler with catch block at `catch_label` and dynamic + scope `dynscope`, yielding a token which is used by `leave` and + `pop_exception`. `dynscope` is only used in the special `tryfinally` form + without associated source level syntax (see the `@with` macro) +* `(leave tok)` - + pop exception handler back to the state of the `tok` from the associated + `enter`. Multiple tokens can be supplied to pop multiple handlers using + `(leave tok1 tok2 ...)`. 
+* `(pop_exception tok)` - pop exception stack back to state of associated enter
+
+When an `enter` is encountered, the runtime pushes a new handler onto the
+`Task`'s exception handler stack which will jump to `catch_label` when an
+exception occurs.
+
+There are two ways that the exception-related task state can be restored
+
+1. By encountering a `leave` which will restore the handler state with `tok`.
+2. By throwing an exception. In this case the runtime will pop one handler
+   automatically and jump to the catch label with the new exception pushed
+   onto the exception stack. On this path the exception stack state must be
+   restored back to the associated `enter` by encountering `pop_exception`.
+
+Note that the handler and exception stack represent two distinct types of
+exception-related state restoration which need to happen. Note also that the
+"handler state restoration" actually includes several pieces of runtime state
+including GC flags - see `jl_eh_restore_state` in the runtime for that.
+
+#### Lowering finally code paths
+
+When lowering `finally` blocks we want to emit the user's finally code once but
+multiple code paths may traverse the finally block. For example, consider the
+code
+
+```julia
+function foo(x)
+    while true
+        try
+            if x == 1
+                return f(x)
+            elseif x == 2
+                g(x)
+                continue
+            else
+                break
+            end
+        finally
+            h()
+        end
+    end
+end
+```
+
+In this situation there are four distinct code paths through the finally block:
+1. `return f(x)` needs to call `val = f(x)`, leave the `try` block, run `h()` then
+   return `val`.
+2. `continue` needs to call `h()` then jump to the start of the while loop
+3. `break` needs to call `h()` then jump to the exit of the while loop
+4. If an exception occurs in `f(x)` or `g(x)`, we need to call `h()` before
+   re-throwing the exception so that it propagates to the caller.
+
+To deal with these we create a `finally_tag` variable to dynamically track
Before jumping to the block
+we set this variable to a unique integer tag identifying the incoming code
+path. At the exit of the user's code (`h()` in this case) we perform the jump
+appropriate to the `break`, `continue` or `return` as necessary based on the tag.
+
+(TODO - these are the only four cases which can occur, but, for example,
+multiple `return`s create multiple tags rather than assigning to a single
+variable. Collapsing these into a single case might be worth considering? But
+also might be worse for type inference in some cases?)
+
+## Pass 6: Convert IR to `CodeInfo` representation
+
+This pass converts JuliaLowering's internal representation of untyped IR into
+a form the Julia runtime understands. This is a necessary decoupling which
+separates the development of JuliaLowering.jl from the evolution of the Julia
+runtime itself.
+
+### Untyped IR (`CodeInfo` form)
+
+The final lowered IR is expressed as `CodeInfo` objects which are a sequence of
+`code` statements containing
+* Literals
+* Restricted forms of `Expr` (with semantics different from surface syntax,
+  even for the same `head`! for example the arguments to `Expr(:call)` in IR
+  must be "simple" and aren't evaluated in order)
+* `Core.SlotNumber`
+* Other special forms from `Core` like `Core.ReturnNode`, `Core.EnterNode`, etc.
+* `Core.SSAValue`, indexing any value generated from a statement in the `code`
+  array.
+* Etc (todo)
+
+The IR obeys certain invariants which are checked by the downstream code in
+base/compiler/validation.jl.
+ +See also https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form + +CodeInfo layout (as of early 1.12-DEV): + +```julia +mutable struct CodeInfo + code::Vector{Any} # IR statements + codelocs::Vector{Int32} # `length(code)` Vector of indices into `linetable` + ssavaluetypes::Any # `length(code)` or Vector of inferred types after opt + ssaflags::Vector{UInt32} # flag for every statement in `code` + # 0 if meta statement + # inbounds_flag - 1 bit (LSB) + # inline_flag - 1 bit + # noinline_flag - 1 bit + # ... other 8 flags which are defined in compiler/optimize.jl + # effects_flags - 9 bits + method_for_inference_limit_heuristics::Any + linetable::Any + slotnames::Vector{Symbol} # names of parameters and local vars used in the code + slotflags::Vector{UInt8} # vinfo flags from flisp + slottypes::Any # nothing (used by typeinf) + rettype::Any # Any (used by typeinf) + parent::Any # nothing (used by typeinf) + edges::Any + min_world::UInt64 + max_world::UInt64 + inferred::Bool + propagate_inbounds::Bool + has_fcall::Bool + nospecializeinfer::Bool + inlining::UInt8 + constprop::UInt8 + purity::UInt16 + inlining_cost::UInt16 +end +``` + +## Notes on toplevel-only forms and eval-related functions + +In the current Julia runtime, + +`Base.eval()` +- Uses `jl_toplevel_eval_in` which calls `jl_toplevel_eval_flex` + +`jl_toplevel_eval_flex(mod, ex)` +- Lowers if necessary +- Evaluates certain blessed top level forms + * `:.` + * `:module` + * `:using` + * `:import` + * `:public` + * `:export` + * `:toplevel` + * `:error` + * `:incomplete` + * Identifier and literals +- Otherwise expects `Expr(:thunk)` + * Use codegen "where necessary/profitable" (eg ccall, has_loops etc) + * Otherwise interpret via `jl_interpret_toplevel_thunk` + +Should we lower the above blessed top level forms to julia runtime calls? +Pros: +- Semantically sound. Lowering should do syntax checking in things like + `Expr(:using)` rather than doing this in the runtime support functions. 
+- Precise lowering error messages +- Replaces more Expr usage +- Replaces a whole pile of C code with significantly less Julia code +- Lowering output becomes more consistently imperative +Cons: +- Lots more code to write +- May need to invent intermediate data structures to replace `Expr` +- Bootstrap? +- Some forms require creating toplevel thunks + +In general, we'd be replacing current *declarative* lowering targets like +`Expr(:using)` with an *imperative* call to a `Core` API instead. The call and +the setup of its arguments would need to go in a thunk. We've currently got an +odd mixture of imperative and declarative lowered code. + +## Bugs in Julia's lowering + +Subset of bugs which exist in upstream in flisp implementation, but which are fixed here +* `f()[begin]` has the side effect `f()` twice. +* `a[(begin=1; a=2)]` gives a weird error +* `function A.ccall() ; end` allows `ccall` as a name but it's not allowed without the `A.` +* `a .< b .< c` expands to `(a .< b) .& (b .< c)` where the scope of the `&` is + the expansion module but should be `top.&` to avoid scope-dependence + (especially in the presence of macros) + +## Notes on Racket's hygiene + +People look at [Racket](https://racket-lang.org/) as an example of a very +complete system of hygienic macros. We should learn from them, but keeping in +mind that Racket's macro system is inherently more complicated. Racket's +current approach to hygiene is described in an [accessible talk](https://www.youtube.com/watch?v=Or_yKiI3Ha4) +and in more depth in [a paper](https://www-old.cs.utah.edu/plt/publications/popl16-f.pdf). + +Some differences which makes Racket's macro expander different from Julia: + +* Racket allows *local* definitions of macros. Macro code can be embedded in an + inner lexical scope and capture locals from that scope, but still needs to be + executed at compile time. Julia supports macros at top level scope only. 
+* Racket goes to great lengths to execute the minimal package code necessary to + expand macros; the "pass system". Julia just executes all top level + statements in order when precompiling a package. +* As a lisp, Racket's surface syntax is dramatically simpler and more uniform diff --git a/JuliaLowering/src/JuliaLowering.jl b/JuliaLowering/src/JuliaLowering.jl new file mode 100644 index 0000000000000..d8ff05dd013e8 --- /dev/null +++ b/JuliaLowering/src/JuliaLowering.jl @@ -0,0 +1,47 @@ +# Use a baremodule because we're implementing `include` and `eval` +baremodule JuliaLowering + +using Base +# We define a separate _include() for use in this module to avoid mixing method +# tables with the public `JuliaLowering.include()` API +const _include = Base.IncludeInto(JuliaLowering) + +if parentmodule(JuliaLowering) === Base + using Base.JuliaSyntax +else + using JuliaSyntax +end + +using .JuliaSyntax: highlight, Kind, @KSet_str, is_leaf, children, numchildren, + head, kind, flags, has_flags, numeric_flags, filename, first_byte, + last_byte, byte_range, sourcefile, source_location, span, sourcetext, + is_literal, is_number, is_operator, is_prec_assignment, is_prefix_call, + is_infix_op_call, is_postfix_op_call, is_error + +_include("kinds.jl") +_register_kinds() + +_include("syntax_graph.jl") +_include("ast.jl") +_include("bindings.jl") +_include("utils.jl") + +_include("macro_expansion.jl") +_include("desugaring.jl") +_include("scope_analysis.jl") +_include("closure_conversion.jl") +_include("linear_ir.jl") +_include("runtime.jl") +_include("syntax_macros.jl") + +_include("eval.jl") +_include("compat.jl") +_include("hooks.jl") + +function __init__() + _register_kinds() +end + +_include("precompile.jl") + +end diff --git a/JuliaLowering/src/ast.jl b/JuliaLowering/src/ast.jl new file mode 100644 index 0000000000000..93f5a7c13f5c3 --- /dev/null +++ b/JuliaLowering/src/ast.jl @@ -0,0 +1,748 @@ 
+#------------------------------------------------------------------------------- +# @chk: Basic AST structure checking tool +# +# Check a condition involving an expression, throwing a LoweringError if it +# doesn't evaluate to true. Does some very simple pattern matching to attempt +# to extract the expression variable from the left hand side. +# +# Forms: +# @chk pred(ex) +# @chk pred(ex) msg +# @chk pred(ex) (msg_display_ex, msg) +macro chk(cond, msg=nothing) + if Meta.isexpr(msg, :tuple) + ex = msg.args[1] + msg = msg.args[2] + else + ex = cond + while true + if ex isa Symbol + break + elseif ex.head == :call + ex = ex.args[2] + elseif ex.head == :ref + ex = ex.args[1] + elseif ex.head == :. + ex = ex.args[1] + elseif ex.head in (:(==), :(in), :<, :>) + ex = ex.args[1] + else + error("Can't analyze $cond") + end + end + end + quote + ex = $(esc(ex)) + @assert ex isa SyntaxTree + ok = try + $(esc(cond)) + catch + false + end + if !ok + throw(LoweringError(ex, $(isnothing(msg) ? "expected `$cond`" : esc(msg)))) + end + end +end + +#------------------------------------------------------------------------------- +abstract type AbstractLoweringContext end + +""" +Bindings for the current lambda being processed. + +Lowering passes prior to scope resolution return `nothing` and bindings are +collected later. +""" +current_lambda_bindings(ctx::AbstractLoweringContext) = nothing + +function syntax_graph(ctx::AbstractLoweringContext) + ctx.graph +end + +""" +Unique symbolic identity for a variable, constant, label, or other entity +""" +const IdTag = Int + +""" +Id for scope layers in macro expansion +""" +const LayerId = Int + +""" +A `ScopeLayer` is a mechanism for automatic hygienic macros; every identifier +is assigned to a particular layer and can only match against bindings which are +themselves part of that layer. + +Normal code contains a single scope layer, whereas each macro expansion +generates a new layer. 
+""" +struct ScopeLayer + id::LayerId + mod::Module + parent_layer::LayerId # Index of parent layer in a macro expansion. Equal to 0 for no parent + is_macro_expansion::Bool # FIXME +end + +#------------------------------------------------------------------------------- +# AST creation utilities +_node_id(graph::SyntaxGraph, ex::SyntaxTree) = (check_compatible_graph(graph, ex); ex._id) +function _node_id(graph::SyntaxGraph, ex) + # Fallback to give a comprehensible error message for use with the @ast macro + error("Attempt to use `$(repr(ex))` of type `$(typeof(ex))` as an AST node. Try annotating with `::K\"your_intended_kind\"?`") +end +function _node_id(graph::SyntaxGraph, ex::AbstractVector{<:SyntaxTree}) + # Fallback to give a comprehensible error message for use with the @ast macro + error("Attempt to use vector as an AST node. Did you mean to splat this? (content: `$(repr(ex))`)") +end + +_node_ids(graph::SyntaxGraph) = () +_node_ids(graph::SyntaxGraph, ::Nothing, cs...) = _node_ids(graph, cs...) +_node_ids(graph::SyntaxGraph, c, cs...) = (_node_id(graph, c), _node_ids(graph, cs...)...) +_node_ids(graph::SyntaxGraph, cs::SyntaxList, cs1...) = (_node_ids(graph, cs...)..., _node_ids(graph, cs1...)...) +function _node_ids(graph::SyntaxGraph, cs::SyntaxList) + check_compatible_graph(graph, cs) + cs.ids +end + +_unpack_srcref(graph, srcref::SyntaxTree) = _node_id(graph, srcref) +_unpack_srcref(graph, srcref::Tuple) = _node_ids(graph, srcref...) 
+_unpack_srcref(graph, srcref) = srcref + +function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val) + push!(ids, _node_id(graph, val)) +end +function _push_nodeid!(graph::SyntaxGraph, ids::Vector{NodeId}, val::Nothing) + nothing +end +function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals) + for v in vals + _push_nodeid!(graph, ids, v) + end +end +function _append_nodeids!(graph::SyntaxGraph, ids::Vector{NodeId}, vals::SyntaxList) + check_compatible_graph(graph, vals) + append!(ids, vals.ids) +end + +function makeleaf(graph::SyntaxGraph, srcref, proto; attrs...) + id = newnode!(graph) + ex = SyntaxTree(graph, id) + copy_attrs!(ex, proto, true) + setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...) + return ex +end + +function _makenode(graph::SyntaxGraph, srcref, proto, children; attrs...) + id = newnode!(graph) + setchildren!(graph, id, children) + ex = SyntaxTree(graph, id) + copy_attrs!(ex, proto, true) + setattr!(graph, id; source=_unpack_srcref(graph, srcref), attrs...) + return SyntaxTree(graph, id) +end +function _makenode(ctx, srcref, proto, children; attrs...) + _makenode(syntax_graph(ctx), srcref, proto, children; attrs...) +end + +function makenode(ctx, srcref, proto, children...; attrs...) + _makenode(ctx, srcref, proto, _node_ids(syntax_graph(ctx), children...); attrs...) +end + +function makeleaf(ctx, srcref, proto; kws...) + makeleaf(syntax_graph(ctx), srcref, proto; kws...) +end + +function makeleaf(ctx, srcref, k::Kind, value; kws...) + graph = syntax_graph(ctx) + if k == K"Identifier" || k == K"core" || k == K"top" || k == K"Symbol" || + k == K"globalref" || k == K"Placeholder" || + k == K"StrMacroName" || k == K"CmdMacroName" + makeleaf(graph, srcref, k; name_val=value, kws...) + elseif k == K"BindingId" + makeleaf(graph, srcref, k; var_id=value, kws...) + elseif k == K"label" + makeleaf(graph, srcref, k; id=value, kws...) 
+ elseif k == K"symbolic_label" + makeleaf(graph, srcref, k; name_val=value, kws...) + elseif k in KSet"TOMBSTONE SourceLocation latestworld latestworld_if_toplevel" + makeleaf(graph, srcref, k; kws...) + else + val = k == K"Integer" ? convert(Int, value) : + k == K"Float" ? convert(Float64, value) : + k == K"String" ? convert(String, value) : + k == K"Char" ? convert(Char, value) : + k == K"Value" ? value : + k == K"Bool" ? value : + error("Unexpected leaf kind `$k`") + makeleaf(graph, srcref, k; value=val, kws...) + end +end + +# TODO: Replace this with makeleaf variant? +function mapleaf(ctx, src, kind) + ex = makeleaf(syntax_graph(ctx), src, kind) + # TODO: Value coercion might be broken here due to use of `name_val` vs + # `value` vs ... ? + copy_attrs!(ex, src) + ex +end + +# Convenience functions to create leaf nodes referring to identifiers within +# the Core and Top modules. +core_ref(ctx, ex, name) = makeleaf(ctx, ex, K"core", name) +svec_type(ctx, ex) = core_ref(ctx, ex, "svec") +nothing_(ctx, ex) = core_ref(ctx, ex, "nothing") + +top_ref(ctx, ex, name) = makeleaf(ctx, ex, K"top", name) + +# Assign `ex` to an SSA variable. 
+# Return (variable, assignment_node) +function assign_tmp(ctx::AbstractLoweringContext, ex, name="tmp") + var = ssavar(ctx, ex, name) + assign_var = makenode(ctx, ex, K"=", var, ex) + var, assign_var +end + +function emit_assign_tmp(stmts::SyntaxList, ctx, ex, name="tmp") + if is_ssa(ctx, ex) + return ex + end + var = ssavar(ctx, ex, name) + push!(stmts, makenode(ctx, ex, K"=", var, ex)) + var +end + +#------------------------------------------------------------------------------- +# @ast macro +function _match_srcref(ex) + if Meta.isexpr(ex, :macrocall) && ex.args[1] == Symbol("@HERE") + QuoteNode(ex.args[2]) + else + esc(ex) + end +end + +function _match_kind(f::Function, srcref, ex) + kws = [] + if Meta.isexpr(ex, :call) + kind = esc(ex.args[1]) + args = ex.args[2:end] + if Meta.isexpr(args[1], :parameters) + kws = map(esc, args[1].args) + popfirst!(args) + end + while length(args) >= 1 && Meta.isexpr(args[end], :kw) + pushfirst!(kws, esc(pop!(args))) + end + if length(args) == 1 + srcref_tmp = gensym("srcref") + return quote + $srcref_tmp = $(_match_srcref(args[1])) + $(f(kind, srcref_tmp, kws)) + end + elseif length(args) > 1 + error("Unexpected: extra srcref argument in `$ex`?") + end + else + kind = esc(ex) + end + f(kind, srcref, kws) +end + +function _expand_ast_tree(ctx, srcref, tree) + if Meta.isexpr(tree, :(::)) + # Leaf node + if length(tree.args) == 2 + val = esc(tree.args[1]) + kindspec = tree.args[2] + else + val = nothing + kindspec = tree.args[1] + end + _match_kind(srcref, kindspec) do kind, srcref, kws + :(makeleaf($ctx, $srcref, $kind, $(val), $(kws...))) + end + elseif Meta.isexpr(tree, :call) && tree.args[1] === :(=>) + # Leaf node with copied attributes + kind = esc(tree.args[3]) + srcref = esc(tree.args[2]) + :(mapleaf($ctx, $srcref, $kind)) + elseif Meta.isexpr(tree, (:vcat, :hcat, :vect)) + # Interior node + flatargs = [] + for a in tree.args + if Meta.isexpr(a, :row) + append!(flatargs, a.args) + else + push!(flatargs, a) + end + end + 
children_ex = :(let child_ids = Vector{NodeId}(), graph = syntax_graph($ctx) + end) + child_stmts = children_ex.args[2].args + for a in flatargs[2:end] + child = _expand_ast_tree(ctx, srcref, a) + if Meta.isexpr(child, :(...)) + push!(child_stmts, :(_append_nodeids!(graph, child_ids, $(child.args[1])))) + else + push!(child_stmts, :(_push_nodeid!(graph, child_ids, $child))) + end + end + push!(child_stmts, :(child_ids)) + _match_kind(srcref, flatargs[1]) do kind, srcref, kws + :(_makenode($ctx, $srcref, $kind, $children_ex; $(kws...))) + end + elseif Meta.isexpr(tree, :(:=)) + lhs = tree.args[1] + rhs = _expand_ast_tree(ctx, srcref, tree.args[2]) + ssadef = gensym("ssadef") + quote + ($(esc(lhs)), $ssadef) = assign_tmp($ctx, $rhs, $(string(lhs))) + $ssadef + end + elseif Meta.isexpr(tree, :macrocall) + esc(tree) + elseif tree isa Expr + Expr(tree.head, map(a->_expand_ast_tree(ctx, srcref, a), tree.args)...) + else + esc(tree) + end +end + +""" + @ast ctx srcref tree + +Syntactic s-expression shorthand for constructing a `SyntaxTree` AST. + +* `ctx` - SyntaxGraph context +* `srcref` - Reference to the source code from which this AST was derived. + +The `tree` contains syntax of the following forms: +* `[kind child₁ child₂]` - construct an interior node with children +* `value :: kind` - construct a leaf node +* `ex => kind` - convert a leaf node to the given `kind`, copying attributes + from it and also using `ex` as the source reference. +* `var := ex` - Set `var=ssavar(...)` and return an assignment node `\$var=ex`. + `var` may be used outside `@ast` +* `cond ? ex1 : ex2` - Conditional; `ex1` and `ex2` will be recursively expanded. + `if ... end` and `if ... else ... end` also work with this. 
+ +Any `kind` can be replaced with an expression of the form +* `kind(srcref)` - override the source reference for this node and its children +* `kind(attr=val)` - set an additional attribute +* `kind(srcref; attr₁=val₁, attr₂=val₂)` - the general form + +In any place `srcref` is used, the special form `@HERE()` can be used to instead +to indicate that the "primary" location of the source is the location where +`@HERE` occurs. + + +# Examples + +``` +@ast ctx srcref [ + K"toplevel" + [K"using" + [K"importpath" + "Base" ::K"Identifier"(src) + ] + ] + [K"function" + [K"call" + "eval" ::K"Identifier" + "x" ::K"Identifier" + ] + [K"call" + "eval" ::K"core" + mn =>K"Identifier" + "x" ::K"Identifier" + ] + ] +] +``` +""" +macro ast(ctx, srcref, tree) + quote + ctx = $(esc(ctx)) + srcref = $(_match_srcref(srcref)) + $(_expand_ast_tree(:ctx, :srcref, tree)) + end +end + +#------------------------------------------------------------------------------- +# Mapping and copying of AST nodes +function copy_attrs!(dest, src, all=false) + # TODO: Make this faster? + for (name, attr) in pairs(src._graph.attributes) + if (all || (name !== :source && name !== :kind && name !== :syntax_flags)) && + haskey(attr, src._id) + dest_attr = getattr(dest._graph, name, nothing) + if !isnothing(dest_attr) + dest_attr[dest._id] = attr[src._id] + end + end + end +end + +function copy_attrs!(dest, head::Union{Kind,JuliaSyntax.SyntaxHead}, all=false) + if all + sethead!(dest._graph, dest._id, head) + end +end + +function mapchildren(f::Function, ctx, ex::SyntaxTree, do_map_child::Function; + extra_attrs...) + if is_leaf(ex) + return ex + end + orig_children = children(ex) + cs = isempty(extra_attrs) ? nothing : SyntaxList(ctx) + for (i,e) in enumerate(orig_children) + newchild = do_map_child(i) ? 
f(e) : e + if isnothing(cs) + if newchild == e + continue + else + cs = SyntaxList(ctx) + append!(cs, orig_children[1:i-1]) + end + end + push!(cs::SyntaxList, newchild) + end + if isnothing(cs) + # This function should be allocation-free if no children were changed + # by the mapping and there's no extra_attrs + return ex + end + cs::SyntaxList + ex2 = makenode(ctx, ex, head(ex), cs) + copy_attrs!(ex2, ex) + setattr!(ex2; extra_attrs...) + return ex2 +end + +function mapchildren(f::Function, ctx, ex::SyntaxTree, mapped_children::AbstractVector{<:Integer}; + extra_attrs...) + j = Ref(firstindex(mapped_children)) + function do_map_child(i) + ind = j[] + if ind <= lastindex(mapped_children) && mapped_children[ind] == i + j[] += 1 + true + else + false + end + end + mapchildren(f, ctx, ex, do_map_child; extra_attrs...) +end + +function mapchildren(f::Function, ctx, ex::SyntaxTree; extra_attrs...) + mapchildren(f, ctx, ex, i->true; extra_attrs...) +end + + +""" +Recursively copy AST `ex` into `ctx`. + +Special provenance handling: If `copy_source` is true, treat the `.source` +attribute as a reference and recurse on its contents. Otherwise, treat it like +any other attribute. 
+""" +function copy_ast(ctx, ex::SyntaxTree; copy_source=true) + graph1 = syntax_graph(ex) + graph2 = syntax_graph(ctx) + !copy_source && check_same_graph(graph1, graph2) + id2 = _copy_ast(graph2, graph1, ex._id, Dict{NodeId, NodeId}(), copy_source) + return SyntaxTree(graph2, id2) +end + +function _copy_ast(graph2::SyntaxGraph, graph1::SyntaxGraph, + id1::NodeId, seen, copy_source) + let copied = get(seen, id1, nothing) + isnothing(copied) || return copied + end + id2 = newnode!(graph2) + seen[id1] = id2 + src1 = get(SyntaxTree(graph1, id1), :source, nothing) + src2 = if !copy_source + src1 + elseif src1 isa NodeId + _copy_ast(graph2, graph1, src1, seen, copy_source) + elseif src1 isa Tuple + map(i->_copy_ast(graph2, graph1, i, seen, copy_source), src1) + else + src1 + end + copy_attrs!(SyntaxTree(graph2, id2), SyntaxTree(graph1, id1), true) + setattr!(graph2, id2; source=src2) + if !is_leaf(graph1, id1) + cs = NodeId[] + for cid in children(graph1, id1) + push!(cs, _copy_ast(graph2, graph1, cid, seen, copy_source)) + end + setchildren!(graph2, id2, cs) + end + return id2 +end + +#------------------------------------------------------------------------------- +function set_scope_layer(ctx, ex, layer_id, force) + k = kind(ex) + scope_layer = force ? layer_id : get(ex, :scope_layer, layer_id) + if k == K"module" || k == K"toplevel" || k == K"inert" + makenode(ctx, ex, ex, children(ex); + scope_layer=scope_layer) + elseif k == K"." + makenode(ctx, ex, ex, set_scope_layer(ctx, ex[1], layer_id, force), ex[2], + scope_layer=scope_layer) + elseif !is_leaf(ex) + mapchildren(e->set_scope_layer(ctx, e, layer_id, force), ctx, ex; + scope_layer=scope_layer) + else + makeleaf(ctx, ex, ex; + scope_layer=scope_layer) + end +end + +""" + adopt_scope(ex, ref) + +Copy `ex`, adopting the scope layer of `ref`. 
+""" +function adopt_scope(ex::SyntaxTree, scope_layer::LayerId) + set_scope_layer(ex, ex, scope_layer, true) +end + +function adopt_scope(ex::SyntaxTree, layer::ScopeLayer) + adopt_scope(ex, layer.id) +end + +function adopt_scope(ex::SyntaxTree, ref::SyntaxTree) + adopt_scope(ex, ref.scope_layer) +end + +function adopt_scope(exs::SyntaxList, ref) + out = SyntaxList(syntax_graph(exs)) + for e in exs + push!(out, adopt_scope(e, ref)) + end + return out +end + +# Type for `meta` attribute, to replace `Expr(:meta)`. +# It's unclear how much flexibility we need here - is a dict good, or could we +# just use a struct? Likely this will be sparse. Alternatively we could just +# use individual attributes but those aren't easy to add on an ad-hoc basis in +# the middle of a pass. +const CompileHints = Base.ImmutableDict{Symbol,Any} + +function CompileHints(d::Dict{Symbol, Any}) + id = CompileHints() + for (k, v) in d + id = CompileHints(id, k, v) + end + id +end + +function setmeta!(ex::SyntaxTree; kws...) + @assert length(kws) == 1 # todo relax later ? + key = first(keys(kws)) + value = first(values(kws)) + meta = begin + m = get(ex, :meta, nothing) + isnothing(m) ? CompileHints(key, value) : CompileHints(m, key, value) + end + setattr!(ex; meta=meta) + ex +end + +setmeta(ex::SyntaxTree; kws...) = setmeta!(copy_node(ex); kws...) + +function getmeta(ex::SyntaxTree, name::Symbol, default) + meta = get(ex, :meta, nothing) + isnothing(meta) ? default : get(meta, name, default) +end + +name_hint(name) = CompileHints(:name_hint, name) + +#------------------------------------------------------------------------------- +# Predicates and accessors working on expression trees + +# For historical reasons, `cglobal` and `ccall` are their own special +# quasi-identifier-like syntax but with special handling inside lowering which +# means they can't be used as normal identifiers. 
+function is_ccall_or_cglobal(name::AbstractString) + return name == "ccall" || name == "cglobal" +end + +function is_quoted(ex) + kind(ex) in KSet"Symbol quote top core globalref break inert + meta inbounds inline noinline loopinfo" +end + +function extension_type(ex) + @assert kind(ex) == K"extension" || kind(ex) == K"assert" + @chk numchildren(ex) >= 1 + @chk kind(ex[1]) == K"Symbol" + ex[1].name_val +end + +function is_sym_decl(x) + k = kind(x) + k == K"Identifier" || k == K"::" +end + +function is_eventually_call(ex::SyntaxTree) + k = kind(ex) + return k == K"call" || ((k == K"where" || k == K"::") && is_eventually_call(ex[1])) +end + +function find_parameters_ind(exs) + i = length(exs) + while i >= 1 + k = kind(exs[i]) + if k == K"parameters" + return i + elseif k != K"do" + break + end + i -= 1 + end + return 0 +end + +function has_parameters(ex::SyntaxTree) + find_parameters_ind(children(ex)) != 0 +end + +function has_parameters(args::AbstractVector) + find_parameters_ind(args) != 0 +end + +function any_assignment(exs) + any(kind(e) == K"=" for e in exs) +end + +function is_valid_modref(ex) + return kind(ex) == K"." && kind(ex[2]) == K"Symbol" && + (kind(ex[1]) == K"Identifier" || is_valid_modref(ex[1])) +end + +function is_core_ref(ex, name) + kind(ex) == K"core" && ex.name_val == name +end + +function is_core_nothing(ex) + is_core_ref(ex, "nothing") +end + +function is_core_Any(ex) + is_core_ref(ex, "Any") +end + +function is_simple_atom(ctx, ex) + k = kind(ex) + # TODO thismodule + is_literal(k) || k == K"Symbol" || k == K"Value" || is_ssa(ctx, ex) || is_core_nothing(ex) +end + +function is_identifier_like(ex) + k = kind(ex) + k == K"Identifier" || k == K"BindingId" || k == K"Placeholder" +end + +function decl_var(ex) + kind(ex) == K"::" ? ex[1] : ex +end + +# Given the signature of a `function`, return the symbol that will ultimately +# be assigned to in local/global scope, if any. 
+function assigned_function_name(ex) + while kind(ex) == K"where" + # f() where T + ex = ex[1] + end + if kind(ex) == K"::" && numchildren(ex) == 2 + # f()::T + ex = ex[1] + end + if kind(ex) != K"call" + throw(LoweringError(ex, "Expected call syntax in function signature")) + end + ex = ex[1] + if kind(ex) == K"curly" + # f{T}() + ex = ex[1] + end + if kind(ex) == K"::" || kind(ex) == K"." + # (obj::CallableType)(args) + # A.b.c(args) + nothing + elseif is_identifier_like(ex) + ex + else + throw(LoweringError(ex, "Unexpected name in function signature")) + end +end + +# Remove empty parameters block, eg, in the arg list of `f(x, y;)` +function remove_empty_parameters(args) + i = length(args) + while i > 0 && kind(args[i]) == K"parameters" && numchildren(args[i]) == 0 + i -= 1 + end + args[1:i] +end + +function to_symbol(ctx, ex) + @ast ctx ex ex=>K"Symbol" +end + +function new_scope_layer(ctx, mod_ref::Module=ctx.mod) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, ctx.mod, 0, false) + push!(ctx.scope_layers, new_layer) + new_layer.id +end + +function new_scope_layer(ctx, mod_ref::SyntaxTree) + @assert kind(mod_ref) == K"Identifier" + new_scope_layer(ctx, ctx.scope_layers[mod_ref.scope_layer].mod) +end + +#------------------------------------------------------------------------------- +# Context wrapper which helps to construct a list of statements to be executed +# prior to some expression. Useful when we need to use subexpressions multiple +# times. 
struct StatementListCtx{Ctx, GraphType} <: AbstractLoweringContext
    ctx::Ctx
    stmts::SyntaxList{GraphType}
end

function Base.getproperty(ctx::StatementListCtx, field::Symbol)
    if field === :ctx
        getfield(ctx, :ctx)
    elseif field === :stmts
        getfield(ctx, :stmts)
    else
        # Forward all other property accesses to the wrapped context
        getproperty(getfield(ctx, :ctx), field)
    end
end

function emit(ctx::StatementListCtx, ex)
    push!(ctx.stmts, ex)
end

function emit_assign_tmp(ctx::StatementListCtx, ex, name="tmp")
    emit_assign_tmp(ctx.stmts, ctx.ctx, ex, name)
end

with_stmts(ctx, stmts) = StatementListCtx(ctx, stmts)
with_stmts(ctx::StatementListCtx, stmts) = StatementListCtx(ctx.ctx, stmts)

function with_stmts(ctx)
    StatementListCtx(ctx, SyntaxList(ctx))
end

with_stmts(ctx::StatementListCtx) = StatementListCtx(ctx.ctx)
diff --git a/JuliaLowering/src/bindings.jl b/JuliaLowering/src/bindings.jl
new file mode 100644
index 0000000000000..286e67ecbeb14
--- /dev/null
+++ b/JuliaLowering/src/bindings.jl
@@ -0,0 +1,249 @@
"""
Metadata about a binding
"""
struct BindingInfo
    id::IdTag                       # Unique integer identifying this binding
    name::String
    kind::Symbol                    # :local :global :argument :static_parameter
    node_id::Int                    # ID of associated K"BindingId" node in the syntax graph
    mod::Union{Nothing,Module}      # Set when `kind === :global`
    type::Union{Nothing,SyntaxTree} # Type, for bindings declared like x::T = 10
    n_assigned::Int32               # Number of times variable is assigned to
    is_const::Bool                  # Constant, cannot be reassigned
    is_ssa::Bool                    # Single assignment, defined before use
    is_captured::Bool               # Variable is captured by some lambda
    is_always_defined::Bool         # A local that we know has an assignment that dominates all usages (is never undef)
    is_internal::Bool               # True for internal bindings generated by the compiler
    is_ambiguous_local::Bool        # Local, but would be global in soft scope (ie, the REPL)
    is_nospecialize::Bool           # @nospecialize on this argument (only valid for kind == :argument)
end

function BindingInfo(id::IdTag, name::AbstractString, kind::Symbol, node_id::Integer;
                     mod::Union{Nothing,Module} = nothing,
                     type::Union{Nothing,SyntaxTree} = nothing,
                     n_assigned::Integer = 0,
                     is_const::Bool = false,
                     is_ssa::Bool = false,
                     is_captured::Bool = false,
                     is_always_defined::Bool = is_ssa,
                     is_internal::Bool = false,
                     is_ambiguous_local::Bool = false,
                     is_nospecialize::Bool = false)
    BindingInfo(id, name, kind, node_id, mod, type, n_assigned, is_const,
                is_ssa, is_captured, is_always_defined,
                is_internal, is_ambiguous_local, is_nospecialize)
end

function Base.show(io::IO, binfo::BindingInfo)
    print(io, "BindingInfo(", binfo.id, ", ",
          repr(binfo.name), ", ",
          repr(binfo.kind), ", ",
          binfo.node_id)
    if !isnothing(binfo.mod)
        print(io, ", mod=", binfo.mod)
    end
    if !isnothing(binfo.type)
        print(io, ", type=", binfo.type)
    end
    if binfo.n_assigned != 0
        print(io, ", n_assigned=", binfo.n_assigned)
    end
    if binfo.is_const
        print(io, ", is_const=", binfo.is_const)
    end
    if binfo.is_ssa
        print(io, ", is_ssa=", binfo.is_ssa)
    end
    if binfo.is_captured
        print(io, ", is_captured=", binfo.is_captured)
    end
    if binfo.is_always_defined != binfo.is_ssa
        print(io, ", is_always_defined=", binfo.is_always_defined)
    end
    if binfo.is_internal
        print(io, ", is_internal=", binfo.is_internal)
    end
    if binfo.is_ambiguous_local
        print(io, ", is_ambiguous_local=", binfo.is_ambiguous_local)
    end
    if binfo.is_nospecialize
        print(io, ", is_nospecialize=", binfo.is_nospecialize)
    end
    print(io, ")")
end

"""
Metadata about "entities" (variables, constants, etc) in the program. Each
entity is associated to a unique integer id, the BindingId. A binding will be
inferred for each *name* in the user's source program by symbolic analysis of
the source.

However, bindings can also be introduced programmatically during lowering or
macro expansion: the primary key for bindings is the `BindingId` integer, not
a name.
"""
struct Bindings
    info::Vector{BindingInfo}
end

Bindings() = Bindings(Vector{BindingInfo}())

next_binding_id(bindings::Bindings) = length(bindings.info) + 1

function add_binding(bindings::Bindings, binding)
    if next_binding_id(bindings) != binding.id
        error("Use next_binding_id() to create a valid binding id")
    end
    push!(bindings.info, binding)
end

function _binding_id(id::Integer)
    id
end

function _binding_id(ex::SyntaxTree)
    @chk kind(ex) == K"BindingId"
    ex.var_id
end

# Replace the stored BindingInfo for `x` with a copy, overriding only the
# fields passed as non-nothing keywords (n_assigned is incremented by
# `add_assigned` rather than replaced).
function update_binding!(bindings::Bindings, x;
                         type=nothing, is_const=nothing, add_assigned=0,
                         is_always_defined=nothing, is_captured=nothing)
    id = _binding_id(x)
    b = lookup_binding(bindings, id)
    bindings.info[id] = BindingInfo(
        b.id,
        b.name,
        b.kind,
        b.node_id,
        b.mod,
        isnothing(type) ? b.type : type,
        b.n_assigned + add_assigned,
        isnothing(is_const) ? b.is_const : is_const,
        b.is_ssa,
        isnothing(is_captured) ? b.is_captured : is_captured,
        isnothing(is_always_defined) ? b.is_always_defined : is_always_defined,
        b.is_internal,
        b.is_ambiguous_local,
        b.is_nospecialize
    )
end

function lookup_binding(bindings::Bindings, x)
    bindings.info[_binding_id(x)]
end

function lookup_binding(ctx::AbstractLoweringContext, x)
    lookup_binding(ctx.bindings, x)
end

function update_binding!(ctx::AbstractLoweringContext, x; kws...)
    update_binding!(ctx.bindings, x; kws...)
end

function new_binding(ctx::AbstractLoweringContext, srcref::SyntaxTree,
                     name::AbstractString, kind::Symbol; kws...)
    binding_id = next_binding_id(ctx.bindings)
    ex = @ast ctx srcref binding_id::K"BindingId"
    add_binding(ctx.bindings, BindingInfo(binding_id, name, kind, ex._id; kws...))
    ex
end

# Create a new SSA binding
function ssavar(ctx::AbstractLoweringContext, srcref, name="tmp")
    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
    new_binding(ctx, nameref, name, :local; is_ssa=true, is_internal=true)
end

# Create a new local mutable binding or lambda argument
function new_local_binding(ctx::AbstractLoweringContext, srcref, name; kind=:local, kws...)
    @assert kind === :local || kind === :argument
    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
    ex = new_binding(ctx, nameref, name, kind; is_internal=true, kws...)
    lbindings = current_lambda_bindings(ctx)
    if !isnothing(lbindings)
        init_lambda_binding(lbindings, ex.var_id)
    end
    ex
end

function new_global_binding(ctx::AbstractLoweringContext, srcref, name, mod; kws...)
    nameref = makeleaf(ctx, srcref, K"Identifier", name_val=name)
    new_binding(ctx, nameref, name, :global; is_internal=true, mod=mod, kws...)
end

function binding_ex(ctx::AbstractLoweringContext, id::IdTag)
    # Reconstruct the SyntaxTree for this binding. We keep only the node_id
    # here, because that's got a concrete type. Whereas if we stored SyntaxTree
    # that would contain the type of the graph used in the pass where the
    # bindings were created and we'd need to call reparent(), etc.
    SyntaxTree(syntax_graph(ctx), lookup_binding(ctx, id).node_id)
end


#-------------------------------------------------------------------------------
"""
Metadata about how a binding is used within some enclosing lambda
"""
struct LambdaBindingInfo
    is_captured::Bool
    is_read::Bool
    is_assigned::Bool
    # Binding was the function name in a call. Used for specialization
    # heuristics in the optimizer.
    is_called::Bool
end

LambdaBindingInfo() = LambdaBindingInfo(false, false, false, false)

function LambdaBindingInfo(parent::LambdaBindingInfo;
                           is_captured = nothing,
                           is_read = nothing,
                           is_assigned = nothing,
                           is_called = nothing)
    LambdaBindingInfo(
        isnothing(is_captured) ? parent.is_captured : is_captured,
        isnothing(is_read) ? parent.is_read : is_read,
        isnothing(is_assigned) ? parent.is_assigned : is_assigned,
        isnothing(is_called) ? parent.is_called : is_called,
    )
end

struct LambdaBindings
    # Bindings used within the lambda
    self::IdTag
    bindings::Dict{IdTag,LambdaBindingInfo}
end

# BUG FIX: the default constructor previously built `Dict{IdTag,LambdaBindings}()`
# (wrong value type); it only worked because the empty Dict was implicitly
# converted to the field type. Use the correct element type directly.
LambdaBindings(self::IdTag = 0) = LambdaBindings(self, Dict{IdTag,LambdaBindingInfo}())

function init_lambda_binding(bindings::LambdaBindings, id; kws...)
    @assert !haskey(bindings.bindings, id)
    bindings.bindings[id] = LambdaBindingInfo(LambdaBindingInfo(); kws...)
end

function update_lambda_binding!(bindings::LambdaBindings, x; kws...)
    id = _binding_id(x)
    binfo = bindings.bindings[id]
    bindings.bindings[id] = LambdaBindingInfo(binfo; kws...)
end

function update_lambda_binding!(ctx::AbstractLoweringContext, x; kws...)
    update_lambda_binding!(current_lambda_bindings(ctx), x; kws...)
end

function lookup_lambda_binding(bindings::LambdaBindings, x)
    get(bindings.bindings, _binding_id(x), nothing)
end

function lookup_lambda_binding(ctx::AbstractLoweringContext, x)
    lookup_lambda_binding(current_lambda_bindings(ctx), x)
end

function has_lambda_binding(bindings::LambdaBindings, x)
    haskey(bindings.bindings, _binding_id(x))
end

function has_lambda_binding(ctx::AbstractLoweringContext, x)
    has_lambda_binding(current_lambda_bindings(ctx), x)
end
diff --git a/JuliaLowering/src/closure_conversion.jl b/JuliaLowering/src/closure_conversion.jl
new file mode 100644
index 0000000000000..d40d944db13f1
--- /dev/null
+++ b/JuliaLowering/src/closure_conversion.jl
@@ -0,0 +1,635 @@
struct ClosureInfo{GraphType}
    # Global name of the type of the closure
    type_name::SyntaxTree{GraphType}
    # Names of fields for use with getfield, in order
    field_names::SyntaxList{GraphType}
    # Map from the original BindingId of closed-over vars to the index of the
    # associated field in the closure type.
    field_inds::Dict{IdTag,Int}
end

struct ClosureConversionCtx{GraphType} <: AbstractLoweringContext
    graph::GraphType
    bindings::Bindings
    mod::Module
    closure_bindings::Dict{IdTag,ClosureBindings}
    capture_rewriting::Union{Nothing,ClosureInfo{GraphType},SyntaxList{GraphType}}
    lambda_bindings::LambdaBindings
    # True if we're in a section of code which preserves top-level sequencing
    # such that closure types can be emitted inline with other code.
    is_toplevel_seq_point::Bool
    # True if this expression should not have toplevel effects, namely, it
    # should not declare the globals it references. This allows generated
    # functions to refer to globals that have already been declared, without
    # triggering the "function body AST not pure" error.
    toplevel_pure::Bool
    toplevel_stmts::SyntaxList{GraphType}
    closure_infos::Dict{IdTag,ClosureInfo{GraphType}}
end

function ClosureConversionCtx(graph::GraphType, bindings::Bindings,
        mod::Module, closure_bindings::Dict{IdTag,ClosureBindings},
        lambda_bindings::LambdaBindings) where {GraphType}
    ClosureConversionCtx{GraphType}(
        graph, bindings, mod, closure_bindings, nothing,
        lambda_bindings, false, true, SyntaxList(graph),
        Dict{IdTag,ClosureInfo{GraphType}}())
end

function current_lambda_bindings(ctx::ClosureConversionCtx)
    ctx.lambda_bindings
end

# Access captured variable from inside a closure
function captured_var_access(ctx, ex)
    cap_rewrite = ctx.capture_rewriting
    if cap_rewrite isa ClosureInfo
        field_sym = cap_rewrite.field_names[cap_rewrite.field_inds[ex.var_id]]
        @ast ctx ex [K"call"
            "getfield"::K"core"
            binding_ex(ctx, current_lambda_bindings(ctx).self)
            field_sym
        ]
    else
        interpolations = cap_rewrite
        @assert !isnothing(cap_rewrite)
        if isempty(interpolations) || !is_same_identifier_like(interpolations[end], ex)
            push!(interpolations, ex)
        end
        @ast ctx ex [K"captured_local" length(interpolations)::K"Integer"]
    end
end

function get_box_contents(ctx::ClosureConversionCtx, var, box_ex)
    undef_var = new_local_binding(ctx, var, lookup_binding(ctx, var.var_id).name)
    @ast ctx var [K"block"
        box := box_ex
        # Lower in an UndefVar check to a similarly named variable
        # (ref #20016) so that closure lowering Box introduction
        # doesn't impact the error message and the compiler is expected
        # to fold away the extraneous null check
        #
        # TODO: Ideally the runtime would rely on provenance info for
        # this error and we can remove the isdefined check.
        [K"if" [K"call"
                "isdefined"::K"core"
                box
                "contents"::K"Symbol"
            ]
            ::K"TOMBSTONE"
            [K"block"
                [K"newvar" undef_var]
                undef_var
            ]
        ]
        [K"call"
            "getfield"::K"core"
            box
            "contents"::K"Symbol"
        ]
    ]
end

# Convert `ex` to `type` by calling `convert(type, ex)` when necessary.
#
# Used for converting the right hand side of an assignment to a typed local or
# global and for converting the return value of a function call to the declared
# return type.
function convert_for_type_decl(ctx, srcref, ex, type, do_typeassert)
    # Use a slot to permit union-splitting this in inference
    tmp = new_local_binding(ctx, srcref, "tmp", is_always_defined=true)

    @ast ctx srcref [K"block"
        type_tmp := type
        # [K"=" type_ssa renumber_assigned_ssavalues(type)]
        [K"=" tmp ex]
        [K"if"
            [K"call" "isa"::K"core" tmp type_tmp]
            "nothing"::K"core"
            [K"="
                tmp
                if do_typeassert
                    [K"call"
                        "typeassert"::K"core"
                        [K"call" "convert"::K"top" type_tmp tmp]
                        type_tmp
                    ]
                else
                    [K"call" "convert"::K"top" type_tmp tmp]
                end
            ]
        ]
        tmp
    ]
end

# TODO: Avoid producing redundant calls to declare_global
function make_globaldecl(ctx, src_ex, mod, name, strong=false, type=nothing; ret_nothing=false)
    if !ctx.toplevel_pure
        decl = @ast ctx src_ex [K"block"
            [K"call"
                "declare_global"::K"core"
                mod::K"Value" name::K"Symbol" strong::K"Bool"
                if type !== nothing
                    type
                end
            ]
            [K"latestworld"]
            @ast ctx src_ex [K"removable" "nothing"::K"core"]
        ]
        if ctx.is_toplevel_seq_point
            return decl
        else
            push!(ctx.toplevel_stmts, decl)
        end
    end
    if ret_nothing
        nothing
    else
        @ast ctx src_ex [K"removable" "nothing"::K"core"]
    end
end

function convert_global_assignment(ctx, ex, var, rhs0)
    binfo = lookup_binding(ctx, var)
    @assert binfo.kind == :global
    stmts = SyntaxList(ctx)
    decl = make_globaldecl(ctx, ex, binfo.mod, binfo.name, true; ret_nothing=true)
    decl !== nothing && push!(stmts, decl)
    rhs1 = if is_simple_atom(ctx, rhs0)
        rhs0
    else
        tmp = ssavar(ctx, rhs0)
        push!(stmts, @ast ctx rhs0 [K"=" tmp rhs0])
        tmp
    end
    rhs = if binfo.is_const && isnothing(binfo.type)
        # const global assignments without a type declaration don't need us to
        # deal with the binding type at all.
        rhs1
    else
        type_var = ssavar(ctx, ex, "binding_type")
        push!(stmts, @ast ctx ex [K"="
            type_var
            [K"call"
                "get_binding_type"::K"core"
                binfo.mod::K"Value"
                binfo.name::K"Symbol"
            ]
        ])
        do_typeassert = false # Global assignment type checking is done by the runtime
        convert_for_type_decl(ctx, ex, rhs1, type_var, do_typeassert)
    end
    push!(stmts, @ast ctx ex [K"=" var rhs])
    @ast ctx ex [K"block"
        stmts...
        rhs1
    ]
end

# Convert assignment to a closed variable to a `setfield!` call and generate
# `convert` calls for variables with declared types.
#
# When doing this, the original value needs to be preserved, to ensure the
# expression `a=b` always returns exactly `b`.
function convert_assignment(ctx, ex)
    var = ex[1]
    rhs0 = _convert_closures(ctx, ex[2])
    if kind(var) == K"Placeholder"
        return @ast ctx ex [K"=" var rhs0]
    end
    @chk kind(var) == K"BindingId"
    binfo = lookup_binding(ctx, var)
    if binfo.kind == :global
        convert_global_assignment(ctx, ex, var, rhs0)
    else
        @assert binfo.kind == :local || binfo.kind == :argument
        boxed = is_boxed(binfo)
        if isnothing(binfo.type) && !boxed
            @ast ctx ex [K"=" var rhs0]
        else
            # Typed local
            tmp_rhs0 = ssavar(ctx, rhs0)
            rhs = isnothing(binfo.type) ? tmp_rhs0 :
                convert_for_type_decl(ctx, ex, tmp_rhs0, _convert_closures(ctx, binfo.type), true)
            assignment = if boxed
                @ast ctx ex [K"call"
                    "setfield!"::K"core"
                    is_self_captured(ctx, var) ? captured_var_access(ctx, var) : var
                    "contents"::K"Symbol"
                    rhs
                ]
            else
                @ast ctx ex [K"=" var rhs]
            end
            @ast ctx ex [K"block"
                [K"=" tmp_rhs0 rhs0]
                assignment
                tmp_rhs0
            ]
        end
    end
end

# Compute fields for a closure type, one field for each captured variable.
+function closure_type_fields(ctx, srcref, closure_binds, is_opaque) + capture_ids = Vector{IdTag}() + for lambda_bindings in closure_binds.lambdas + for (id, lbinfo) in lambda_bindings.bindings + if lbinfo.is_captured + push!(capture_ids, id) + end + end + end + # sort here to avoid depending on undefined Dict iteration order. + capture_ids = sort!(unique(capture_ids)) + + field_syms = SyntaxList(ctx) + if is_opaque + field_orig_bindings = capture_ids + # For opaque closures we don't try to generate sensible names for the + # fields as there's no closure type to generate. + for (i,id) in enumerate(field_orig_bindings) + push!(field_syms, @ast ctx srcref i::K"Integer") + end + else + field_names = Dict{String,IdTag}() + for id in capture_ids + binfo = lookup_binding(ctx, id) + # We name each field of the closure after the variable which was closed + # over, for clarity. Adding a suffix can be necessary when collisions + # occur due to macro expansion and generated bindings + name0 = binfo.name + name = name0 + i = 1 + while haskey(field_names, name) + name = "$name0#$i" + i += 1 + end + field_names[name] = id + end + field_orig_bindings = Vector{IdTag}() + for (name,id) in sort!(collect(field_names)) + push!(field_syms, @ast ctx srcref name::K"Symbol") + push!(field_orig_bindings, id) + end + end + field_inds = Dict{IdTag,Int}() + field_is_box = Vector{Bool}() + for (i,id) in enumerate(field_orig_bindings) + push!(field_is_box, is_boxed(ctx, id)) + field_inds[id] = i + end + + return field_syms, field_orig_bindings, field_inds, field_is_box +end + +# Return a thunk which creates a new type for a closure with `field_syms` named +# fields. The new type will be named `name_str` which must be an unassigned +# name in the module. +function type_for_closure(ctx::ClosureConversionCtx, srcref, name_str, field_syms, field_is_box) + # New closure types always belong to the module we're expanding into - they + # need to be serialized there during precompile. 
+ mod = ctx.mod + type_binding = new_global_binding(ctx, srcref, name_str, mod) + type_ex = @ast ctx srcref [K"call" + #"_call_latest"::K"core" + eval_closure_type::K"Value" + ctx.mod::K"Value" + name_str::K"Symbol" + [K"call" "svec"::K"core" field_syms...] + [K"call" "svec"::K"core" [f::K"Bool" for f in field_is_box]...] + ] + type_ex, type_binding +end + +function is_boxed(binfo::BindingInfo) + # True for + # * :argument when it's not reassigned + # * :static_parameter (these can't be reassigned) + defined_but_not_assigned = binfo.is_always_defined && binfo.n_assigned == 0 + # For now, we box almost everything but later we'll want to do dominance + # analysis on the untyped IR. + return binfo.is_captured && !defined_but_not_assigned +end + +function is_boxed(ctx, x) + is_boxed(lookup_binding(ctx, x)) +end + +# Is captured in the closure's `self` argument +function is_self_captured(ctx, x) + lbinfo = lookup_lambda_binding(ctx, x) + !isnothing(lbinfo) && lbinfo.is_captured +end + +# Map the children of `ex` through _convert_closures, lifting any toplevel +# closure definition statements to occur before the other content of `ex`. +function map_cl_convert(ctx::ClosureConversionCtx, ex, toplevel_preserving) + if ctx.is_toplevel_seq_point && !toplevel_preserving + toplevel_stmts = SyntaxList(ctx) + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, ctx.capture_rewriting, ctx.lambda_bindings, + false, ctx.toplevel_pure, toplevel_stmts, ctx.closure_infos) + res = mapchildren(e->_convert_closures(ctx2, e), ctx2, ex) + if isempty(toplevel_stmts) + res + else + @ast ctx ex [K"block" + toplevel_stmts... + res + ] + end + else + mapchildren(e->_convert_closures(ctx, e), ctx, ex) + end +end + +function _convert_closures(ctx::ClosureConversionCtx, ex) + k = kind(ex) + if k == K"BindingId" + access = is_self_captured(ctx, ex) ? 
captured_var_access(ctx, ex) : ex + if is_boxed(ctx, ex) + get_box_contents(ctx, ex, access) + else + access + end + elseif is_leaf(ex) || k == K"inert" || k == K"static_eval" + ex + elseif k == K"=" + convert_assignment(ctx, ex) + elseif k == K"isdefined" + # Convert isdefined expr to function for closure converted variables + var = ex[1] + binfo = lookup_binding(ctx, var) + if is_boxed(binfo) + access = is_self_captured(ctx, var) ? captured_var_access(ctx, var) : var + @ast ctx ex [K"call" + "isdefined"::K"core" + access + "contents"::K"Symbol" + ] + elseif binfo.is_always_defined || is_self_captured(ctx, var) + # Captured but unboxed vars are always defined + @ast ctx ex true::K"Bool" + elseif binfo.kind == :global + # Normal isdefined won't work for globals (#56985) + @ast ctx ex [K"call" + "isdefinedglobal"::K"core" + ctx.mod::K"Value" + binfo.name::K"Symbol" + false::K"Bool"] + else + ex + end + elseif k == K"decl" + @assert kind(ex[1]) == K"BindingId" + binfo = lookup_binding(ctx, ex[1]) + if binfo.kind == :global + # flisp has this, but our K"assert" handling is in a previous pass + # [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + make_globaldecl(ctx, ex, binfo.mod, binfo.name, true, _convert_closures(ctx, ex[2])) + else + makeleaf(ctx, ex, K"TOMBSTONE") + end + elseif k == K"global" + # Leftover `global` forms become weak globals. 
+ mod, name = if kind(ex[1]) == K"BindingId" + binfo = lookup_binding(ctx, ex[1]) + @assert binfo.kind == :global + binfo.mod, binfo.name + else + # See note about using eval on Expr(:global/:const, GlobalRef(...)) + @assert ex[1].value isa GlobalRef + ex[1].value.mod, String(ex[1].value.name) + end + @ast ctx ex [K"unused_only" make_globaldecl(ctx, ex, mod, name, false)] + elseif k == K"local" + var = ex[1] + binfo = lookup_binding(ctx, var) + if binfo.is_captured + @ast ctx ex [K"=" var [K"call" "Box"::K"core"]] + elseif !binfo.is_always_defined + @ast ctx ex [K"newvar" var] + else + makeleaf(ctx, ex, K"TOMBSTONE") + end + elseif k == K"lambda" + closure_convert_lambda(ctx, ex) + elseif k == K"function_decl" + func_name = ex[1] + @assert kind(func_name) == K"BindingId" + func_name_id = func_name.var_id + if haskey(ctx.closure_bindings, func_name_id) + closure_info = get(ctx.closure_infos, func_name_id, nothing) + needs_def = isnothing(closure_info) + if needs_def + closure_binds = ctx.closure_bindings[func_name_id] + field_syms, field_orig_bindings, field_inds, field_is_box = + closure_type_fields(ctx, ex, closure_binds, false) + name_str = reserve_module_binding_i(ctx.mod, + "#$(join(closure_binds.name_stack, "#"))##") + closure_type_def, closure_type_ = + type_for_closure(ctx, ex, name_str, field_syms, field_is_box) + if !ctx.is_toplevel_seq_point + push!(ctx.toplevel_stmts, closure_type_def) + push!(ctx.toplevel_stmts, @ast ctx ex (::K"latestworld_if_toplevel")) + closure_type_def = nothing + end + closure_info = ClosureInfo(closure_type_, field_syms, field_inds) + ctx.closure_infos[func_name_id] = closure_info + type_params = SyntaxList(ctx) + init_closure_args = SyntaxList(ctx) + for (id, boxed) in zip(field_orig_bindings, field_is_box) + field_val = binding_ex(ctx, id) + if is_self_captured(ctx, field_val) + # Access from outer closure if necessary but do not + # unbox to feed into the inner nested closure. 
+ field_val = captured_var_access(ctx, field_val) + end + push!(init_closure_args, field_val) + if !boxed + push!(type_params, @ast ctx ex [K"call" + # TODO: Update to use _typeof_captured_variable (#40985) + #"_typeof_captured_variable"::K"core" + "typeof"::K"core" + field_val]) + end + end + @ast ctx ex [K"block" + closure_type_def + (::K"latestworld_if_toplevel") + closure_type := if isempty(type_params) + closure_type_ + else + [K"call" "apply_type"::K"core" closure_type_ type_params...] + end + closure_val := [K"new" + closure_type + init_closure_args... + ] + convert_assignment(ctx, [K"=" func_name closure_val]) + ::K"TOMBSTONE" + ] + else + @ast ctx ex (::K"TOMBSTONE") + end + else + # Single-arg K"method" has the side effect of creating a global + # binding for `func_name` if it doesn't exist. + @ast ctx ex [K"block" + [K"method" func_name] + ::K"TOMBSTONE" # <- function_decl should not be used in value position + ] + end + elseif k == K"function_type" + func_name = ex[1] + if kind(func_name) == K"BindingId" && lookup_binding(ctx, func_name).kind === :local + ctx.closure_infos[func_name.var_id].type_name + else + @ast ctx ex [K"call" "Typeof"::K"core" func_name] + end + elseif k == K"method_defs" + name = ex[1] + is_closure = kind(name) == K"BindingId" && lookup_binding(ctx, name).kind === :local + cap_rewrite = is_closure ? ctx.closure_infos[name.var_id] : nothing + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, cap_rewrite, ctx.lambda_bindings, + ctx.is_toplevel_seq_point, ctx.toplevel_pure, ctx.toplevel_stmts, + ctx.closure_infos) + body = map_cl_convert(ctx2, ex[2], false) + if is_closure + if ctx.is_toplevel_seq_point + body + else + # Move methods out to a top-level sequence point. 
+ push!(ctx.toplevel_stmts, body) + @ast ctx ex (::K"TOMBSTONE") + end + else + @ast ctx ex [K"block" + body + ::K"TOMBSTONE" + ] + end + elseif k == K"_opaque_closure" + closure_binds = ctx.closure_bindings[ex[1].var_id] + field_syms, field_orig_bindings, field_inds, field_is_box = + closure_type_fields(ctx, ex, closure_binds, true) + + capture_rewrites = ClosureInfo(ex #=unused=#, field_syms, field_inds) + + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, capture_rewrites, ctx.lambda_bindings, + false, ctx.toplevel_pure, ctx.toplevel_stmts, ctx.closure_infos) + + init_closure_args = SyntaxList(ctx) + for id in field_orig_bindings + push!(init_closure_args, binding_ex(ctx, id)) + end + @ast ctx ex [K"new_opaque_closure" + ex[2] # arg type tuple + ex[3] # return_lower_bound + ex[4] # return_upper_bound + ex[5] # allow_partial + [K"opaque_closure_method" + "nothing"::K"core" + ex[6] # nargs + ex[7] # is_va + ex[8] # functionloc + closure_convert_lambda(ctx2, ex[9]) + ] + init_closure_args... + ] + else + # A small number of kinds are toplevel-preserving in terms of closure + # closure definitions will be lifted out into `toplevel_stmts` if they + # occur inside `ex`. 
+ toplevel_seq_preserving = k == K"if" || k == K"elseif" || k == K"block" || + k == K"tryfinally" || k == K"trycatchelse" + map_cl_convert(ctx, ex, toplevel_seq_preserving) + end +end + +function closure_convert_lambda(ctx, ex) + @assert kind(ex) == K"lambda" + lambda_bindings = ex.lambda_bindings + interpolations = nothing + if isnothing(ctx.capture_rewriting) + # Global method which may capture locals + interpolations = SyntaxList(ctx) + cap_rewrite = interpolations + else + cap_rewrite = ctx.capture_rewriting + end + ctx2 = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, cap_rewrite, lambda_bindings, + ex.is_toplevel_thunk, ctx.toplevel_pure && ex.toplevel_pure, + ctx.toplevel_stmts, ctx.closure_infos) + lambda_children = SyntaxList(ctx) + args = ex[1] + push!(lambda_children, args) + push!(lambda_children, ex[2]) + + # Add box initializations for arguments which are captured by an inner lambda + body_stmts = SyntaxList(ctx) + for arg in children(args) + kind(arg) != K"Placeholder" || continue + if is_boxed(ctx, arg) + push!(body_stmts, @ast ctx arg [K"=" + arg + [K"call" "Box"::K"core" arg] + ]) + end + end + # Convert body. + input_body_stmts = kind(ex[3]) != K"block" ? ex[3:3] : ex[3][1:end] + for e in input_body_stmts + push!(body_stmts, _convert_closures(ctx2, e)) + end + push!(lambda_children, @ast ctx2 ex[3] [K"block" body_stmts...]) + + if numchildren(ex) > 3 + # Convert return type + @assert numchildren(ex) == 4 + push!(lambda_children, _convert_closures(ctx2, ex[4])) + end + + lam = makenode(ctx, ex, ex, lambda_children; lambda_bindings=lambda_bindings) + if !isnothing(interpolations) && !isempty(interpolations) + @ast ctx ex [K"call" + replace_captured_locals!::K"Value" + lam + [K"call" + "svec"::K"core" + interpolations... 
+ ] + ] + else + lam + end +end + + +""" +Closure conversion and lowering of bindings + +This pass does a few things things: +* Deal with typed variables (K"decl") and their assignments +* Deal with const and non-const global assignments +* Convert closures into types +* Lower variables captured by closures into boxes, etc, as necessary + +Invariants: +* This pass must not introduce new K"Identifier" - only K"BindingId". +* Any new binding IDs must be added to the enclosing lambda locals +""" +@fzone "JL: closures" function convert_closures(ctx::VariableAnalysisContext, ex) + ctx = ClosureConversionCtx(ctx.graph, ctx.bindings, ctx.mod, + ctx.closure_bindings, ex.lambda_bindings) + ex1 = closure_convert_lambda(ctx, ex) + if !isempty(ctx.toplevel_stmts) + throw(LoweringError(first(ctx.toplevel_stmts), "Top level code was found outside any top level context. `@generated` functions may not contain closures, including `do` syntax and generators/comprehension")) + end + ctx, ex1 +end diff --git a/JuliaLowering/src/compat.jl b/JuliaLowering/src/compat.jl new file mode 100644 index 0000000000000..e9800d0a7ab48 --- /dev/null +++ b/JuliaLowering/src/compat.jl @@ -0,0 +1,605 @@ +const JS = JuliaSyntax + +function _insert_tree_node(graph::SyntaxGraph, k::Kind, src::SourceAttrType, + flags::UInt16=0x0000; attrs...) + id = newnode!(graph) + sethead!(graph, id, k) + flags !== 0 && setflags!(graph, id, flags) + setattr!(graph, id; source=src, attrs...) + return id +end + +""" +An Expr -> SyntaxTree transformation that should preserve semantics, but will +have low-quality provenance info (namely, each tree node will be associated with +the last seen LineNumberNode in the pre-order expr traversal). + +Last-resort option so that, for example, we can lower the output of old +Expr-producing macros. Always prefer re-parsing source text over using this. 
+ +Supports parsed and/or macro-expanded exprs, but not lowered exprs +""" +function expr_to_syntaxtree(@nospecialize(e), lnn::Union{LineNumberNode, Nothing}=nothing) + graph = ensure_attributes!( + SyntaxGraph(), + kind=Kind, syntax_flags=UInt16, + source=SourceAttrType, var_id=Int, value=Any, + name_val=String, is_toplevel_thunk=Bool, + scope_layer=LayerId, meta=CompileHints, + toplevel_pure=Bool) + expr_to_syntaxtree(graph, e, lnn) +end + +@fzone "JL: expr_to_syntaxtree" function expr_to_syntaxtree(ctx, @nospecialize(e), lnn::Union{LineNumberNode, Nothing}) + graph = syntax_graph(ctx) + toplevel_src = if isnothing(lnn) + # Provenance sinkhole for all nodes until we hit a linenode + dummy_src = SourceRef( + SourceFile("No source for expression"), + 1, JS.GreenNode(K"None", 0)) + _insert_tree_node(graph, K"None", dummy_src) + else + lnn + end + st_id, _ = _insert_convert_expr(e, graph, toplevel_src) + out = SyntaxTree(graph, st_id) + return out +end + +function _expr_replace!(@nospecialize(e), replace_pred::Function, replacer!::Function, + recurse_pred=(@nospecialize e)->true) + if replace_pred(e) + replacer!(e) + end + if e isa Expr && recurse_pred(e) + for a in e.args + _expr_replace!(a, replace_pred, replacer!, recurse_pred) + end + end +end + +function _to_iterspec(exs::Vector, is_generator::Bool) + if length(exs) === 1 && exs[1].head === :filter + @assert length(exs[1].args) >= 2 + return Expr(:filter, _to_iterspec(exs[1].args[2:end], true), exs[1].args[1]) + end + outex = Expr(:iteration) + for e in exs + if e.head === :block && !is_generator + for iter in e.args + push!(outex.args, Expr(:in, iter.args...)) + end + elseif e.head === :(=) + push!(outex.args, Expr(:in, e.args...)) + else + @assert false "unknown iterspec in $e" + end + end + return outex +end + +""" +Return `e.args`, but with any parameters in SyntaxTree (flattened, source) order. +Parameters are expected to be as `e.args[pos]`. + +e.g. 
orderings of (a,b,c;d;e;f): + Expr: (tuple (parameters (parameters (parameters f) e) d) a b c) + SyntaxTree: (tuple a b c (parameters d) (parameters e) (parameters f)) +""" +function collect_expr_parameters(e::Expr, pos::Int) + params = expr_parameters(e, pos) + isnothing(params) && return copy(e.args) + args = Any[e.args[1:pos-1]..., e.args[pos+1:end]...] + return _flatten_params!(args, params) +end +function _flatten_params!(out::Vector{Any}, params::Expr) + p,p_esc = unwrap_esc(params) + p1 = expr_parameters(p, 1) + if !isnothing(p1) + push!(out, p_esc(Expr(:parameters, p.args[2:end]...))) + _flatten_params!(out, p_esc(p1)) + else + push!(out, params::Any) + end + return out +end +function expr_parameters(p::Expr, pos::Int) + if pos <= length(p.args) + e,_ = unwrap_esc(p.args[pos]) + if e isa Expr && e.head === :parameters + return p.args[pos] + end + end + return nothing +end + +""" +If `b` (usually a block) has exactly one non-LineNumberNode argument, unwrap it. +""" +function maybe_unwrap_arg(b) + if !(b isa Expr) + return b + end + e1 = findfirst(c -> !isa(c, LineNumberNode), b.args) + isnothing(e1) && return b + e2 = findfirst(c -> !isa(c, LineNumberNode), b.args[e1+1:end]) + !isnothing(e2) && return b + return b.args[e1] +end + +function maybe_extract_lnn(b, default) + !(b isa Expr) && return default + lnn_i = findfirst(a->isa(a, LineNumberNode), b.args) + return isnothing(lnn_i) ? default : b.args[lnn_i] +end + +# Get kind by string if exists. TODO relies on internals +function find_kind(s::String) + out = get(JS._kind_str_to_int, s, nothing) + return isnothing(out) ? nothing : JS.Kind(out) +end + +function is_dotted_operator(s::AbstractString) + return length(s) >= 2 && + s[1] === '.' 
&& + JS.is_operator(something(find_kind(s[2:end]), K"None")) +end + +function is_eventually_call(e) + return e isa Expr && (e.head === :call || + e.head in (:escape, :where, :(::)) && is_eventually_call(e.args[1])) +end + +function rewrap_escapes(hyg, ex) + if hyg isa Expr && hyg.head in (:escape, :var"hygienic-scope") + ex = Expr(hyg.head, rewrap_escapes(hyg.args[1], ex)) + if hyg.head === :var"hygienic-scope" + append!(ex.args, @view hyg.args[2:end]) + end + end + return ex +end + +# Unwrap Expr(:escape) and Expr(:hygienic-scope). Return the unwrapped +# expression and a function which will rewrap a derived expression in the +# correct hygiene wrapper. +function unwrap_esc(ex) + orig_ex = ex + while ex isa Expr && ex.head in (:escape, :var"hygienic-scope") + @assert length(ex.args) >= 1 + ex = ex.args[1] + end + return ex, e->rewrap_escapes(orig_ex, e) +end + +function unwrap_esc_(e) + unwrap_esc(e)[1] +end + +""" +Insert `e` converted to a syntaxtree into graph and recurse on children. Return +a pair (my_node_id, last_srcloc). Should not mutate `e`. + +`src` is the latest location found in the pre-order traversal, and is the line +number node to be associated with `e`. 
+""" +function _insert_convert_expr(@nospecialize(e), graph::SyntaxGraph, src::SourceAttrType) + #--------------------------------------------------------------------------- + # Non-expr types + if isnothing(e) + st_id = _insert_tree_node(graph, K"core", src; name_val="nothing") + return st_id, src + elseif e isa LineNumberNode + # A LineNumberNode in value position evaluates to nothing + st_id = _insert_tree_node(graph, K"core", src; name_val="nothing") + return st_id, e + elseif e isa Symbol + st_id = _insert_tree_node(graph, K"Identifier", src; name_val=String(e)) + return st_id, src + elseif e isa QuoteNode + if e.value isa Symbol + return _insert_convert_expr(Expr(:quoted_symbol, e.value), graph, src) + elseif e.value isa Expr + return _insert_convert_expr(Expr(:inert, e.value), graph, src) + elseif e.value isa LineNumberNode + return _insert_tree_node(graph, K"Value", src; value=e.value), src + else + return _insert_convert_expr(e.value, graph, src) + end + elseif e isa String + st_id = _insert_tree_node(graph, K"string", src) + id_inner = _insert_tree_node(graph, K"String", src; value=e) + setchildren!(graph, st_id, [id_inner]) + return st_id, src + elseif !(e isa Expr) + # There are other kinds we could potentially back-convert (e.g. Float), + # but Value should work fine. + st_k = e isa Bool ? K"Bool" : + e isa Integer ? K"Integer" : + find_kind(string(typeof(e))) + st_id = _insert_tree_node(graph, isnothing(st_k) ? K"Value" : st_k, src; value=e) + return st_id, src + end + + #--------------------------------------------------------------------------- + # `e` is an expr. In many cases, it suffices to + # - guess that the kind name is the same as the expr head + # - add no syntax flags or attrs + # - map e.args to syntax tree children one-to-one + e::Expr + nargs = length(e.args) + maybe_kind = find_kind(string(e.head)) + st_k = isnothing(maybe_kind) ? 
K"None" : maybe_kind + st_flags = 0x0000 + st_attrs = Dict{Symbol, Any}() + # Note that SyntaxTree/Node differentiate 0-child non-terminals and leaves + child_exprs::Union{Nothing, Vector{Any}} = copy(e.args) + + # However, the following are (many) special cases where the kind, flags, + # children, or attributes are different from what we guessed above + if Base.isoperator(e.head) && st_k === K"None" + # e.head is an updating assignment operator (+=, .-=, etc). Non-= + # dotted ops are wrapped in a call, so we don't reach this. + s = string(e.head) + @assert s[end] === '=' && nargs === 2 + if s[1] === '.' + st_k = K".op=" + op = s[2:end-1] + else + st_k = K"op=" + op = s[1:end-1] + end + child_exprs = Any[e.args[1], Symbol(op), e.args[2]] + elseif e.head === :comparison + for i = 2:2:length(child_exprs) + op,op_esc = unwrap_esc(child_exprs[i]) + @assert op isa Symbol + op_s = string(op) + if is_dotted_operator(op_s) + child_exprs[i] = Expr(:., op_esc(Symbol(op_s[2:end]))) + end + end + elseif e.head === :macrocall + @assert nargs >= 2 + a1,a1_esc = unwrap_esc(e.args[1]) + child_exprs = collect_expr_parameters(e, 3) + if child_exprs[2] isa LineNumberNode + src = child_exprs[2] + end + deleteat!(child_exprs, 2) + if a1 isa Symbol + child_exprs[1] = a1_esc(Expr(:macro_name, a1)) + elseif a1 isa Expr && a1.head === :(.) + a12,a12_esc = unwrap_esc(a1.args[2]) + if a12 isa QuoteNode + child_exprs[1] = a1_esc(Expr(:(.), a1.args[1], + Expr(:macro_name, a12_esc(a12.value)))) + end + elseif a1 isa GlobalRef && a1.mod === Core + # Syntax-introduced macrocalls are listed here for reference. We + # probably don't need to convert these. 
+ if a1.name === Symbol("@cmd") + elseif a1.name === Symbol("@doc") && nargs === 4 # two macro args only + # Single-arg @doc is a lookup not corresponding to K"doc" + # Revise sometimes calls @doc with three args, but probably shouldn't + st_k = K"doc" + child_exprs = child_exprs[2:3] + elseif a1.name === Symbol("@int128_str") + elseif a1.name === Symbol("@int128_str") + elseif a1.name === Symbol("@big_str") + end + end + elseif e.head === Symbol("'") + @assert nargs === 1 + st_k = K"call" + child_exprs = Any[e.head, e.args[1]] + elseif e.head === :. && nargs === 2 + a2, a2_esc = unwrap_esc(e.args[2]) + if a2 isa Expr && a2.head === :tuple + st_k = K"dotcall" + tuple_exprs = collect_expr_parameters(a2_esc(a2), 1) + child_exprs = pushfirst!(tuple_exprs, e.args[1]) + elseif a2 isa QuoteNode + child_exprs[2] = a2_esc(a2.value) + end + elseif e.head === :for + @assert nargs === 2 + child_exprs = Any[_to_iterspec(Any[e.args[1]], false), e.args[2]] + elseif e.head === :where + @assert nargs >= 2 + e2,_ = unwrap_esc(e.args[2]) + if !(e2 isa Expr && e2.head === :braces) + child_exprs = Any[e.args[1], Expr(:braces, e.args[2:end]...)] + end + elseif e.head in (:tuple, :vect, :braces) + child_exprs = collect_expr_parameters(e, 1) + elseif e.head in (:curly, :ref) + child_exprs = collect_expr_parameters(e, 2) + elseif e.head === :try + child_exprs = Any[e.args[1]] + # Expr: + # (try (block ...) var (block ...) [block ...] [block ...]) + # # try catch_var catch finally else + # SyntaxTree: + # (try (block ...) + # [catch var (block ...)] + # [else (block ...)] + # [finally (block ...)]) + e2 = unwrap_esc_(e.args[2]) + e3 = unwrap_esc_(e.args[3]) + if e2 !== false || e3 !== false + push!(child_exprs, + Expr(:catch, + e2 === false ? Expr(:catch_var_placeholder) : e.args[2], + e3 === false ? 
nothing : e.args[3])) + end + if nargs >= 5 + push!(child_exprs, Expr(:else, e.args[5])) + end + if nargs >= 4 && unwrap_esc_(e.args[4]) !== false + push!(child_exprs, Expr(:finally, e.args[4])) + end + elseif e.head === :flatten || e.head === :generator + st_k = K"generator" + child_exprs = Any[] + next = e + while next.head === :flatten + @assert next.args[1].head === :generator + push!(child_exprs, _to_iterspec(next.args[1].args[2:end], true)) + next = next.args[1].args[1] + end + @assert next.head === :generator + push!(child_exprs, _to_iterspec(next.args[2:end], true)) + pushfirst!(child_exprs, next.args[1]) + elseif e.head === :ncat || e.head === :nrow + dim = unwrap_esc_(popfirst!(child_exprs)) + st_flags |= JS.set_numeric_flags(dim) + elseif e.head === :typed_ncat + st_flags |= JS.set_numeric_flags(unwrap_esc_(e.args[2])) + deleteat!(child_exprs, 2) + elseif e.head === :(->) + @assert nargs === 2 + a1, a1_esc = unwrap_esc(e.args[1]) + if a1 isa Expr && a1.head === :block + # Expr parsing fails to make :parameters here... + lam_args = Any[] + lam_eqs = Any[] + for a in a1.args + a isa LineNumberNode && continue + a isa Expr && a.head === :(=) ? 
push!(lam_eqs, a) : push!(lam_args, a) + end + !isempty(lam_eqs) && push!(lam_args, Expr(:parameters, lam_eqs...)) + child_exprs[1] = a1_esc(Expr(:tuple, lam_args...)) + elseif !(a1 isa Expr && (a1.head in (:tuple, :where))) + child_exprs[1] = a1_esc(Expr(:tuple, a1)) + end + src = maybe_extract_lnn(e.args[2], src) + child_exprs[2] = maybe_unwrap_arg(e.args[2]) + elseif e.head === :call + child_exprs = collect_expr_parameters(e, 2) + a1,a1_esc = unwrap_esc(child_exprs[1]) + if a1 isa Symbol + a1s = string(a1) + if is_dotted_operator(a1s) + # non-assigning dotop like .+ or .== + st_k = K"dotcall" + child_exprs[1] = a1_esc(Symbol(a1s[2:end])) + end + end + elseif e.head === :function + if nargs >= 2 + src = maybe_extract_lnn(e.args[2], src) + end + elseif e.head === :(=) + if is_eventually_call(e.args[1]) + st_k = K"function" + st_flags |= JS.SHORT_FORM_FUNCTION_FLAG + src = maybe_extract_lnn(e.args[2], src) + child_exprs[2] = maybe_unwrap_arg(e.args[2]) + end + elseif e.head === :module + @assert nargs === 3 + if !e.args[1] + st_flags |= JS.BARE_MODULE_FLAG + end + child_exprs = Any[e.args[2], e.args[3]] + elseif e.head === :do + # Expr: + # (do (call f args...) (-> (tuple lam_args...) (block ...))) + # SyntaxTree: + # (call f args... (do (tuple lam_args...) 
(block ...))) + callargs = collect_expr_parameters(e.args[1], 2) + if e.args[1].head === :macrocall + st_k = K"macrocall" + if callargs[2] isa LineNumberNode + src = callargs[2] + end + deleteat!(callargs, 2) + c1,c1_esc = unwrap_esc(callargs[1]) + callargs[1] = c1_esc(Expr(:macro_name, c1)) + else + st_k = K"call" + end + child_exprs = Any[callargs..., Expr(:do_lambda, e.args[2].args...)] + elseif e.head === :let + if nargs >= 1 + a1,_ = unwrap_esc(e.args[1]) + if !(a1 isa Expr && a1.head === :block) + child_exprs[1] = Expr(:block, e.args[1]) + end + end + elseif e.head === :struct + e.args[1] && (st_flags |= JS.MUTABLE_FLAG) + child_exprs = child_exprs[2:end] + # TODO handle docstrings after refactor + elseif (e.head === :using || e.head === :import) + _expr_replace!(e, + (e)->(e isa Expr && e.head === :.), + (e)->(e.head = :importpath)) + elseif e.head === :kw + st_k = K"=" + elseif e.head in (:local, :global) && nargs > 1 + # Possible normalization + # child_exprs = Any[Expr(:tuple, child_exprs...)] + elseif e.head === :error + # Zero-child errors from parsing are leaf nodes. We could change this + # upstream for consistency. + if nargs === 0 + child_exprs = nothing + st_attrs[:value] = JS.ErrorVal() + st_flags |= JS.TRIVIA_FLAG + end + end + + #--------------------------------------------------------------------------- + # The following heads are not emitted from parsing, but old macros could + # produce these and they would historically be accepted by flisp lowering. + if e.head === Symbol("latestworld-if-toplevel") + st_k = K"latestworld_if_toplevel" + elseif e.head === Symbol("hygienic-scope") + st_k = K"hygienic_scope" + elseif e.head === :meta + # Messy and undocumented. Only sometimes we want a K"meta". 
+ if e.args[1] isa Expr && e.args[1].head === :purity + st_k = K"meta" + child_exprs = [Expr(:quoted_symbol, :purity), Base.EffectsOverride(e.args[1].args...)] + else + @assert e.args[1] isa Symbol + if e.args[1] === :nospecialize + if nargs > 2 + st_k = K"block" + # Kick the can down the road (should only be simple atoms?) + child_exprs = map(c->Expr(:meta, :nospecialize, c), child_exprs[2:end]) + else + st_id, src = _insert_convert_expr(e.args[2], graph, src) + setmeta!(SyntaxTree(graph, st_id); nospecialize=true) + return st_id, src + end + elseif e.args[1] in (:inline, :noinline, :generated, :generated_only, + :max_methods, :optlevel, :toplevel, :push_loc, :pop_loc, + :no_constprop, :aggressive_constprop, :specialize, :compile, :infer, + :nospecializeinfer, :force_compile, :propagate_inbounds, :doc) + # TODO: Some need to be handled in lowering + for (i, ma) in enumerate(e.args) + if ma isa Symbol + # @propagate_inbounds becomes (meta inline + # propagate_inbounds), but usually(?) only args[1] is + # converted here + child_exprs[i] = Expr(:quoted_symbol, e.args[i]) + end + end + else + # Can't throw a hard error; it is explicitly tested that meta can take arbitrary keys. + @error("Unknown meta form at $src: `$e`\n$(sprint(dump, e))") + child_exprs[1] = Expr(:quoted_symbol, e.args[1]) + end + end + elseif e.head === :scope_layer + @assert nargs === 2 + @assert e.args[1] isa Symbol + @assert e.args[2] isa LayerId + st_id, src = _insert_convert_expr(e.args[1], graph, src) + setattr!(graph, st_id, scope_layer=e.args[2]) + return st_id, src + elseif e.head === :symbolicgoto || e.head === :symboliclabel + @assert nargs === 1 + st_k = e.head === :symbolicgoto ? 
K"symbolic_label" : K"symbolic_goto" + st_attrs[:name_val] = string(e.args[1]) + child_exprs = nothing + elseif e.head in (:inline, :noinline) + @assert nargs === 1 && e.args[1] isa Bool + # TODO: JuliaLowering doesn't accept this (non-:meta) form yet + st_k = K"TOMBSTONE" + child_exprs = nothing + elseif e.head === :inbounds + @assert nargs === 1 && typeof(e.args[1]) in (Symbol, Bool) + # TODO: JuliaLowering doesn't accept this form yet + st_k = K"TOMBSTONE" + child_exprs = nothing + elseif e.head === :core + @assert nargs === 1 + @assert e.args[1] isa Symbol + st_attrs[:name_val] = string(e.args[1]) + child_exprs = nothing + elseif e.head === :islocal || e.head === :isglobal + st_k = K"extension" + child_exprs = [Expr(:quoted_symbol, e.head), e.args[1]] + elseif e.head === :block && nargs >= 1 && + e.args[1] isa Expr && e.args[1].head === :softscope + # (block (softscope true) ex) produced with every REPL prompt. + # :hardscope exists too, but should just be a let, and appears to be + # unused in the wild. + ensure_attributes!(graph; scope_type=Symbol) + st_k = K"scope_block" + st_attrs[:scope_type] = :soft + child_exprs = e.args[2:end] + end + + #--------------------------------------------------------------------------- + # Possibly-temporary heads introduced by us converting the parent expr + if e.head === :macro_name + @assert nargs === 1 + # Trim `@` for a correct SyntaxTree, although we need to add it back + # later for finding the macro + if e.args[1] === :(.) + mac_name = string(e.args[1][2]) + mac_name = mac_name == "@__dot__" ? "." : mac_name[2:end] + child_exprs[1] = Expr(:(.), e.args[1][1], Symbol(mac_name)) + else + mac_name = string(e.args[1]) + mac_name = mac_name == "@__dot__" ? "." 
: mac_name[2:end] + child_exprs[1] = Symbol(mac_name) + end + elseif e.head === :catch_var_placeholder + st_k = K"Placeholder" + st_attrs[:name_val] = "" + child_exprs = nothing + elseif e.head === :quoted_symbol + st_k = K"Symbol" + st_attrs[:name_val] = String(e.args[1]) + child_exprs = nothing + elseif e.head === :do_lambda + st_k = K"do" + end + + #--------------------------------------------------------------------------- + # Throw if this function isn't complete. Finally, insert a new node into the + # graph and recurse on child_exprs + if st_k === K"None" + error("Unknown expr head at $src: `$(e.head)`\n$(sprint(dump, e))") + elseif st_k === K"TOMBSTONE" + return nothing, src + end + + st_id = _insert_tree_node(graph, st_k, src, st_flags; st_attrs...) + + # child_exprs === nothing means we want a leaf. Note that setchildren! with + # an empty list makes a node non-leaf. + if isnothing(child_exprs) + return st_id, src + else + st_child_ids, last_src = _insert_child_exprs(e.head, child_exprs, graph, src) + setchildren!(graph, st_id, st_child_ids) + return st_id, last_src + end +end + +function _insert_child_exprs(head::Symbol, child_exprs::Vector{Any}, + graph::SyntaxGraph, src::SourceAttrType) + st_child_ids = NodeId[] + last_src = src + for (i, c) in enumerate(child_exprs) + c_unwrapped, _ = unwrap_esc(c) + # If c::LineNumberNode is anywhere in a block OR c is not in tail + # position, we don't need to insert `nothing` here + if c_unwrapped isa LineNumberNode && (head === :block || head === :toplevel && i != length(child_exprs)) + last_src = c_unwrapped + else + (c_id, last_src) = _insert_convert_expr(c, graph, last_src) + if !isnothing(c_id) + push!(st_child_ids, c_id) + end + end + end + return st_child_ids, last_src +end diff --git a/JuliaLowering/src/desugaring.jl b/JuliaLowering/src/desugaring.jl new file mode 100644 index 0000000000000..2e9aba254fc1a --- /dev/null +++ b/JuliaLowering/src/desugaring.jl @@ -0,0 +1,4598 @@ +# Lowering Pass 2 - syntax 
desugaring + +struct DesugaringContext{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + scope_layers::Vector{ScopeLayer} + mod::Module + expr_compat_mode::Bool +end + +function DesugaringContext(ctx, expr_compat_mode::Bool) + graph = ensure_attributes(syntax_graph(ctx), + kind=Kind, syntax_flags=UInt16, + source=SourceAttrType, + value=Any, name_val=String, + scope_type=Symbol, # :hard or :soft + var_id=IdTag, + is_toplevel_thunk=Bool, + toplevel_pure=Bool) + DesugaringContext(graph, + ctx.bindings, + ctx.scope_layers, + current_layer(ctx).mod, + expr_compat_mode) +end + +#------------------------------------------------------------------------------- + +# Return true when `x` and `y` are "the same identifier", but also works with +# bindings (and hence ssa vars). See also `is_identifier_like()` +function is_same_identifier_like(ex::SyntaxTree, y::SyntaxTree) + return (kind(ex) == K"Identifier" && kind(y) == K"Identifier" && NameKey(ex) == NameKey(y)) || + (kind(ex) == K"BindingId" && kind(y) == K"BindingId" && ex.var_id == y.var_id) +end + +function is_same_identifier_like(ex::SyntaxTree, name::AbstractString) + return kind(ex) == K"Identifier" && ex.name_val == name +end + +function contains_identifier(ex::SyntaxTree, idents::AbstractVector{<:SyntaxTree}) + contains_unquoted(ex) do e + any(is_same_identifier_like(e, id) for id in idents) + end +end + +function contains_identifier(ex::SyntaxTree, idents...) + contains_unquoted(ex) do e + any(is_same_identifier_like(e, id) for id in idents) + end +end + +function contains_ssa_binding(ctx, ex) + contains_unquoted(ex) do e + kind(e) == K"BindingId" && lookup_binding(ctx, e).is_ssa + end +end + +# Return true if `f(e)` is true for any unquoted child of `ex`, recursively. 
+function contains_unquoted(f::Function, ex::SyntaxTree) + if f(ex) + return true + elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta") + return any(contains_unquoted(f, e) for e in children(ex)) + else + return false + end +end + +# Identify some expressions that are safe to repeat +# +# TODO: Can we use this in more places? +function is_effect_free(ex) + k = kind(ex) + # TODO: metas + is_literal(k) || is_identifier_like(ex) || k == K"Symbol" || + k == K"inert" || k == K"top" || k == K"core" || k == K"Value" + # flisp also includes `a.b` with simple `a`, but this seems like a bug + # because this calls the user-defined getproperty? +end + +function check_no_parameters(ex::SyntaxTree, msg) + i = find_parameters_ind(children(ex)) + if i > 0 + throw(LoweringError(ex[i], msg)) + end +end + +function check_no_assignment(exs, msg="misplaced assignment statement in `[ ... ]`") + i = findfirst(kind(e) == K"=" for e in exs) + if !isnothing(i) + throw(LoweringError(exs[i], msg)) + end +end + +#------------------------------------------------------------------------------- +# Destructuring + +# Convert things like `(x,y,z) = (a,b,c)` to assignments, eliminating the +# tuple. Includes support for slurping/splatting. This function assumes that +# `_tuple_sides_match` returns true, so the following have already been +# checked: +# * There's max one `...` on the left hand side +# * There's max one `...` on the right hand side, in the last place, or +# matched with an lhs... in the last place. (required so that +# pairwise-matching terms from the right is valid) +# * Neither side has any key=val terms or parameter blocks +# +# Tuple elimination must act /as if/ the right hand side tuple was first +# constructed followed by destructuring. In particular, any side effects due to +# evaluating the individual terms in the right hand side tuple must happen in +# order. +function tuple_to_assignments(ctx, ex, is_const) + lhs = ex[1] + rhs = ex[2] + wrap(asgn) = is_const ? 
(@ast ctx ex [K"const" asgn]) : asgn + + # Tuple elimination aims to turn assignments between tuples into lists of assignments. + # + # However, there's a complex interplay of side effects due to the + # individual assignments and these can be surprisingly complicated to + # model. For example `(x[i], y) = (f(), g)` can contain the following + # surprises: + # * `tmp = f()` calls `f` which might throw, or modify the bindings for + # `x` or `y`. + # * `x[i] = tmp` is lowered to `setindex!` which might throw or modify the + # bindings for `x` or `y`. + # * `g` might throw an `UndefVarError` + # + # Thus for correctness we introduce temporaries for all right hand sides + # with observable side effects and ensure they're evaluated in order. + n_lhs = numchildren(lhs) + n_rhs = numchildren(rhs) + stmts = SyntaxList(ctx) + rhs_tmps = SyntaxList(ctx) + for i in 1:n_rhs + rh = rhs[i] + r = if kind(rh) == K"..." + rh[1] + else + rh + end + k = kind(r) + if is_literal(k) || k == K"Symbol" || k == K"inert" || k == K"top" || + k == K"core" || k == K"Value" + # Effect-free and nothrow right hand sides do not need a temporary + # (we require nothrow because the order of rhs terms is observable + # due to sequencing, thus identifiers are not allowed) + else + # Example rhs which need a temporary + # * `f()` - arbitrary side effects to any binding + # * `z` - might throw UndefVarError + tmp = emit_assign_tmp(stmts, ctx, r) + rh = kind(rh) == K"..." ? @ast(ctx, rh, [K"..." tmp]) : tmp + end + push!(rhs_tmps, rh) + end + + il = 0 + ir = 0 + while il < n_lhs + il += 1 + ir += 1 + lh = lhs[il] + if kind(lh) == K"..." + # Exactly one lhs `...` occurs in the middle somewhere, with a + # general rhs which has at least as many non-`...` terms or one + # `...` term at the end. + # Examples: + # (x, ys..., z) = (a, b, c, d) + # (x, ys..., z) = (a, bs...) + # (xs..., y) = (a, bs...) + # (xs...) 
= (a, b, c) + # in this case we can pairwise-match arguments from the end + # backward and emit a general tuple assignment for the middle. + jl = n_lhs + jr = n_rhs + while jl > il && jr > ir + if kind(lhs[jl]) == K"..." || kind(rhs_tmps[jr]) == K"..." + break + end + jl -= 1 + jr -= 1 + end + middle = emit_assign_tmp(stmts, ctx, + @ast(ctx, rhs, [K"tuple" rhs_tmps[ir:jr]...]), + "rhs_tmp" + ) + if il == jl + # (x, ys...) = (a,b,c) + # (x, ys...) = (a,bs...) + # (ys...) = () + push!(stmts, wrap(@ast ctx ex [K"=" lh[1] middle])) + else + # (x, ys..., z) = (a, b, c, d) + # (x, ys..., z) = (a, bs...) + # (xs..., y) = (a, bs...) + push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:jl]...] middle])) + end + # Continue with the remainder of the list of non-splat terms + il = jl + ir = jr + else + rh = rhs_tmps[ir] + if kind(rh) == K"..." + push!(stmts, wrap(@ast ctx ex [K"=" [K"tuple" lhs[il:end]...] rh[1]])) + break + else + push!(stmts, wrap(@ast ctx ex [K"=" lh rh])) + end + end + end + + @ast ctx ex [K"block" + stmts... + [K"removable" [K"tuple" rhs_tmps...]] + ] +end + +# Create an assignment `$lhs = $rhs` where `lhs` must be "simple". If `rhs` is +# a block, sink the assignment into the last statement of the block to keep +# more expressions at top level. `rhs` should already be expanded. +# +# flisp: sink-assignment +function sink_assignment(ctx, srcref, lhs, rhs) + @assert is_identifier_like(lhs) + if kind(rhs) == K"block" + @ast ctx srcref [K"block" + rhs[1:end-1]... + [K"=" lhs rhs[end]] + ] + else + @ast ctx srcref [K"=" lhs rhs] + end +end + +function _tuple_sides_match(lhs, rhs) + N = max(length(lhs), length(rhs)) + for i = 1:N+1 + if i > length(lhs) + # (x, y) = (a, b) # match + # (x,) = (a, b) # no match + return i > length(rhs) + elseif kind(lhs[i]) == K"..." + # (x, ys..., z) = (a, b) # match + # (x, ys...) = (a,) # match + return true + elseif i > length(rhs) + # (x, y) = (a,) # no match + # (x, y, zs...) 
= (a,) # no match + return false + elseif kind(rhs[i]) == K"..." + # (x, y) = (as...,) # match + # (x, y, z) = (a, bs...) # match + # (x, y) = (as..., b) # no match + return i == length(rhs) + end + end +end + +# Lower `(lhss...) = rhs` in contexts where `rhs` must be a tuple at runtime +# by assuming that `getfield(rhs, i)` works and is efficient. +function lower_tuple_assignment(ctx, assignment_srcref, lhss, rhs) + stmts = SyntaxList(ctx) + tmp = emit_assign_tmp(stmts, ctx, rhs, "rhs_tmp") + for (i, lh) in enumerate(lhss) + push!(stmts, @ast ctx assignment_srcref [K"=" + lh + [K"call" "getfield"::K"core" tmp i::K"Integer"] + ]) + end + makenode(ctx, assignment_srcref, K"block", stmts) +end + +# Implement destructuring with `lhs` a tuple expression (possibly with +# slurping) and `rhs` a general expression. +# +# Destructuring in this context is done via the iteration interface, though +# calls `Base.indexed_iterate()` to allow for a fast path in cases where the +# right hand side is directly indexable. +function _destructure(ctx, assignment_srcref, stmts, lhs, rhs, is_const) + n_lhs = numchildren(lhs) + if n_lhs > 0 + iterstate = new_local_binding(ctx, rhs, "iterstate") + end + + end_stmts = SyntaxList(ctx) + wrap(asgn) = is_const ? (@ast ctx assignment_srcref [K"const" asgn]) : asgn + + i = 0 + for lh in children(lhs) + i += 1 + if kind(lh) == K"..." + lh1 = if is_identifier_like(lh[1]) && !is_const + lh[1] + else + lhs_tmp = ssavar(ctx, lh[1], "lhs_tmp") + push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh[1] [K"=" lh[1] lhs_tmp]))) + lhs_tmp + end + if i == n_lhs + # Slurping as last lhs, eg, for `zs` in + # (x, y, zs...) = rhs + if kind(lh1) != K"Placeholder" + push!(stmts, expand_forms_2(ctx, + @ast ctx assignment_srcref [K"=" + lh1 + [K"call" + "rest"::K"top" + rhs + if i > 1 + iterstate + end + ] + ] + )) + end + else + # Slurping before last lhs. Eg, for `xs` in + # (xs..., y, z) = rhs + # For this we call + # (xs, tail) = Base.split_rest(...) 
+ # then continue iteration with `tail` as new rhs. + tail = ssavar(ctx, lh, "tail") + push!(stmts, + expand_forms_2(ctx, + lower_tuple_assignment(ctx, + assignment_srcref, + (lh1, tail), + @ast ctx assignment_srcref [K"call" + "split_rest"::K"top" + rhs + (n_lhs - i)::K"Integer" + if i > 1 + iterstate + end + ] + ) + ) + ) + rhs = tail + n_lhs = n_lhs - i + i = 0 + end + else + # Normal case, eg, for `y` in + # (x, y, z) = rhs + lh1 = if is_identifier_like(lh) && !is_const + lh + # elseif is_eventually_call(lh) (TODO??) + else + lhs_tmp = ssavar(ctx, lh, "lhs_tmp") + push!(end_stmts, expand_forms_2(ctx, wrap(@ast ctx lh [K"=" lh lhs_tmp]))) + lhs_tmp + end + push!(stmts, + expand_forms_2(ctx, + lower_tuple_assignment(ctx, + assignment_srcref, + i == n_lhs ? (lh1,) : (lh1, iterstate), + @ast ctx assignment_srcref [K"call" + "indexed_iterate"::K"top" + rhs + i::K"Integer" + if i > 1 + iterstate + end + ] + ) + ) + ) + end + end + # Actual assignments must happen after the whole iterator is desctructured + # (https://github.com/JuliaLang/julia/issues/40574) + append!(stmts, end_stmts) + stmts +end + +# Expands cases of property destructuring +function expand_property_destruct(ctx, ex, is_const) + @assert numchildren(ex) == 2 + lhs = ex[1] + @assert kind(lhs) == K"tuple" + if numchildren(lhs) != 1 + throw(LoweringError(lhs, "Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs`")) + end + params = lhs[1] + @assert kind(params) == K"parameters" + rhs = ex[2] + stmts = SyntaxList(ctx) + rhs1 = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) + for prop in children(params) + propname = kind(prop) == K"Identifier" ? prop : + kind(prop) == K"::" && kind(prop[1]) == K"Identifier" ? 
prop[1] : + throw(LoweringError(prop, "invalid assignment location")) + push!(stmts, expand_forms_2(ctx, @ast ctx rhs1 [K"=" + prop + [K"call" + "getproperty"::K"top" + rhs1 + propname=>K"Symbol" + ] + ])) + end + push!(stmts, @ast ctx rhs1 [K"removable" rhs1]) + makenode(ctx, ex, K"block", stmts) +end + +# Expands all cases of general tuple destructuring, eg +# (x,y) = (a,b) +function expand_tuple_destruct(ctx, ex, is_const) + lhs = ex[1] + @assert kind(lhs) == K"tuple" + rhs = ex[2] + + num_slurp = 0 + for lh in children(lhs) + num_slurp += (kind(lh) == K"...") + if num_slurp > 1 + throw(LoweringError(lh, "multiple `...` in destructuring assignment are ambiguous")) + end + end + + if kind(rhs) == K"tuple" + num_splat = sum(kind(rh) == K"..." for rh in children(rhs); init=0) + if num_splat == 0 && (numchildren(lhs) - num_slurp) > numchildren(rhs) + throw(LoweringError(ex, "More variables on left hand side than right hand in tuple assignment")) + end + + if !any_assignment(children(rhs)) && !has_parameters(rhs) && + _tuple_sides_match(children(lhs), children(rhs)) + return expand_forms_2(ctx, tuple_to_assignments(ctx, ex, is_const)) + end + end + + stmts = SyntaxList(ctx) + rhs1 = if is_ssa(ctx, rhs) || + (is_identifier_like(rhs) && + !any(is_same_identifier_like(kind(l) == K"..." ? l[1] : l, rhs) + for l in children(lhs))) + rhs + else + emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs)) + end + _destructure(ctx, ex, stmts, lhs, rhs1, is_const) + push!(stmts, @ast ctx rhs1 [K"removable" rhs1]) + makenode(ctx, ex, K"block", stmts) +end + +#------------------------------------------------------------------------------- +# Expand comparison chains + +function expand_scalar_compare_chain(ctx, srcref, terms, i) + comparisons = nothing + while i + 2 <= length(terms) + lhs = terms[i] + op = terms[i+1] + rhs = terms[i+2] + if kind(op) == K"." 
+ break + end + comp = @ast ctx op [K"call" + op + lhs + rhs + ] + if isnothing(comparisons) + comparisons = comp + else + comparisons = @ast ctx srcref [K"&&" + comparisons + comp + ] + end + i += 2 + end + (comparisons, i) +end + +# Expanding comparison chains: (comparison a op b op c ...) +# +# We use && to combine pairs of adjacent scalar comparisons and .& to combine +# vector-vector and vector-scalar comparisons. Combining scalar comparisons are +# treated as having higher precedence than vector comparisons, thus: +# +# a < b < c ==> (a < b) && (b < c) +# a .< b .< c ==> (a .< b) .& (b .< c) +# a < b < c .< d .< e ==> (a < b && b < c) .& (c .< d) .& (d .< e) +# a .< b .< c < d < e ==> (a .< b) .& (b .< c) .& (c < d && d < e) +function expand_compare_chain(ctx, ex) + @assert kind(ex) == K"comparison" + terms = children(ex) + @chk numchildren(ex) >= 3 + @chk isodd(numchildren(ex)) + i = 1 + comparisons = nothing + # Combine any number of dotted comparisons + while i + 2 <= length(terms) + if kind(terms[i+1]) != K"." + (comp, i) = expand_scalar_compare_chain(ctx, ex, terms, i) + else + lhs = terms[i] + op = terms[i+1] + rhs = terms[i+2] + i += 2 + comp = @ast ctx op [K"dotcall" + op[1] + lhs + rhs + ] + end + if isnothing(comparisons) + comparisons = comp + else + comparisons = @ast ctx ex [K"dotcall" + "&"::K"top" + # ^^ NB: Flisp bug. Flisp lowering essentially does + # adopt_scope("&"::K"Identifier", ctx.mod) + # here which seems wrong if the comparison chain arose from + # a macro in a different module. One fix would be to use + # adopt_scope("&"::K"Identifier", ex) + # to get the module of the comparison expression for the + # `&` operator. But a simpler option is probably to always + # use `Base.&` so we do that. 
+ comparisons + comp + ] + end + end + comparisons +end + +#------------------------------------------------------------------------------- +# Expansion of array indexing +function _arg_to_temp(ctx, stmts, ex, eq_is_kw=false) + k = kind(ex) + if is_effect_free(ex) + ex + elseif k == K"..." + @ast ctx ex [k _arg_to_temp(ctx, stmts, ex[1])] + elseif k == K"=" && eq_is_kw + @ast ctx ex [K"=" ex[1] _arg_to_temp(ex[2])] + else + emit_assign_tmp(stmts, ctx, ex) + end +end + +# Make the *arguments* of an expression safe for multiple evaluation, for +# example +# +# a[f(x)] => (temp=f(x); a[temp]) +# +# Any assignments are added to `stmts` and a result expression returned which +# may be used in further desugaring. +function remove_argument_side_effects(ctx, stmts, ex) + if is_literal(ex) || is_identifier_like(ex) + ex + else + k = kind(ex) + if k == K"let" + emit_assign_tmp(stmts, ctx, ex) + else + args = SyntaxList(ctx) + eq_is_kw = ((k == K"call" || k == K"dotcall") && is_prefix_call(ex)) || k == K"ref" + for (i,e) in enumerate(children(ex)) + push!(args, _arg_to_temp(ctx, stmts, e, eq_is_kw && i > 1)) + end + # TODO: Copy attributes? + @ast ctx ex [k args...] + end + end +end + +# Replace any `begin` or `end` symbols with an expression indexing the array +# `arr` in the `n`th index. `splats` are a list of the splatted arguments that +# precede index `n` `is_last` is true when this is this +# last index +function replace_beginend(ctx, ex, arr, n, splats, is_last) + k = kind(ex) + if k == K"Identifier" && ex.name_val in ("begin", "end") + indexfunc = @ast ctx ex (ex.name_val == "begin" ? 
"firstindex" : "lastindex")::K"top" + if length(splats) == 0 + if is_last && n == 1 + @ast ctx ex [K"call" indexfunc arr] + else + @ast ctx ex [K"call" indexfunc arr n::K"Integer"] + end + else + splat_lengths = SyntaxList(ctx) + for splat in splats + push!(splat_lengths, @ast ctx ex [K"call" "length"::K"top" splat]) + end + @ast ctx ex [K"call" + indexfunc + arr + [K"call" + "+"::K"top" + (n - length(splats))::K"Integer" + splat_lengths... + ] + ] + end + elseif is_leaf(ex) || is_quoted(ex) + ex + elseif k == K"ref" + # inside ref, only replace within the first argument + @ast ctx ex [k + replace_beginend(ctx, ex[1], arr, n, splats, is_last) + ex[2:end]... + ] + # elseif k == K"kw" - keyword args - what does this mean here? + # # note from flisp + # # TODO: this probably should not be allowed since keyword args aren't + # # positional, but in this context we have just used their positions anyway + else + mapchildren(e->replace_beginend(ctx, e, arr, n, splats, is_last), ctx, ex) + end +end + +# Go through indices and replace the `begin` or `end` symbol +# `arr` - array being indexed +# `idxs` - list of indices +# returns the expanded indices. Any statements that need to execute first are +# added to ctx.stmts. +function process_indices(sctx::StatementListCtx, arr, idxs) + has_splats = any(kind(i) == K"..." for i in idxs) + idxs_out = SyntaxList(sctx) + splats = SyntaxList(sctx) + for (n, idx0) in enumerate(idxs) + is_splat = kind(idx0) == K"..." + val = replace_beginend(sctx, is_splat ? idx0[1] : idx0, + arr, n, splats, n == length(idxs)) + # TODO: kwarg? + idx = !has_splats || is_simple_atom(sctx, val) ? val : emit_assign_tmp(sctx, val) + if is_splat + push!(splats, idx) + end + push!(idxs_out, is_splat ? @ast(sctx, idx0, [K"..." 
idx]) : idx) + end + return idxs_out +end + +# Expand things like `f()[i,end]`, add to `sctx.stmts` (temporaries for +# computing indices) and return +# * `arr` - The array (may be a temporary ssa value) +# * `idxs` - List of indices +function expand_ref_components(sctx::StatementListCtx, ex) + check_no_parameters(ex, "unexpected semicolon in array expression") + @assert kind(ex) == K"ref" + @chk numchildren(ex) >= 1 + arr = ex[1] + idxs = ex[2:end] + if any(contains_identifier(e, "begin", "end") for e in idxs) + arr = emit_assign_tmp(sctx, arr) + end + new_idxs = process_indices(sctx, arr, idxs) + return (arr, new_idxs) +end + +function expand_setindex(ctx, ex) + @assert kind(ex) == K"=" && numchildren(ex) == 2 + lhs = ex[1] + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, lhs) + rhs = emit_assign_tmp(sctx, ex[2]) + @ast ctx ex [K"block" + sctx.stmts... + expand_forms_2(ctx, [K"call" + "setindex!"::K"top" + arr + rhs + idxs... + ]) + [K"removable" rhs] + ] +end + +#------------------------------------------------------------------------------- +# Expansion of broadcast notation `f.(x .+ y)` + +function expand_dotcall(ctx, ex) + k = kind(ex) + if k == K"dotcall" + @chk numchildren(ex) >= 1 + farg = ex[1] + args = SyntaxList(ctx) + append!(args, ex[2:end]) + kws = remove_kw_args!(ctx, args) + @ast ctx ex [K"call" + (isnothing(kws) ? "broadcasted" : "broadcasted_kwsyntax")::K"top" + farg # todo: What about (z=f).(x,y) ? + (expand_dotcall(ctx, arg) for arg in args)... + if !isnothing(kws) + [K"parameters" + kws... + ] + end + ] + elseif k == K"comparison" + expand_dotcall(ctx, expand_compare_chain(ctx, ex)) + elseif k == K".&&" || k == K".||" + @ast ctx ex [K"call" + "broadcasted"::K"top" + (k == K".&&" ? "andand" : "oror")::K"top" + (expand_dotcall(ctx, arg) for arg in children(ex))... 
+ ] + else + ex + end +end + +function expand_fuse_broadcast(ctx, ex) + if kind(ex) == K".=" || kind(ex) == K".op=" + @chk numchildren(ex) == 2 + lhs = ex[1] + kl = kind(lhs) + rhs = expand_dotcall(ctx, ex[2]) + @ast ctx ex [K"call" + "materialize!"::K"top" + if kl == K"ref" + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, lhs) + [K"block" + sctx.stmts... + [K"call" + "dotview"::K"top" + arr + idxs... + ] + ] + elseif kl == K"." && numchildren(lhs) == 2 + [K"call" + "dotgetproperty"::K"top" + children(lhs)... + ] + else + lhs + end + if !(kind(rhs) == K"call" && kind(rhs[1]) == K"top" && rhs[1].name_val == "broadcasted") + # Ensure the rhs of .= is always wrapped in a call to `broadcasted()` + [K"call"(rhs) + "broadcasted"::K"top" + "identity"::K"top" + rhs + ] + else + rhs + end + ] + else + @ast ctx ex [K"call" + "materialize"::K"top" + expand_dotcall(ctx, ex) + ] + end +end + +#------------------------------------------------------------------------------- +# Expansion of generators and comprehensions + +# Return any subexpression which is a 'return` statement, not including any +# inside quoted sections or method bodies. 
+function find_return(ex::SyntaxTree) + if kind(ex) == K"return" + return ex + elseif !is_leaf(ex) && !(kind(ex) in KSet"quote inert meta function ->") + for e in children(ex) + r = find_return(e) + if !isnothing(r) + return r + end + end + else + return nothing + end +end + +function check_no_return(ex) + r = find_return(ex) + if !isnothing(r) + throw(LoweringError(r, "`return` not allowed inside comprehension or generator")) + end +end + +# Return true for nested tuples of the same identifiers +function similar_tuples_or_identifiers(a, b) + if kind(a) == K"tuple" && kind(b) == K"tuple" + return numchildren(a) == numchildren(b) && + all( ((x,y),)->similar_tuples_or_identifiers(x,y), + zip(children(a), children(b))) + else + is_same_identifier_like(a,b) + end +end + +# Return the anonymous function taking an iterated value, for use with the +# first argument to `Base.Generator` +function func_for_generator(ctx, body, iter_value_destructuring) + if similar_tuples_or_identifiers(iter_value_destructuring, body) + # Use Base.identity for generators which are filters such as + # `(x for x in xs if f(x))`. This avoids creating a new type. + @ast ctx body "identity"::K"top" + else + @ast ctx body [K"->" + [K"tuple" + iter_value_destructuring + ] + [K"block" + body + ] + ] + end +end + +function expand_generator(ctx, ex) + @chk numchildren(ex) >= 2 + body = ex[1] + check_no_return(body) + if numchildren(ex) > 2 + # Uniquify outer vars by NameKey + outervars_by_key = Dict{NameKey,typeof(ex)}() + for iterspecs in ex[2:end-1] + for iterspec in children(iterspecs) + foreach_lhs_name(iterspec[1]) do var + @assert kind(var) == K"Identifier" # Todo: K"BindingId"? + outervars_by_key[NameKey(var)] = var + end + end + end + outervar_assignments = SyntaxList(ctx) + for (k,v) in sort(collect(pairs(outervars_by_key)), by=first) + push!(outervar_assignments, @ast ctx v [K"=" v v]) + end + body = @ast ctx ex [K"let" + [K"block" + outervar_assignments... 
+ ] + [K"block" + body + ] + ] + end + for iterspecs_ind in numchildren(ex):-1:2 + iterspecs = ex[iterspecs_ind] + filter_test = nothing + if kind(iterspecs) == K"filter" + filter_test = iterspecs[2] + iterspecs = iterspecs[1] + end + if kind(iterspecs) != K"iteration" + throw(LoweringError("""Expected `K"iteration"` iteration specification in generator""")) + end + iter_ranges = SyntaxList(ctx) + iter_lhss = SyntaxList(ctx) + for iterspec in children(iterspecs) + @chk kind(iterspec) == K"in" + @chk numchildren(iterspec) == 2 + push!(iter_lhss, iterspec[1]) + push!(iter_ranges, iterspec[2]) + end + iter_value_destructuring = if numchildren(iterspecs) == 1 + iterspecs[1][1] + else + iter_lhss = SyntaxList(ctx) + for iterspec in children(iterspecs) + push!(iter_lhss, iterspec[1]) + end + @ast ctx iterspecs [K"tuple" iter_lhss...] + end + iter = if length(iter_ranges) > 1 + @ast ctx iterspecs [K"call" + "product"::K"top" + iter_ranges... + ] + else + iter_ranges[1] + end + if !isnothing(filter_test) + iter = @ast ctx ex [K"call" + "Filter"::K"top" + func_for_generator(ctx, filter_test, iter_value_destructuring) + iter + ] + end + body = @ast ctx ex [K"call" + "Generator"::K"top" + func_for_generator(ctx, body, iter_value_destructuring) + iter + ] + if iterspecs_ind < numchildren(ex) + body = @ast ctx ex [K"call" + "Flatten"::K"top" + body + ] + end + end + body +end + +function expand_comprehension_to_loops(ctx, ex) + @assert kind(ex) == K"typed_comprehension" + element_type = ex[1] + gen = ex[2] + @assert kind(gen) == K"generator" + body = gen[1] + check_no_return(body) + # TODO: check_no_break_continue + iterspecs = gen[2] + @assert kind(iterspecs) == K"iteration" + new_iterspecs = SyntaxList(ctx) + iters = SyntaxList(ctx) + iter_defs = SyntaxList(ctx) + for iterspec in children(iterspecs) + iter = emit_assign_tmp(iter_defs, ctx, iterspec[2], "iter") + push!(iters, iter) + push!(new_iterspecs, @ast ctx iterspec [K"in" iterspec[1] iter]) + end + # Lower to nested for 
loops + idx = new_local_binding(ctx, iterspecs, "idx") + @ast ctx ex [K"block" + iter_defs... + full_iter := if length(iters) == 1 + iters[1] + else + [K"call" + "product"::K"top" + iters... + ] + end + iter_size := [K"call" "IteratorSize"::K"top" full_iter] + size_unknown := [K"call" "isa"::K"core" iter_size "SizeUnknown"::K"top"] + result := [K"call" "_array_for"::K"top" element_type full_iter iter_size] + [K"=" idx [K"call" "first"::K"top" [K"call" "LinearIndices"::K"top" result]]] + [K"for" [K"iteration" Iterators.reverse(new_iterspecs)...] + [K"block" + val := body + # TODO: inbounds setindex + [K"if" size_unknown + [K"call" "push!"::K"top" result val] + [K"call" "setindex!"::K"top" result val idx] + ] + #[K"call" "println"::K"top" [K"call" "typeof"::K"core" idx]] + [K"=" idx [K"call" "add_int"::K"top" idx 1::K"Integer"]] + ] + ] + result + ] +end + +# Mimics native lowerer's tuple-wrap function (julia-syntax.scm:2723-2736) +# Unwraps only ONE layer of `...` and wraps sequences of non-splat args in tuples. +# Example: `[a, b, xs..., c]` -> `[tuple(a, b), xs, tuple(c)]` +function _wrap_unsplatted_args(ctx, call_ex, args) + result = SyntaxList(ctx) + non_splat_run = SyntaxList(ctx) + for arg in args + if kind(arg) == K"..." + # Flush any accumulated non-splat args + if !isempty(non_splat_run) + push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...]) + non_splat_run = SyntaxList(ctx) + end + # Unwrap only ONE layer of `...` (corresponds to (cadr x) in native lowerer) + push!(result, arg[1]) + else + # Accumulate non-splat args + push!(non_splat_run, arg) + end + end + # Flush any remaining non-splat args + if !isempty(non_splat_run) + push!(result, @ast ctx call_ex [K"call" "tuple"::K"core" non_splat_run...]) + end + result +end + +function expand_splat(ctx, ex, topfunc, args) + # Matches native lowerer's algorithm + # https://github.com/JuliaLang/julia/blob/f362f47338de099cdeeb1b2d81b3ec1948443274/src/julia-syntax.scm#L2761-2762: + # 1. 
Unwrap one layer of `...` from each argument (via _wrap_unsplatted_args) + # 2. Create `_apply_iterate(iterate, f, wrapped_args...)` WITHOUT expanding args yet + # 3. Recursively expand the entire call - if any wrapped_arg still contains `...`, + # the recursive expansion will handle it, naturally building nested structure + # + # Example: tuple((xs...)...) recursion: + # Pass 1: unwrap outer `...` -> _apply_iterate(iterate, tuple, (xs...)) + # Pass 2: expand sees (xs...) in call context, unwraps again + # -> _apply_iterate(iterate, _apply_iterate, tuple(iterate, tuple), xs) + + wrapped_args = _wrap_unsplatted_args(ctx, ex, args) + + # Construct the unevaluated _apply_iterate call + result = @ast ctx ex [K"call" + "_apply_iterate"::K"core" + "iterate"::K"top" + topfunc + wrapped_args... + ] + + # Recursively expand the entire call (matching native's expand-forms) + return expand_forms_2(ctx, result) +end + +function expand_array(ctx, ex, topfunc) + args = children(ex) + check_no_assignment(args) + topfunc = @ast ctx ex topfunc::K"top" + if any(kind(arg) == K"..." for arg in args) + expand_splat(ctx, ex, topfunc, args) + else + @ast ctx ex [K"call" + topfunc + expand_forms_2(ctx, args)... + ] + end +end + +#------------------------------------------------------------------------------- +# Expansion of array concatenation notation `[a b ; c d]` etc + +function expand_vcat(ctx, ex) + check_no_parameters(ex, "unexpected semicolon in array expression") + check_no_assignment(children(ex)) + had_row = false + had_row_splat = false + is_typed = kind(ex) == K"typed_vcat" + eltype = is_typed ? ex[1] : nothing + elements = is_typed ? ex[2:end] : ex[1:end] + for e in elements + k = kind(e) + if k == K"row" + had_row = true + had_row_splat = had_row_splat || any(kind(e1) == K"..." 
for e1 in children(e)) + end + end + if had_row_splat + # In case there is splatting inside `hvcat`, collect each row as a + # separate tuple and pass those to `hvcat_rows` instead (ref #38844) + rows = SyntaxList(ctx) + for e in elements + if kind(e) == K"row" + push!(rows, @ast ctx e [K"tuple" children(e)...]) + else + push!(rows, @ast ctx e [K"tuple" e]) + end + end + fname = is_typed ? "typed_hvcat_rows" : "hvcat_rows" + @ast ctx ex [K"call" + fname::K"top" + eltype + rows... + ] + else + row_sizes = SyntaxList(ctx) + flat_elems = SyntaxList(ctx) + for e in elements + if kind(e) == K"row" + rowsize = numchildren(e) + append!(flat_elems, children(e)) + else + rowsize = 1 + push!(flat_elems, e) + end + push!(row_sizes, @ast ctx e rowsize::K"Integer") + end + if had_row + fname = is_typed ? "typed_hvcat" : "hvcat" + @ast ctx ex [K"call" + fname::K"top" + eltype + [K"tuple" row_sizes...] + flat_elems... + ] + else + fname = is_typed ? "typed_vcat" : "vcat" + @ast ctx ex [K"call" + fname::K"top" + eltype + flat_elems... + ] + end + end +end + +function ncat_contains_row(ex) + k = kind(ex) + if k == K"row" + return true + elseif k == K"nrow" + return any(ncat_contains_row(e) for e in children(ex)) + else + return false + end +end + +# flip first and second dimension for row major layouts +function nrow_flipdim(row_major, d) + return !row_major ? d : + d == 1 ? 2 : + d == 2 ? 1 : d +end + +function flatten_ncat_rows!(flat_elems, nrow_spans, row_major, parent_layout_dim, ex) + # Note that most of the checks for valid nesting here are also checked in + # the parser - they can only fail when nrcat is constructed + # programmatically (eg, by a macro). 
+ k = kind(ex) + if k == K"row" + layout_dim = 1 + @chk parent_layout_dim != 1 (ex,"Badly nested rows in `ncat`") + elseif k == K"nrow" + dim = numeric_flags(ex) + @chk dim > 0 (ex,"Unsupported dimension $dim in ncat") + @chk !row_major || dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`") + layout_dim = nrow_flipdim(row_major, dim) + elseif kind(ex) == K"..." + throw(LoweringError(ex, "Splatting ... in an `ncat` with multiple dimensions is not supported")) + else + push!(flat_elems, ex) + for ld in parent_layout_dim-1:-1:1 + push!(nrow_spans, (ld, 1)) + end + return + end + row_start = length(flat_elems) + @chk parent_layout_dim > layout_dim (ex, "Badly nested rows in `ncat`") + for e in children(ex) + if layout_dim == 1 + @chk kind(e) ∉ KSet"nrow row" (e,"Badly nested rows in `ncat`") + end + flatten_ncat_rows!(flat_elems, nrow_spans, row_major, layout_dim, e) + end + n_elems_in_row = length(flat_elems) - row_start + for ld in parent_layout_dim-1:-1:layout_dim + push!(nrow_spans, (ld, n_elems_in_row)) + end +end + +# ncat comes in various layouts which we need to lower to special cases +# - one dimensional along some dimension +# - balanced column first or row first +# - ragged column first or row first +function expand_ncat(ctx, ex) + is_typed = kind(ex) == K"typed_ncat" + outer_dim = numeric_flags(ex) + @chk outer_dim > 0 (ex,"Unsupported dimension in ncat") + eltype = is_typed ? ex[1] : nothing + elements = is_typed ? ex[2:end] : ex[1:end] + hvncat_name = is_typed ? "typed_hvncat" : "hvncat" + if !any(kind(e) in KSet"row nrow" for e in elements) + # One-dimensional ncat along some dimension + # [a ;;; b ;;; c] + return @ast ctx ex [K"call" + hvncat_name::K"top" + eltype + outer_dim::K"Integer" + elements... + ] + end + # N-dimensional case. 
May be
    # * column first or row first:
    #     [a;b ;;; c;d]
    #     [a b ;;; c d]
    # * balanced or ragged:
    #     [a ; b ;;; c ; d]
    #     [a ; b ;;; c]
    row_major = any(ncat_contains_row, elements)
    @chk !row_major || outer_dim != 2 (ex,"2D `nrow` cannot be mixed with `row` in `ncat`")
    flat_elems = SyntaxList(ctx)
    # `ncat` syntax nests lower dimensional `nrow` inside higher dimensional
    # ones (with the exception of K"row" when `row_major` is true). Each nrow
    # spans a number of elements and we first extract that.
    nrow_spans = Vector{Tuple{Int,Int}}()
    for e in elements
        flatten_ncat_rows!(flat_elems, nrow_spans, row_major,
                           nrow_flipdim(row_major, outer_dim), e)
    end
    push!(nrow_spans, (outer_dim, length(flat_elems)))
    # Construct the shape specification by postprocessing the flat list of
    # spans.
    sort!(nrow_spans, by=first) # depends on a stable sort
    is_balanced = true
    i = 1
    # Bug fix: use integer storage. `zeros(outer_dim)` allocates a Float64
    # array, but the per-dimension lengths are emitted below as `::K"Integer"`
    # AST literals, so they must be `Int`s.
    dim_lengths = zeros(Int, outer_dim)
    prev_dimspan = 1
    while i <= length(nrow_spans)
        layout_dim, dimspan = nrow_spans[i]
        # All spans for a given layout dimension must be equal for the
        # concatenation to be balanced.
        while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim
            if dimspan != nrow_spans[i][2]
                is_balanced = false
                break
            end
            i += 1
        end
        is_balanced || break
        @assert dimspan % prev_dimspan == 0
        dim_lengths[layout_dim] = dimspan ÷ prev_dimspan
        prev_dimspan = dimspan
    end
    shape_spec = SyntaxList(ctx)
    if is_balanced
        if row_major
            dim_lengths[1], dim_lengths[2] = dim_lengths[2], dim_lengths[1]
        end
        # For balanced concatenations, the shape is specified by the length
        # along each dimension.
        for dl in dim_lengths
            push!(shape_spec, @ast ctx ex dl::K"Integer")
        end
    else
        # For unbalanced/ragged concatenations, the shape is specified by the
        # number of elements in each N-dimensional slice of the array, from layout
        # dimension 1 to N. See the documentation for `hvncat` for details.
+ i = 1 + while i <= length(nrow_spans) + groups_for_dim = Int[] + layout_dim = nrow_spans[i][1] + while i <= length(nrow_spans) && nrow_spans[i][1] == layout_dim + push!(groups_for_dim, nrow_spans[i][2]) + i += 1 + end + push!(shape_spec, + @ast ctx ex [K"tuple" + [i::K"Integer" for i in groups_for_dim]... + ] + ) + end + end + @ast ctx ex [K"call" + hvncat_name::K"top" + eltype + [K"tuple" shape_spec...] + row_major::K"Bool" + flat_elems... + ] +end + +#------------------------------------------------------------------------------- +# Expand assignments + +# Expand UnionAll definitions, eg `X{T} = Y{T,T}` +function expand_unionall_def(ctx, srcref, lhs, rhs, is_const=true) + if numchildren(lhs) <= 1 + throw(LoweringError(lhs, "empty type parameter list in type alias")) + end + name = lhs[1] + expand_forms_2( + ctx, + @ast ctx srcref [K"block" + rr := [K"where" rhs lhs[2:end]...] + [is_const ? K"constdecl" : K"assign_or_constdecl_if_global" name rr] + [K"removable" rr] + ] + ) +end + +# Expand general assignment syntax, including +# * UnionAll definitions +# * Chained assignments +# * Setting of structure fields +# * Assignments to array elements +# * Destructuring +# * Typed variable declarations +function expand_assignment(ctx, ex, is_const=false) + @chk numchildren(ex) == 2 + lhs = ex[1] + rhs = ex[2] + kl = kind(lhs) + if kind(ex) == K"function" + # `const f() = ...` - The `const` here is inoperative, but the syntax + # happened to work in earlier versions, so simply strip `const`. 
+ expand_forms_2(ctx, ex[1]) + elseif kl == K"curly" + expand_unionall_def(ctx, ex, lhs, rhs, is_const) + elseif kind(rhs) == K"=" + # Expand chains of assignments + # a = b = c ==> b=c; a=c + stmts = SyntaxList(ctx) + push!(stmts, lhs) + while kind(rhs) == K"=" + push!(stmts, rhs[1]) + rhs = rhs[2] + end + if is_identifier_like(rhs) + tmp_rhs = nothing + rr = rhs + else + tmp_rhs = ssavar(ctx, rhs, "rhs") + rr = tmp_rhs + end + # In const a = b = c, only a is const + stmts[1] = @ast ctx ex [(is_const ? K"constdecl" : K"=") stmts[1] rr] + for i in 2:length(stmts) + stmts[i] = @ast ctx ex [K"=" stmts[i] rr] + end + if !isnothing(tmp_rhs) + pushfirst!(stmts, @ast ctx ex [K"=" tmp_rhs rhs]) + end + expand_forms_2(ctx, + @ast ctx ex [K"block" + stmts... + [K"removable" rr] + ] + ) + elseif is_identifier_like(lhs) + if is_const + @ast ctx ex [K"block" + rr := expand_forms_2(ctx, rhs) + [K"constdecl" lhs rr] + [K"removable" rr] + ] + else + sink_assignment(ctx, ex, lhs, expand_forms_2(ctx, rhs)) + end + elseif kl == K"." + # a.b = rhs ==> setproperty!(a, :b, rhs) + @chk !is_const (ex, "cannot declare `.` form const") + @chk numchildren(lhs) == 2 + a = lhs[1] + b = lhs[2] + stmts = SyntaxList(ctx) + # TODO: Do we need these first two temporaries? + if !is_identifier_like(a) + a = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, a), "a_tmp") + end + if kind(b) != K"Symbol" + b = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, b), "b_tmp") + end + if !is_identifier_like(rhs) && !is_literal(rhs) + rhs = emit_assign_tmp(stmts, ctx, expand_forms_2(ctx, rhs), "rhs_tmp") + end + @ast ctx ex [K"block" + stmts... 
+ [K"call" "setproperty!"::K"top" a b rhs] + [K"removable" rhs] + ] + elseif kl == K"tuple" + if has_parameters(lhs) + expand_property_destruct(ctx, ex, is_const) + else + expand_tuple_destruct(ctx, ex, is_const) + end + elseif kl == K"ref" + # a[i1, i2] = rhs + @chk !is_const (ex, "cannot declare ref form const") + expand_forms_2(ctx, expand_setindex(ctx, ex)) + elseif kl == K"::" && numchildren(lhs) == 2 + x = lhs[1] + T = lhs[2] + res = if is_const + expand_forms_2(ctx, @ast ctx ex [K"const" + [K"=" + lhs[1] + convert_for_type_decl(ctx, ex, rhs, T, true) + ]]) + elseif is_identifier_like(x) + # Identifier in lhs[1] is a variable type declaration, eg + # x::T = rhs + @ast ctx ex [K"block" + [K"decl" lhs[1] lhs[2]] + is_const ? [K"const" [K"=" lhs[1] rhs]] : [K"=" lhs[1] rhs] + ] + else + # Otherwise just a type assertion, eg + # a[i]::T = rhs ==> (a[i]::T; a[i] = rhs) + # a[f(x)]::T = rhs ==> (tmp = f(x); a[tmp]::T; a[tmp] = rhs) + stmts = SyntaxList(ctx) + l1 = remove_argument_side_effects(ctx, stmts, lhs[1]) + # TODO: What about (f(z),y)::T = rhs? That's broken syntax and + # needs to be detected somewhere but won't be detected here. Maybe + # it shows that remove_argument_side_effects() is not the ideal + # solution here? + # TODO: handle underscore? + @ast ctx ex [K"block" + stmts... 
+ [K"::" l1 lhs[2]] + [K"=" l1 rhs] + ] + end + expand_forms_2(ctx, res) + elseif kl == K"dotcall" + throw(LoweringError(lhs, "invalid dot call syntax on left hand side of assignment")) + elseif kl == K"typed_hcat" + throw(LoweringError(lhs, "invalid spacing in left side of indexed assignment")) + elseif kl == K"typed_vcat" || kl == K"typed_ncat" + throw(LoweringError(lhs, "unexpected `;` in left side of indexed assignment")) + elseif kl == K"vect" || kl == K"hcat" || kl == K"vcat" || kl == K"ncat" + throw(LoweringError(lhs, "use `(a, b) = ...` to assign multiple values")) + else + throw(LoweringError(lhs, "invalid assignment location")) + end +end + +function expand_update_operator(ctx, ex) + k = kind(ex) + dotted = k == K".op=" + + @chk numchildren(ex) == 3 + lhs = ex[1] + op = ex[2] + rhs = ex[3] + + stmts = SyntaxList(ctx) + + declT = nothing + if kind(lhs) == K"::" + # eg `a[i]::T += 1` + declT = lhs[2] + decl_lhs = lhs + lhs = lhs[1] + end + + if kind(lhs) == K"ref" + # eg `a[end] = rhs` + sctx = with_stmts(ctx, stmts) + (arr, idxs) = expand_ref_components(sctx, lhs) + lhs = @ast ctx lhs [K"ref" arr idxs...] + end + + lhs = remove_argument_side_effects(ctx, stmts, lhs) + + if dotted + if !(kind(lhs) == K"ref" || (kind(lhs) == K"." && numchildren(lhs) == 2)) + # `f() .+= rhs` + lhs = emit_assign_tmp(stmts, ctx, lhs) + end + else + if kind(lhs) == K"tuple" && contains_ssa_binding(ctx, lhs) + # If remove_argument_side_effects needed to replace an expression + # with an ssavalue, then it can't be updated by assignment + # (JuliaLang/julia#30062) + throw(LoweringError(lhs, "invalid multiple assignment location")) + end + end + + @ast ctx ex [K"block" + stmts... + [(dotted ? K".=" : K"=") + lhs + [(dotted ? 
K"dotcall" : K"call") + op + if isnothing(declT) + lhs + else + [K"::"(decl_lhs) lhs declT] + end + rhs + ] + ] + ] +end + +#------------------------------------------------------------------------------- +# Expand logical conditional statements + +# Flatten nested && or || nodes and expand their children +function expand_cond_children(ctx, ex, cond_kind=kind(ex), flat_children=SyntaxList(ctx)) + for e in children(ex) + if kind(e) == cond_kind + expand_cond_children(ctx, e, cond_kind, flat_children) + else + push!(flat_children, expand_forms_2(ctx, e)) + end + end + flat_children +end + +# Expand condition in, eg, `if` or `while` +function expand_condition(ctx, ex) + isblock = kind(ex) == K"block" + test = isblock ? ex[end] : ex + k = kind(test) + if k == K"&&" || k == K"||" + # `||` and `&&` get special lowering so that they compile directly to + # jumps rather than first computing a bool and then jumping. + cs = expand_cond_children(ctx, test) + @assert length(cs) > 1 + test = makenode(ctx, test, k, cs) + else + test = expand_forms_2(ctx, test) + end + if isblock + # Special handling so that the rules for `&&` and `||` can be applied + # to the last statement of a block + @ast ctx ex [K"block" map(e->expand_forms_2(ctx,e), ex[1:end-1])... 
test] + else + test + end +end + +#------------------------------------------------------------------------------- +# Expand let blocks + +function expand_let(ctx, ex) + @chk numchildren(ex) == 2 + bindings = ex[1] + @chk kind(bindings) == K"block" + blk = ex[2] + scope_type = get(ex, :scope_type, :hard) + if numchildren(bindings) == 0 + return @ast ctx ex [K"scope_block"(scope_type=scope_type) blk] + end + for binding in Iterators.reverse(children(bindings)) + kb = kind(binding) + if is_sym_decl(kb) + blk = @ast ctx ex [K"scope_block"(scope_type=scope_type) + [K"local" binding] + blk + ] + elseif kb == K"=" && numchildren(binding) == 2 + lhs = binding[1] + rhs = binding[2] + kl = kind(lhs) + if kl == K"Identifier" || kl == K"BindingId" + blk = @ast ctx binding [K"block" + tmp := rhs + [K"scope_block"(ex, scope_type=scope_type) + [K"local"(lhs) lhs] + [K"always_defined" lhs] + [K"="(binding) lhs tmp] + blk + ] + ] + elseif kl == K"::" + var = lhs[1] + if !(kind(var) in KSet"Identifier BindingId") + throw(LoweringError(var, "Invalid assignment location in let syntax")) + end + blk = @ast ctx binding [K"block" + tmp := rhs + type := lhs[2] + [K"scope_block"(ex, scope_type=scope_type) + [K"local"(lhs) [K"::" var type]] + [K"always_defined" var] + [K"="(binding) var tmp] + blk + ] + ] + elseif kind(lhs) == K"tuple" + lhs_locals = SyntaxList(ctx) + foreach_lhs_name(lhs) do var + push!(lhs_locals, @ast ctx var [K"local" var]) + push!(lhs_locals, @ast ctx var [K"always_defined" var]) + end + blk = @ast ctx binding [K"block" + tmp := rhs + [K"scope_block"(ex, scope_type=scope_type) + lhs_locals... + [K"="(binding) lhs tmp] + blk + ] + ] + else + throw(LoweringError(lhs, "Invalid assignment location in let syntax")) + end + elseif kind(binding) == K"function" + sig = binding[1] + func_name = assigned_function_name(sig) + if isnothing(func_name) + # Some valid function syntaxes define methods on existing types and + # don't really make sense with let: + # let A.f() = 1 ... 
end
                # let (obj::Callable)() = 1 ... end
                throw(LoweringError(sig, "Function signature does not define a local function name"))
            end
            blk = @ast ctx binding [K"block"
                [K"scope_block"(ex, scope_type=scope_type)
                    [K"local"(func_name) func_name]
                    [K"always_defined" func_name]
                    binding
                    [K"scope_block"(ex, scope_type=scope_type)
                        # The inside of the block is isolated from the closure,
                        # which itself can only capture values from the outside.
                        blk
                    ]
                ]
            ]
        else
            throw(LoweringError(binding, "Invalid binding in let"))
            continue
        end
    end
    return blk
end

#-------------------------------------------------------------------------------
# Expand named tuples

# Construct a `Core.NamedTuple` expression with the given `names` and `values`.
function _named_tuple_expr(ctx, srcref, names, values)
    if isempty(names)
        @ast ctx srcref [K"call" "NamedTuple"::K"core"]
    else
        @ast ctx srcref [K"call"
            [K"curly" "NamedTuple"::K"core" [K"tuple" names...]]
            # NOTE: don't use `tuple` head, so an assignment expression as a value
            # doesn't turn this into another named tuple.
            [K"call" "tuple"::K"core" values...]
        ]
    end
end

# Merge named tuple expression `new` into `old`. `old === nothing` means no
# tuple has been accumulated yet, in which case `new` is used as-is.
function _merge_named_tuple(ctx, srcref, old, new)
    if isnothing(old)
        new
    else
        @ast ctx srcref [K"call" "merge"::K"top" old new]
    end
end

function expand_named_tuple(ctx, ex, kws;
                            field_name="named tuple field",
                            element_name="named tuple element")
    name_strs = Set{String}()
    names = SyntaxList(ctx)
    values = SyntaxList(ctx)
    current_nt = nothing
    for (i,kw) in enumerate(kws)
        k = kind(kw)
        appended_nt = nothing
        name = nothing
        # Consistency fix: compare `k` directly rather than `kind(k)` - `k` is
        # already the kind of `kw`, matching the other branches below.
        if k == K"Identifier"
            # x ==> x = x
            name = to_symbol(ctx, kw)
            value = kw
        elseif k == K"="
            # x = a
            if kind(kw[1]) != K"Identifier" && kind(kw[1]) != K"Placeholder"
                throw(LoweringError(kw[1], "invalid $field_name name"))
            end
            if kind(kw[2]) == K"..."
                throw(LoweringError(kw[2], "`...` cannot be used in a value for a $field_name"))
            end
            name = to_symbol(ctx, kw[1])
            value = kw[2]
        elseif k == K"."
+ # a.x ==> x=a.x + if kind(kw[2]) != K"Symbol" + throw(LoweringError(kw, "invalid $element_name")) + end + name = to_symbol(ctx, kw[2]) + value = kw + elseif k == K"call" && is_infix_op_call(kw) && numchildren(kw) == 3 && + is_same_identifier_like(kw[1], "=>") + # a=>b ==> $a=b + appended_nt = _named_tuple_expr(ctx, kw, (kw[2],), (kw[3],)) + nothing, nothing + elseif k == K"..." + # args... ==> splat pairs + appended_nt = kw[1] + if isnothing(current_nt) && isempty(names) + # Must call merge to create NT from an initial splat + current_nt = _named_tuple_expr(ctx, ex, (), ()) + end + nothing, nothing + else + throw(LoweringError(kw, "Invalid $element_name")) + end + if !isnothing(name) + if kind(name) == K"Symbol" + name_str = name.name_val + if name_str in name_strs + throw(LoweringError(name, "Repeated $field_name name")) + end + push!(name_strs, name_str) + end + push!(names, name) + push!(values, value) + end + if !isnothing(appended_nt) + if !isempty(names) + current_nt = _merge_named_tuple(ctx, ex, current_nt, + _named_tuple_expr(ctx, ex, names, values)) + empty!(names) + empty!(values) + end + current_nt = _merge_named_tuple(ctx, ex, current_nt, appended_nt) + end + end + if !isempty(names) || isnothing(current_nt) + current_nt = _merge_named_tuple(ctx, ex, current_nt, + _named_tuple_expr(ctx, ex, names, values)) + end + @assert !isnothing(current_nt) + current_nt +end + +#------------------------------------------------------------------------------- +# Call expansion + +function expand_kw_call(ctx, srcref, farg, args, kws) + @ast ctx srcref [K"block" + func := farg + kw_container := expand_named_tuple(ctx, srcref, kws; + field_name="keyword argument", + element_name="keyword argument") + if all(kind(kw) == K"..." for kw in kws) + # In this case need to check kws nonempty at runtime + [K"if" + [K"call" "isempty"::K"top" kw_container] + [K"call" func args...] + [K"call" "kwcall"::K"core" kw_container func args...] 
+ ] + else + [K"call" "kwcall"::K"core" kw_container func args...] + end + ] +end + +# Expand the (sym,lib) argument to ccall/cglobal +function expand_C_library_symbol(ctx, ex) + expanded = expand_forms_2(ctx, ex) + if kind(ex) == K"tuple" + expanded = @ast ctx ex [K"static_eval"(meta=name_hint("function name and library expression")) + expanded + ] + end + return expanded +end + +function expand_ccall(ctx, ex) + @assert kind(ex) == K"call" && is_core_ref(ex[1], "ccall") + if numchildren(ex) < 4 + throw(LoweringError(ex, "too few arguments to ccall")) + end + cfunc_name = ex[2] + # Detect calling convention if present. + known_conventions = ("cdecl", "stdcall", "fastcall", "thiscall", "llvmcall") + cconv = if any(is_same_identifier_like(ex[3], id) for id in known_conventions) + ex[3] + end + if isnothing(cconv) + rt_idx = 3 + else + rt_idx = 4 + if numchildren(ex) < 5 + throw(LoweringError(ex, "too few arguments to ccall with calling convention specified")) + end + end + return_type = ex[rt_idx] + arg_type_tuple = ex[rt_idx+1] + args = ex[rt_idx+2:end] + if kind(arg_type_tuple) != K"tuple" + msg = "ccall argument types must be a tuple; try `(T,)`" + if kind(return_type) == K"tuple" + throw(LoweringError(return_type, msg*" and check if you specified a correct return type")) + else + throw(LoweringError(arg_type_tuple, msg)) + end + end + arg_types = children(arg_type_tuple) + vararg_type = nothing + num_required_args = length(arg_types) + if length(arg_types) >= 1 + va = arg_types[end] + if kind(va) == K"..." 
+ @chk numchildren(va) == 1 + # Ok: vararg function + vararg_type = va + if length(arg_types) <= 1 + throw(LoweringError(vararg_type, "C ABI prohibits vararg without one required argument")) + else + num_required_args = length(arg_types) - 1 + end + end + end + # todo: use multi-range errors here + if length(args) < num_required_args + throw(LoweringError(ex, "Too few arguments in ccall compared to argument types")) + elseif length(args) > length(arg_types) && isnothing(vararg_type) + throw(LoweringError(ex, "More arguments than types in ccall")) + end + sctx = with_stmts(ctx) + expanded_types = SyntaxList(ctx) + for (i, argt) in enumerate(arg_types) + if kind(argt) == K"..." + if i == length(arg_types) + argt = argt[1] + else + throw(LoweringError(argt, "only the trailing ccall argument type should have `...`")) + end + end + if is_same_identifier_like(argt, "Any") + # Special rule: Any becomes core.Any regardless of the module + # scope, and don't need GC roots. + argt = @ast ctx argt "Any"::K"core" + end + push!(expanded_types, expand_forms_2(ctx, argt)) + end + # + # An improvement might be wrap the use of types in cconvert in a special + # K"global_scope" expression which modifies the scope resolution. This + # would at least make the rules self consistent if not pretty. + # + # One small improvement we make here is to emit temporaries for all the + # types used during expansion so at least we don't have their side effects + # more than once. 
+ types_for_conv = SyntaxList(ctx) + for argt in expanded_types + push!(types_for_conv, emit_assign_tmp(sctx, argt)) + end + gc_roots = SyntaxList(ctx) + unsafe_args = SyntaxList(ctx) + for (i,arg) in enumerate(args) + if i > length(expanded_types) + raw_argt = expanded_types[end] + push!(expanded_types, raw_argt) + argt = types_for_conv[end] + else + raw_argt = expanded_types[i] + argt = types_for_conv[i] + end + exarg = expand_forms_2(ctx, arg) + if is_core_Any(raw_argt) + push!(unsafe_args, exarg) + else + cconverted_arg = emit_assign_tmp(sctx, + @ast ctx argt [K"call" + "cconvert"::K"top" + argt + exarg + ] + ) + push!(gc_roots, cconverted_arg) + push!(unsafe_args, + @ast ctx argt [K"call" + "unsafe_convert"::K"top" + argt + cconverted_arg + ] + ) + end + end + @ast ctx ex [K"block" + sctx.stmts... + [K"foreigncall" + expand_C_library_symbol(ctx, cfunc_name) + [K"static_eval"(meta=name_hint("ccall return type")) + expand_forms_2(ctx, return_type) + ] + [K"static_eval"(meta=name_hint("ccall argument type")) + [K"call" + "svec"::K"core" + expanded_types... + ] + ] + (isnothing(vararg_type) ? 0 : num_required_args)::K"Integer" + if isnothing(cconv) + "ccall"::K"Symbol" + else + cconv=>K"Symbol" + end + unsafe_args... + gc_roots... 
# GC roots + ] + ] +end + +function remove_kw_args!(ctx, args::SyntaxList) + kws = nothing + j = 0 + num_parameter_blocks = 0 + for i in 1:length(args) + arg = args[i] + k = kind(arg) + if k == K"=" + if isnothing(kws) + kws = SyntaxList(ctx) + end + push!(kws, arg) + elseif k == K"parameters" + num_parameter_blocks += 1 + if num_parameter_blocks > 1 + throw(LoweringError(arg, "Cannot have more than one group of keyword arguments separated with `;`")) + end + if numchildren(arg) == 0 + continue # ignore empty parameters (issue #18845) + end + if isnothing(kws) + kws = SyntaxList(ctx) + end + append!(kws, children(arg)) + else + j += 1 + if j < i + args[j] = args[i] + end + end + end + resize!(args, j) + return kws +end + +function expand_call(ctx, ex) + farg = ex[1] + if is_core_ref(farg, "ccall") + return expand_ccall(ctx, ex) + elseif is_core_ref(farg, "cglobal") + @chk numchildren(ex) in 2:3 (ex, "cglobal must have one or two arguments") + return @ast ctx ex [K"call" + ex[1] + expand_C_library_symbol(ctx, ex[2]) + if numchildren(ex) == 3 + expand_forms_2(ctx, ex[3]) + end + ] + end + args = copy(ex[2:end]) + kws = remove_kw_args!(ctx, args) + if !isnothing(kws) + return expand_forms_2(ctx, expand_kw_call(ctx, ex, farg, args, kws)) + end + if any(kind(arg) == K"..." for arg in args) + # Splatting, eg, `f(a, xs..., b)` + expand_splat(ctx, ex, expand_forms_2(ctx, farg), args) + elseif kind(farg) == K"Identifier" && farg.name_val == "include" + # world age special case + r = ssavar(ctx, ex) + @ast ctx ex [K"block" + [K"=" r [K"call" + expand_forms_2(ctx, farg) + expand_forms_2(ctx, args)... + ]] + (::K"latestworld_if_toplevel") + r + ] + else + @ast ctx ex [K"call" + expand_forms_2(ctx, farg) + expand_forms_2(ctx, args)... 
+ ] + end +end + +#------------------------------------------------------------------------------- + +function expand_dot(ctx, ex) + @chk numchildren(ex) in (1,2) (ex, "`.` form requires either one or two children") + + if numchildren(ex) == 1 + # eg, `f = .+` + # Upstream TODO: Remove the (. +) representation and replace with use + # of DOTOP_FLAG? This way, `K"."` will be exclusively used for + # getproperty. + @ast ctx ex [K"call" + "BroadcastFunction"::K"top" + ex[1] + ] + elseif numchildren(ex) == 2 + # eg, `x.a` syntax + rhs = ex[2] + # Required to support the possibly dubious syntax `a."b"`. See + # https://github.com/JuliaLang/julia/issues/26873 + # Syntax edition TODO: reconsider this; possibly restrict to only K"String"? + if !(kind(rhs) == K"string" || is_leaf(rhs)) + throw(LoweringError(rhs, "Unrecognized field access syntax")) + end + @ast ctx ex [K"call" + "getproperty"::K"top" + ex[1] + rhs + ] + end +end + +#------------------------------------------------------------------------------- +# Expand for loops + +function expand_for(ctx, ex) + iterspecs = ex[1] + + @chk kind(iterspecs) == K"iteration" + + # Loop variables not declared `outer` are reassigned for each iteration of + # the innermost loop in case the user assigns them to something else. + # (Maybe we should filter these to remove vars not assigned in the loop? + # But that would ideally happen after the variable analysis pass, not + # during desugaring.) 
+ copied_vars = SyntaxList(ctx) + for iterspec in iterspecs[1:end-1] + @chk kind(iterspec) == K"in" + lhs = iterspec[1] + if kind(lhs) != K"outer" + foreach_lhs_name(lhs) do var + push!(copied_vars, @ast ctx var [K"=" var var]) + end + end + end + + loop = ex[2] + for i in numchildren(iterspecs):-1:1 + iterspec = iterspecs[i] + lhs = iterspec[1] + + outer = kind(lhs) == K"outer" + lhs_local_defs = SyntaxList(ctx) + lhs_outer_defs = SyntaxList(ctx) + if outer + lhs = lhs[1] + end + foreach_lhs_name(lhs) do var + if outer + push!(lhs_outer_defs, @ast ctx var var) + else + push!(lhs_local_defs, @ast ctx var [K"local" var]) + end + end + + iter_ex = iterspec[2] + next = new_local_binding(ctx, iterspec, "next") + state = ssavar(ctx, iterspec, "state") + collection = ssavar(ctx, iter_ex, "collection") + + # Assign iteration vars and next state + body = @ast ctx iterspec [K"block" + lhs_local_defs... + lower_tuple_assignment(ctx, iterspec, (lhs, state), next) + loop + ] + + body = if i == numchildren(iterspecs) + # Innermost loop gets the continue label and copied vars + @ast ctx ex [K"break_block" + "loop_cont"::K"symbolic_label" + [K"let"(scope_type=:neutral) + [K"block" + copied_vars... + ] + body + ] + ] + else + # Outer loops get a scope block to contain the iteration vars + @ast ctx ex [K"scope_block"(scope_type=:neutral) + body + ] + end + + loop = @ast ctx ex [K"block" + if outer + [K"assert" + "require_existing_locals"::K"Symbol" + lhs_outer_defs... 
+ ] + end + [K"="(iter_ex) collection iter_ex] + # First call to iterate is unrolled + # next = top.iterate(collection) + [K"="(iterspec) next [K"call" "iterate"::K"top" collection]] + [K"if"(iterspec) # if next !== nothing + [K"call"(iterspec) + "not_int"::K"top" + [K"call" "==="::K"core" next "nothing"::K"core"] + ] + [K"_do_while"(ex) + [K"block" + body + # Advance iterator + [K"="(iterspec) next [K"call" "iterate"::K"top" collection state]] + ] + [K"call"(iterspec) + "not_int"::K"top" + [K"call" "==="::K"core" next "nothing"::K"core"] + ] + ] + ] + ] + end + + @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" + loop + ] +end + +#------------------------------------------------------------------------------- +# Expand try/catch/finally + +function match_try(ex) + @chk numchildren(ex) > 1 "Invalid `try` form" + try_ = ex[1] + catch_ = nothing + finally_ = nothing + else_ = nothing + for e in ex[2:end] + k = kind(e) + if k == K"catch" && isnothing(catch_) + @chk numchildren(e) == 2 "Invalid `catch` form" + catch_ = e + elseif k == K"else" && isnothing(else_) + @chk numchildren(e) == 1 + else_ = e[1] + elseif k == K"finally" && isnothing(finally_) + @chk numchildren(e) == 1 + finally_ = e[1] + else + throw(LoweringError(ex, "Invalid clause in `try` form")) + end + end + (try_, catch_, else_, finally_) +end + +function expand_try(ctx, ex) + (try_, catch_, else_, finally_) = match_try(ex) + + if !isnothing(finally_) + # TODO: check unmatched symbolic gotos in try. 
+ end + + try_body = @ast ctx try_ [K"scope_block"(scope_type=:neutral) try_] + + if isnothing(catch_) + try_block = try_body + else + exc_var = catch_[1] + catch_block = catch_[2] + if !is_identifier_like(exc_var) + throw(LoweringError(exc_var, "Expected an identifier as exception variable")) + end + try_block = @ast ctx ex [K"trycatchelse" + try_body + [K"scope_block"(catch_, scope_type=:neutral) + if kind(exc_var) != K"Placeholder" + [K"block" + [K"="(exc_var) exc_var [K"call" current_exception::K"Value"]] + catch_block + ] + else + catch_block + end + ] + if !isnothing(else_) + [K"scope_block"(else_, scope_type=:neutral) else_] + end + ] + end + + if isnothing(finally_) + try_block + else + @ast ctx ex [K"tryfinally" + try_block + [K"scope_block"(finally_, scope_type=:neutral) finally_] + ] + end +end + +#------------------------------------------------------------------------------- +# Expand local/global/const declarations + +# Create local/global declarations, and possibly type declarations for each name +# on an assignment LHS. Works recursively with complex left hand side +# assignments containing tuple destructuring. Eg, given +# (x::T, (y::U, z)) +# strip out stmts = (local x) (decl x T) (local x) (decl y U) (local z) +# and return (x, (y, z)) +function make_lhs_decls(ctx, stmts, declkind, declmeta, ex, type_decls=true) + k = kind(ex) + if k == K"Identifier" || k == K"Value" && ex.value isa GlobalRef + # TODO: consider removing support for Expr(:global, GlobalRef(...)) and + # other Exprs that cannot be produced by the parser (tested by + # test/precompile.jl #50538). 
+ if !isnothing(declmeta) + push!(stmts, makenode(ctx, ex, declkind, ex; meta=declmeta)) + else + push!(stmts, makenode(ctx, ex, declkind, ex)) + end + elseif k == K"Placeholder" + nothing + elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where" + if type_decls + @chk numchildren(ex) == 2 + name = ex[1] + @chk kind(name) == K"Identifier" + push!(stmts, makenode(ctx, ex, K"decl", name, ex[2])) + end + make_lhs_decls(ctx, stmts, declkind, declmeta, ex[1], type_decls) + elseif k == K"tuple" || k == K"parameters" + for e in children(ex) + make_lhs_decls(ctx, stmts, declkind, declmeta, e, type_decls) + end + else + throw(LoweringError(ex, "invalid kind $k in $declkind declaration")) + end +end + +# Separate decls and assignments (which require re-expansion) +# local x, (y=2), z ==> local x; local z; y = 2 +function expand_decls(ctx, ex) + declkind = kind(ex) + @assert declkind in KSet"local global" + declmeta = get(ex, :meta, nothing) + bindings = children(ex) + stmts = SyntaxList(ctx) + for binding in bindings + if is_prec_assignment(kind(binding)) + @chk numchildren(binding) == 2 + # expand_assignment will create the type decls + make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false) + push!(stmts, expand_assignment(ctx, binding)) + elseif is_sym_decl(binding) || kind(binding) == K"Value" + make_lhs_decls(ctx, stmts, declkind, declmeta, binding, true) + elseif kind(binding) == K"function" + make_lhs_decls(ctx, stmts, declkind, declmeta, binding[1], false) + push!(stmts, expand_forms_2(ctx, binding)) + else + throw(LoweringError(ex, "invalid syntax in variable declaration")) + end + end + makenode(ctx, ex, K"block", stmts) +end + +# Iterate over the variable names assigned to from a "fancy assignment left hand +# side" such as nested tuple destructuring, curlies, and calls. 
+function foreach_lhs_name(f::Function, ex)
+    k = kind(ex)
+    if k == K"Placeholder"
+        # Ignored
+    elseif is_identifier_like(ex)
+        f(ex)
+    elseif (k === K"::" && numchildren(ex) === 2) || k in KSet"call curly where"
+        # Type-annotated names and call/curly/where forms: the assigned name is
+        # found in the first child (eg, `x` in `x::T`)
+        foreach_lhs_name(f, ex[1])
+    elseif k in KSet"tuple parameters"
+        # Nested tuple destructuring: visit each element recursively
+        for c in children(ex)
+            foreach_lhs_name(f, c)
+        end
+    end
+    return nothing
+end
+
+# Expand `const` declarations. `ex[1]` is the form following the `const`
+# keyword: an assignment (`const x = 1`), a `global` wrapping an assignment or
+# function definition (`const global x = 1`), a function definition, or — as a
+# legacy form — a bare identifier/GlobalRef.
+function expand_const_decl(ctx, ex)
+    k = kind(ex[1])
+    if k == K"global"
+        asgn = ex[1][1]
+        @chk (kind(asgn) == K"=" || kind(asgn) == K"function") (ex, "expected assignment after `const`")
+        # Emit an explicit `global` declaration for each name bound on the LHS
+        # before expanding the assignment itself as a const assignment.
+        globals = SyntaxList(ctx)
+        foreach_lhs_name(asgn[1]) do x
+            push!(globals, @ast ctx ex [K"global" x])
+        end
+        @ast ctx ex [K"block"
+            globals...
+            expand_assignment(ctx, asgn, true)
+        ]
+    elseif k == K"=" || k == K"function"
+        expand_assignment(ctx, ex[1], true)
+    elseif k == K"local"
+        throw(LoweringError(ex, "unsupported `const local` declaration"))
+    elseif k == K"Identifier" || k == K"Value"
+        # Expr(:const, v) where v is a Symbol or a GlobalRef is an unfortunate
+        # remnant from the days when const-ness was a flag that could be set on
+        # any global. It creates a binding with kind PARTITION_KIND_UNDEF_CONST.
+        # TODO: deprecate and delete this "feature"
+        @chk numchildren(ex) == 1
+        @ast ctx ex [K"constdecl" ex[1]]
+    else
+        throw(LoweringError(ex, "expected assignment after `const`"))
+    end
+end
+
+# Take apart one function argument expression `arg`, returning
+# `(name, type, default, is_slurp)` where
+# - `name` is the argument name: an identifier, a placeholder, or a fresh local
+#   binding when the argument uses tuple destructuring syntax
+# - `type` is the declared argument type (`Core.Any` when unannotated; wrapped
+#   in `Core.Vararg` for a trailing `arg...`)
+# - `default` is the default value from `arg = default`, or `nothing`
+# - `is_slurp` is true when the argument uses `...`
+# Statements required to destructure a tuple argument are pushed onto
+# `body_stmts`. `is_kw` selects keyword-argument-specific validation and error
+# messages; `is_last_arg` is used to reject `...` on non-final arguments.
+function expand_function_arg(ctx, body_stmts, arg, is_last_arg, is_kw)
+    ex = arg
+
+    if kind(ex) == K"="
+        default = ex[2]
+        ex = ex[1]
+    else
+        default = nothing
+    end
+
+    if kind(ex) == K"..."
+        if !is_last_arg
+            typmsg = is_kw ? "keyword" : "positional"
+            throw(LoweringError(arg, "`...` may only be used for the last $typmsg argument"))
+        end
+        @chk numchildren(ex) == 1
+        slurp_ex = ex
+        ex = ex[1]
+    else
+        slurp_ex = nothing
+    end
+
+    if kind(ex) == K"::"
+        @chk numchildren(ex) in (1,2)
+        if numchildren(ex) == 1
+            # `::T` with no name — use a placeholder for the argument name
+            type = ex[1]
+            ex = @ast ctx ex "_"::K"Placeholder"
+        else
+            type = ex[2]
+            ex = ex[1]
+        end
+        if is_kw && !isnothing(slurp_ex)
+            throw(LoweringError(slurp_ex, "keyword argument with `...` may not be given a type"))
+        end
+    else
+        type = @ast ctx ex "Any"::K"core"
+    end
+    if !isnothing(slurp_ex)
+        # A trailing `arg...` becomes `Vararg{T}` in the method signature
+        type = @ast ctx slurp_ex [K"curly" "Vararg"::K"core" type]
+    end
+
+    k = kind(ex)
+    if k == K"tuple" && !is_kw
+        # Argument destructuring
+        is_nospecialize = getmeta(arg, :nospecialize, false)
+        name = new_local_binding(ctx, ex, "destructured_arg";
+                                 kind=:argument, is_nospecialize=is_nospecialize)
+        push!(body_stmts, @ast ctx ex [
+            K"local"(meta=CompileHints(:is_destructured_arg, true))
+            [K"=" ex name]
+        ])
+    elseif k == K"Identifier" || k == K"Placeholder"
+        name = ex
+    else
+        throw(LoweringError(ex, is_kw ? "Invalid keyword name" : "Invalid function argument"))
+    end
+
+    return (name, type, default, !isnothing(slurp_ex))
+end
+
+# Expand `where` clause(s) of a function into (typevar_names, typevar_stmts) where
+# - `typevar_names` are the names of the type's type parameters
+# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter
+#   name in `typevar_names`, with exactly one per `typevar_name`. Some of these
+#   may already have been emitted.
+# - `new_typevar_stmts` is the list of statements which needs to be emitted
+#   prior to uses of `typevar_names`.
+# Recursively strip nested `where` clauses from `ex`, accumulating typevar
+# names and definition statements into the three list arguments; returns the
+# innermost (non-`where`) expression.
+function _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex)
+    if kind(ex) == K"where" && numchildren(ex) == 2
+        vars_kind = kind(ex[2])
+        if vars_kind == K"_typevars"
+            # Already-expanded typevars: child 1 holds the names, child 2 the
+            # TypeVar definition statements (already emitted, so they don't go
+            # into `new_typevar_stmts`).
+            append!(typevar_names, children(ex[2][1]))
+            append!(typevar_stmts, children(ex[2][2]))
+        else
+            # `where {S, T}` (braces) vs a single `where T`
+            params = vars_kind == K"braces" ? ex[2][1:end] : ex[2:2]
+            n_existing = length(new_typevar_stmts)
+            expand_typevars!(ctx, typevar_names, new_typevar_stmts, params)
+            # Only the statements just appended by expand_typevars! are new
+            append!(typevar_stmts, view(new_typevar_stmts, n_existing+1:length(new_typevar_stmts)))
+        end
+        _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, ex[1])
+    else
+        ex
+    end
+end
+
+# Construct the expression defining a single method: first evaluate the
+# signature metadata — argument types, typevars and source location — into
+# nested `Core.svec`s, then emit a K"method" node wrapping a K"lambda" for
+# `body`. When `method_table` is `nothing`, `Core.nothing` is passed in its
+# place (presumably selecting the default method table — confirm).
+# `ret_var`, when given, is appended to the lambda for the return type.
+function method_def_expr(ctx, srcref, callex_srcref, method_table,
+                         typevar_names, arg_names, arg_types, body, ret_var=nothing)
+    @ast ctx srcref [K"block"
+        # metadata contains svec(types, sparms, location)
+        method_metadata := [K"call"(callex_srcref)
+            "svec" ::K"core"
+            [K"call"
+                "svec" ::K"core"
+                arg_types...
+            ]
+            [K"call"
+                "svec" ::K"core"
+                typevar_names...
+            ]
+            ::K"SourceLocation"(callex_srcref)
+        ]
+        [K"method"
+            isnothing(method_table) ? "nothing"::K"core" : method_table
+            method_metadata
+            [K"lambda"(body, is_toplevel_thunk=false, toplevel_pure=false)
+                [K"block" arg_names...]
+                [K"block" typevar_names...]
+                body
+                ret_var # might be `nothing` and hence removed
+            ]
+        ]
+        [K"removable" method_metadata]
+    ]
+end
+
+# Select static parameters which are used in function arguments `arg_types`, or
+# transitively used.
+#
+# The transitive usage check probably doesn't guarantee that the types are
+# inferable during dispatch as they may only be part of the bounds of another
+# type. Thus we might get false positives here but we shouldn't get false
+# negatives.
+# Returns a Vector{Bool} with one entry per entry of `typevar_names`: true
+# when the typevar appears in some argument type, or appears in the defining
+# statement of a later typevar that is itself (transitively) used.
+function select_used_typevars(arg_types, typevar_names, typevar_stmts)
+    n_typevars = length(typevar_names)
+    @assert n_typevars == length(typevar_stmts)
+    # Filter typevar names down to those which are directly used in the arg list
+    typevar_used = Bool[any(contains_identifier(argtype, tn) for argtype in arg_types)
+                        for tn in typevar_names]
+    # _Or_ used transitively via other typevars. The following code
+    # computes this by incrementally coloring the graph of dependencies
+    # between type vars.
+    found_used = true
+    while found_used
+        # Repeat until no more typevars are newly marked used (fixpoint)
+        found_used = false
+        for (i,tn) in enumerate(typevar_names)
+            if typevar_used[i]
+                continue
+            end
+            # Only later typevars (j > i) are scanned: a typevar's bounds can
+            # presumably only reference earlier typevar names — confirm.
+            for j = i+1:n_typevars
+                if typevar_used[j] && contains_identifier(typevar_stmts[j], tn)
+                    found_used = true
+                    typevar_used[i] = true
+                    break
+                end
+            end
+        end
+    end
+    typevar_used
+end
+
+# Throw a LoweringError when any typevar in `typevar_names` is not used,
+# directly or transitively, in `arg_types` — such a typevar's value could
+# never be determined during dispatch.
+function check_all_typevars_used(arg_types, typevar_names, typevar_stmts)
+    selected = select_used_typevars(arg_types, typevar_names, typevar_stmts)
+    unused_typevar = findfirst(s->!s, selected)
+    if !isnothing(unused_typevar)
+        # Type variables which may be statically determined to be unused in
+        # any function argument and therefore can't be inferred during
+        # dispatch.
+        throw(LoweringError(typevar_names[unused_typevar],
+            "Method definition declares type variable but does not use it in the type of any function parameter"))
+    end
+end
+
+# Return `typevar_names` which are used directly or indirectly in `arg_types`.
+function trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) + typevar_used = select_used_typevars(arg_types, typevar_names, typevar_stmts) + trimmed_typevar_names = SyntaxList(ctx) + for (used,tn) in zip(typevar_used, typevar_names) + if used + push!(trimmed_typevar_names, tn) + end + end + return trimmed_typevar_names +end + +function is_if_generated(ex) + kind(ex) == K"if" && kind(ex[1]) == K"generated" +end + +# Return true if a function body contains a code generator from `@generated` in +# the form `[K"if" [K"generated"] ...]` +function is_generated(ex) + if is_if_generated(ex) + return true + elseif is_quoted(ex) || kind(ex) == K"function" + return false + else + return any(is_generated, children(ex)) + end +end + +function split_generated(ctx, ex, gen_part) + if is_leaf(ex) + ex + elseif is_if_generated(ex) + gen_part ? @ast(ctx, ex, [K"$" ex[2]]) : ex[3] + else + mapchildren(e->split_generated(ctx, e, gen_part), ctx, ex) + end +end + +# Split @generated function body into two parts: +# * The code generator +# * The non-generated function body +function expand_function_generator(ctx, srcref, callex_srcref, func_name, func_name_str, body, arg_names, typevar_names) + gen_body = if is_if_generated(body) + body[2] # Simple case - don't need interpolation when the whole body is generated + else + expand_quote(ctx, @ast ctx body [K"block" split_generated(ctx, body, true)]) + end + gen_name_str = reserve_module_binding_i(ctx.mod, + "#$(isnothing(func_name_str) ? 
"_" : func_name_str)@generator#") + gen_name = new_global_binding(ctx, body, gen_name_str, ctx.mod) + + # Set up the arguments for the code generator + gen_arg_names = SyntaxList(ctx) + gen_arg_types = SyntaxList(ctx) + # Self arg + push!(gen_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument)) + push!(gen_arg_types, @ast ctx callex_srcref [K"function_type" gen_name]) + # Macro expansion context arg + if kind(func_name) != K"Identifier" + TODO(func_name, "Which scope do we adopt for @generated generator `__context__` in this case?") + end + push!(gen_arg_names, adopt_scope(@ast(ctx, callex_srcref, "__context__"::K"Identifier"), func_name)) + push!(gen_arg_types, @ast(ctx, callex_srcref, MacroContext::K"Value")) + # Trailing arguments to the generator are provided by the Julia runtime. They are: + # static_parameters... parent_function arg_types... + first_trailing_arg = length(gen_arg_names) + 1 + append!(gen_arg_names, typevar_names) + append!(gen_arg_names, arg_names) + # Apply nospecialize to all arguments to prevent so much codegen and add + # Core.Any type for them + for i in first_trailing_arg:length(gen_arg_names) + gen_arg_names[i] = setmeta(gen_arg_names[i]; nospecialize=true) + push!(gen_arg_types, @ast ctx gen_arg_names[i] "Any"::K"core") + end + # Code generator definition + gen_func_method_defs = @ast ctx srcref [K"block" + [K"function_decl" gen_name] + [K"scope_block"(scope_type=:hard) + [K"method_defs" + gen_name + [K"block" + method_def_expr(ctx, srcref, callex_srcref, nothing, SyntaxList(ctx), + gen_arg_names, gen_arg_types, gen_body, nothing) + ] + ] + ] + ] + + # Extract non-generated body + nongen_body = @ast ctx body [K"block" + # The Julia runtime associates the code generator with the + # non-generated method by adding this meta to the body. This feels like + # a hack though since the generator ultimately gets attached to the + # method rather than the CodeInfo which we're putting it inside. 
+ [K"meta" + "generated"::K"Symbol" + # The following is code to be evaluated at top level and will wrap + # whatever code comes from the user's generator into an appropriate + # K"lambda" (+ K"with_static_parameters") suitable for lowering + # into a CodeInfo. + # + # todo: As isolated top-level code, we don't actually want to apply + # the normal scope rules of the surrounding function ... it should + # technically have scope resolved at top level. + [K"new" + GeneratedFunctionStub::K"Value" # Use stub type from JuliaLowering + gen_name + # Truncate provenance to just the source file range, as this + # will live permanently in the IR and we probably don't want + # the full provenance tree and intermediate expressions + # (TODO: More truncation. We certainly don't want to store the + # source file either.) + sourceref(srcref)::K"Value" + [K"call" + "svec"::K"core" + "#self#"::K"Symbol" + (n.name_val::K"Symbol"(n) for n in arg_names[2:end])... + ] + [K"call" + "svec"::K"core" + (n.name_val::K"Symbol"(n) for n in typevar_names)... + ] + ] + ] + split_generated(ctx, body, false) + ] + + return gen_func_method_defs, nongen_body +end + +# Generate a method for every number of allowed optional arguments +# For example for `f(x, y=1, z=2)` we generate two additional methods +# f(x) = f(x, 1, 2) +# f(x, y) = f(x, y, 2) +function optional_positional_defs!(ctx, method_stmts, srcref, callex, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, + arg_defaults) + # Replace placeholder arguments with variables - we need to pass them to + # the inner method for dispatch even when unused in the inner method body + def_arg_names = map(arg_names) do arg + kind(arg) == K"Placeholder" ? 
+ new_local_binding(ctx, arg, arg.name_val; kind=:argument) : + arg + end + for def_idx = 1:length(arg_defaults) + first_omitted = first_default + def_idx - 1 + trimmed_arg_names = def_arg_names[1:first_omitted-1] + # Call the full method directly if no arguments are reused in + # subsequent defaults. Otherwise conservatively call the function with + # only one additional default argument supplied and let the chain of + # function calls eventually lead to the full method. + any_args_in_trailing_defaults = + any(arg_defaults[def_idx+1:end]) do defaultval + contains_identifier(defaultval, def_arg_names[first_omitted:end]) + end + last_used_default = any_args_in_trailing_defaults ? + def_idx : lastindex(arg_defaults) + body = @ast ctx callex [K"block" + [K"call" + trimmed_arg_names... + arg_defaults[def_idx:last_used_default]... + ] + ] + trimmed_arg_types = arg_types[1:first_omitted-1] + trimmed_typevar_names = trim_used_typevars(ctx, trimmed_arg_types, + typevar_names, typevar_stmts) + # TODO: Ensure we preserve @nospecialize metadata in args + push!(method_stmts, + method_def_expr(ctx, srcref, callex, method_table, + trimmed_typevar_names, trimmed_arg_names, trimmed_arg_types, + body)) + end +end + +function scope_nest(ctx, names, values, body) + for (name, value) in Iterators.reverse(zip(names, values)) + body = @ast ctx name [K"let" [K"block" [K"=" name value]] + body + ] + end + body +end + +# Generate body function and `Core.kwcall` overloads for functions taking keywords. +function keyword_function_defs(ctx, srcref, callex_srcref, name_str, typevar_names, + typevar_stmts, new_typevar_stmts, arg_names, + arg_types, has_slurp, first_default, arg_defaults, + keywords, body, ret_var) + mangled_name = let n = isnothing(name_str) ? "_" : name_str + reserve_module_binding_i(ctx.mod, string(startswith(n, '#') ? "" : "#", n, "#")) + end + # TODO: Is the layer correct here? Which module should be the parent module + # of this body function? 
+ layer = new_scope_layer(ctx) + body_func_name = adopt_scope(@ast(ctx, callex_srcref, mangled_name::K"Identifier"), layer) + + kwcall_arg_names = SyntaxList(ctx) + kwcall_arg_types = SyntaxList(ctx) + + push!(kwcall_arg_names, new_local_binding(ctx, callex_srcref, "#self#"; kind=:argument)) + push!(kwcall_arg_types, + @ast ctx callex_srcref [K"call" + "typeof"::K"core" + "kwcall"::K"core" + ] + ) + kws_arg = new_local_binding(ctx, keywords, "kws"; kind=:argument) + push!(kwcall_arg_names, kws_arg) + push!(kwcall_arg_types, @ast ctx keywords "NamedTuple"::K"core") + + body_arg_names = SyntaxList(ctx) + body_arg_types = SyntaxList(ctx) + push!(body_arg_names, new_local_binding(ctx, body_func_name, "#self#"; kind=:argument)) + push!(body_arg_types, @ast ctx body_func_name [K"function_type" body_func_name]) + + non_positional_typevars = typevar_names[map(!, + select_used_typevars(arg_types, typevar_names, typevar_stmts))] + + kw_values = SyntaxList(ctx) + kw_defaults = SyntaxList(ctx) + kw_names = SyntaxList(ctx) + kw_name_syms = SyntaxList(ctx) + has_kw_slurp = false + kwtmp = new_local_binding(ctx, keywords, "kwtmp") + for (i,arg) in enumerate(children(keywords)) + (aname, atype, default, is_slurp) = + expand_function_arg(ctx, nothing, arg, i == numchildren(keywords), true) + push!(kw_names, aname) + name_sym = @ast ctx aname aname=>K"Symbol" + push!(body_arg_names, aname) + + if is_slurp + if !isnothing(default) + throw(LoweringError(arg, "keyword argument with `...` cannot have a default value")) + end + has_kw_slurp = true + push!(body_arg_types, @ast ctx arg [K"call" "pairs"::K"top" "NamedTuple"::K"core"]) + push!(kw_defaults, @ast ctx arg [K"call" "pairs"::K"top" [K"call" "NamedTuple"::K"core"]]) + continue + else + push!(body_arg_types, atype) + end + + if isnothing(default) + default = @ast ctx arg [K"call" + "throw"::K"core" + [K"call" + "UndefKeywordError"::K"core" + name_sym + ] + ] + end + push!(kw_defaults, default) + + # Extract the keyword argument 
value and check the type + push!(kw_values, @ast ctx arg [K"block" + [K"if" + [K"call" "isdefined"::K"core" kws_arg name_sym] + [K"block" + kwval := [K"call" "getfield"::K"core" kws_arg name_sym] + if is_core_Any(atype) || contains_identifier(atype, non_positional_typevars) + # <- Do nothing in this branch because `atype` includes + # something from the typevars and those static + # parameters don't have values yet. Instead, the type + # will be picked up when the body method is called and + # result in a MethodError during dispatch rather than + # the `TypeError` below. + # + # In principle we could probably construct the + # appropriate UnionAll here in some simple cases but + # the fully general case probably requires simulating + # the runtime's dispatch machinery. + else + [K"if" [K"call" "isa"::K"core" kwval atype] + "nothing"::K"core" + [K"call" + "throw"::K"core" + [K"new" "TypeError"::K"core" + "keyword argument"::K"Symbol" + name_sym + atype + kwval + ] + ] + ] + end + # Compiler performance hack: we reuse the kwtmp slot in all + # keyword if blocks rather than using the if block in value + # position. This cuts down on the number of slots required + # https://github.com/JuliaLang/julia/pull/44333 + [K"=" kwtmp kwval] + ] + [K"=" kwtmp default] + ] + kwtmp + ]) + + push!(kw_name_syms, name_sym) + end + append!(body_arg_names, arg_names) + append!(body_arg_types, arg_types) + + first_default += length(kwcall_arg_names) + append!(kwcall_arg_names, arg_names) + append!(kwcall_arg_types, arg_types) + + kwcall_mtable = @ast(ctx, srcref, "nothing"::K"core") + + kwcall_method_defs = SyntaxList(ctx) + if !isempty(arg_defaults) + # Construct kwcall overloads which forward default positional args on + # to the main kwcall overload. 
+ optional_positional_defs!(ctx, kwcall_method_defs, srcref, callex_srcref, + kwcall_mtable, typevar_names, typevar_stmts, + kwcall_arg_names, kwcall_arg_types, first_default, arg_defaults) + end + + positional_forwarding_args = if has_slurp + a = copy(arg_names) + a[end] = @ast ctx a[end] [K"..." a[end]] + a + else + arg_names + end + + #-------------------------------------------------- + # Construct the "main kwcall overload" which unpacks keywords and checks + # their consistency before dispatching to the user's code in the body + # method. + defaults_depend_on_kw_names = any(val->contains_identifier(val, kw_names), kw_defaults) + defaults_have_assign = any(val->contains_unquoted(e->kind(e) == K"=", val), kw_defaults) + use_ssa_kw_temps = !defaults_depend_on_kw_names && !defaults_have_assign + + if use_ssa_kw_temps + kw_val_stmts = SyntaxList(ctx) + for n in kw_names + # If not using slots for the keyword argument values, still declare + # them for reflection purposes. + push!(kw_val_stmts, @ast ctx n [K"local" n]) + end + kw_val_vars = SyntaxList(ctx) + for val in kw_values + v = emit_assign_tmp(kw_val_stmts, ctx, val, "kwval") + push!(kw_val_vars, v) + end + else + kw_val_vars = kw_names + end + + kwcall_body_tail = @ast ctx keywords [K"block" + if has_kw_slurp + # Slurp remaining keywords into last arg + remaining_kws := [K"call" + "pairs"::K"top" + if isempty(kw_name_syms) + kws_arg + else + [K"call" + "structdiff"::K"top" + kws_arg + [K"curly" + "NamedTuple"::K"core" + [K"tuple" kw_name_syms...] + ] + ] + end + ] + else + # Check that there's no unexpected keywords + [K"if" + [K"call" + "isempty"::K"top" + [K"call" + "diff_names"::K"top" + [K"call" "keys"::K"top" kws_arg] + [K"tuple" kw_name_syms...] + ] + ] + "nothing"::K"core" + [K"call" + "kwerr"::K"top" + kws_arg + positional_forwarding_args... + ] + ] + end + [K"call" + body_func_name + kw_val_vars... + if has_kw_slurp + remaining_kws + end + positional_forwarding_args... 
+ ] + ] + kwcall_body = if use_ssa_kw_temps + @ast ctx keywords [K"block" + kw_val_stmts... + kwcall_body_tail + ] + else + scope_nest(ctx, kw_names, kw_values, kwcall_body_tail) + end + main_kwcall_typevars = trim_used_typevars(ctx, kwcall_arg_types, typevar_names, typevar_stmts) + push!(kwcall_method_defs, + method_def_expr(ctx, srcref, callex_srcref, kwcall_mtable, + main_kwcall_typevars, kwcall_arg_names, kwcall_arg_types, kwcall_body)) + + # Check kws of body method + check_all_typevars_used(body_arg_types, typevar_names, typevar_stmts) + + kw_func_method_defs = @ast ctx srcref [K"block" + [K"function_decl" body_func_name] + [K"scope_block"(scope_type=:hard) + [K"method_defs" + body_func_name + [K"block" + new_typevar_stmts... + method_def_expr(ctx, srcref, callex_srcref, "nothing"::K"core", + typevar_names, body_arg_names, body_arg_types, + [K"block" + [K"meta" "nkw"::K"Symbol" numchildren(keywords)::K"Integer"] + body + ], + ret_var) + ] + ] + ] + [K"scope_block"(scope_type=:hard) + [K"method_defs" + "nothing"::K"core" + [K"block" + new_typevar_stmts... + kwcall_method_defs... + ] + ] + ] + ] + + #-------------------------------------------------- + # Body for call with no keywords + body_for_positional_args_only = if defaults_depend_on_kw_names + scope_nest(ctx, kw_names, kw_defaults, + @ast ctx srcref [K"call" body_func_name + kw_names... + positional_forwarding_args... + ] + ) + else + @ast ctx srcref [K"call" body_func_name + kw_defaults... + positional_forwarding_args... + ] + end + + kw_func_method_defs, body_for_positional_args_only +end + +# Check valid identifier/function names +function is_invalid_func_name(ex) + k = kind(ex) + if k == K"Identifier" + name = ex.name_val + elseif k == K"." 
&& numchildren(ex) == 2 && kind(ex[2]) == K"Symbol" + # `function A.f(x,y) ...` + name = ex[2].name_val + else + return true + end + return is_ccall_or_cglobal(name) +end + +function expand_function_def(ctx, ex, docs, rewrite_call=identity, rewrite_body=identity; doc_only=false) + @chk numchildren(ex) in (1,2) + name = ex[1] + if numchildren(ex) == 1 && is_identifier_like(name) + # Function declaration with no methods + if is_invalid_func_name(name) + throw(LoweringError(name, "Invalid function name")) + end + return @ast ctx ex [K"block" + [K"function_decl" name] + name + ] + end + + typevar_names = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) + new_typevar_stmts = SyntaxList(ctx) + if kind(name) == K"where" + # `where` vars end up in two places + # 1. Argument types - the `T` in `x::T` becomes a `TypeVar` parameter in + # the method sig, eg, `function f(x::T) where T ...`. These define the + # static parameters of the method. + # 2. In the method body - either explicitly or implicitly via the method + # return type or default arguments - where `T` turns up as the *name* of + # a special slot of kind ":static_parameter" + name = _split_wheres!(ctx, typevar_names, typevar_stmts, new_typevar_stmts, name) + end + + return_type = nothing + if kind(name) == K"::" + @chk numchildren(name) == 2 + return_type = name[2] + name = name[1] + end + + callex = if kind(name) == K"call" + name + elseif kind(name) == K"tuple" + # Anonymous function syntax `function (x,y) ... end` + @ast ctx name [K"call" + "#anon#"::K"Placeholder" + children(name)... + ] + elseif kind(name) == K"dotcall" + throw(LoweringError(name, "Cannot define function using `.` broadcast syntax")) + else + throw(LoweringError(name, "Bad function definition")) + end + + # Fixup for `new` constructor sigs if necessary + callex = rewrite_call(callex) + + # Construct method argument lists of names and types. 
+ # + # First, match the "self" argument: In the method signature, each function + # gets a self argument name+type. For normal generic functions, this is a + # singleton and subtype of `Function`. But objects of any type can be made + # callable when the self argument is explicitly given using `::` syntax in + # the function name. + name = callex[1] + bare_func_name = nothing + name_str = nothing + doc_obj = nothing + self_name = nothing + if kind(name) == K"::" + # Self argument is specified by user + if numchildren(name) == 1 + # function (::T)() ... + self_type = name[1] + else + # function (f::T)() ... + @chk numchildren(name) == 2 + self_name = name[1] + self_type = name[2] + end + doc_obj = self_type + else + if kind(name) == K"Placeholder" + # Anonymous function. In this case we may use an ssavar for the + # closure's value. + name_str = name.name_val + name = ssavar(ctx, name, name.name_val) + bare_func_name = name + elseif is_invalid_func_name(name) + throw(LoweringError(name, "Invalid function name")) + elseif is_identifier_like(name) + # Add methods to a global `Function` object, or local closure + # type function f() ... + name_str = name.name_val + bare_func_name = name + else + # Add methods to an existing Function + # function A.B.f() ... + if kind(name) == K"." && kind(name[2]) == K"Symbol" + name_str = name[2].name_val + end + end + doc_obj = name # todo: can closures be documented? + self_type = @ast ctx name [K"function_type" name] + end + # Add self argument + if isnothing(self_name) + # TODO: #self# should be symbolic rather than a binding for the cases + # where it's reused in `optional_positional_defs!` because it's + # probably unsafe to reuse bindings for multiple different methods in + # the presence of closure captures or other global binding properties. + # + # This is reminiscent of the need to renumber SSA vars in certain cases + # in the flisp implementation. 
+ self_name = new_local_binding(ctx, name, "#self#"; kind=:argument) + end + + # Expand remaining argument names and types + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + push!(arg_names, self_name) + push!(arg_types, self_type) + args = callex[2:end] + keywords = nothing + if !isempty(args) && kind(args[end]) == K"parameters" + keywords = args[end] + args = args[1:end-1] + if numchildren(keywords) == 0 + keywords = nothing + end + end + body_stmts = SyntaxList(ctx) + has_slurp = false + first_default = 0 # index into arg_names/arg_types + arg_defaults = SyntaxList(ctx) + for (i,arg) in enumerate(args) + (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, + i == length(args), false) + has_slurp |= is_slurp + push!(arg_names, aname) + + # TODO: Ideally, ensure side effects of evaluating arg_types only + # happen once - we should create an ssavar if there's any following + # defaults. (flisp lowering doesn't ensure this either). Beware if + # fixing this that optional_positional_defs! depends on filtering the + # *symbolic* representation of arg_types. + push!(arg_types, atype) + + if isnothing(default) + if !isempty(arg_defaults) && !is_slurp + # TODO: Referring to multiple pieces of syntax in one error message is necessary. + # TODO: Poison ASTs with error nodes and continue rather than immediately throwing. + # + # We should make something like the following kind of thing work! + # arg_defaults[1] = @ast_error ctx arg_defaults[1] """ + # Positional arguments with defaults must occur at the end. 
+ # + # We found a [non-optional position argument]($arg) *after* + # one with a [default value]($(first(arg_defaults))) + # """ + # + throw(LoweringError(args[first_default-1], "optional positional arguments must occur at end")) + end + else + if isempty(arg_defaults) + first_default = i + 1 # Offset for self argument + end + push!(arg_defaults, default) + end + end + + if doc_only + # The (doc str (call ...)) form requires method signature lowering, but + # does not execute or define any method, so we can't use function_type. + # This is a bit of a messy case in the docsystem which we'll hopefully + # be able to delete at some point. + sig_stmts = SyntaxList(ctx) + @assert first_default != 1 && length(arg_types) >= 1 + last_required = first_default === 0 ? length(arg_types) : first_default - 1 + for i in last_required:length(arg_types) + push!(sig_stmts, @ast(ctx, ex, [K"curly" "Tuple"::K"core" arg_types[2:i]...])) + end + sig_type = @ast ctx ex [K"where" + [K"curly" "Union"::K"core" sig_stmts...] + [K"_typevars" [K"block" typevar_names...] [K"block"]] + ] + out = @ast ctx docs [K"block" + typevar_stmts... + [K"call" + bind_static_docs!::K"Value" + (kind(name) == K"." ? name[1] : ctx.mod::K"Value") + name_str::K"Symbol" + docs[1] + ::K"SourceLocation"(ex) + sig_type + ] + ] + return expand_forms_2(ctx, out) + end + + if !isnothing(return_type) + ret_var = ssavar(ctx, return_type, "return_type") + push!(body_stmts, @ast ctx return_type [K"=" ret_var return_type]) + else + ret_var = nothing + end + + body = rewrite_body(ex[2]) + if !isempty(body_stmts) + body = @ast ctx body [ + K"block" + body_stmts... 
+ body + ] + end + + gen_func_method_defs = nothing + if is_generated(body) + gen_func_method_defs, body = + expand_function_generator(ctx, ex, callex, name, name_str, body, arg_names, typevar_names) + + end + + if isnothing(keywords) + kw_func_method_defs = nothing + # NB: The following check seems good as it statically catches any useless + # static parameters which can't be bound during method invocation. + # However it wasn't previously an error so we might need to reduce it + # to a warning? + check_all_typevars_used(arg_types, typevar_names, typevar_stmts) + main_typevar_names = typevar_names + else + # Rewrite `body` here so that the positional-only versions dispatch there. + kw_func_method_defs, body = + keyword_function_defs(ctx, ex, callex, name_str, typevar_names, typevar_stmts, + new_typevar_stmts, arg_names, arg_types, has_slurp, + first_default, arg_defaults, keywords, body, ret_var) + # The main function (but without keywords) needs its typevars trimmed, + # as some of them may be for the keywords only. + main_typevar_names = trim_used_typevars(ctx, arg_types, typevar_names, typevar_stmts) + # ret_var is used only in the body method + ret_var = nothing + end + + method_table_val = nothing # TODO: method overlays + method_table = isnothing(method_table_val) ? 
+ @ast(ctx, callex, "nothing"::K"core") : + ssavar(ctx, ex, "method_table") + method_stmts = SyntaxList(ctx) + + if !isempty(arg_defaults) + optional_positional_defs!(ctx, method_stmts, ex, callex, + method_table, typevar_names, typevar_stmts, + arg_names, arg_types, first_default, arg_defaults) + end + + # The method with all non-default arguments + push!(method_stmts, + method_def_expr(ctx, ex, callex, method_table, main_typevar_names, arg_names, + arg_types, body, ret_var)) + + if !isnothing(docs) + method_stmts[end] = @ast ctx docs [K"block" + method_metadata := method_stmts[end] + [K"call" + bind_docs!::K"Value" + doc_obj + docs[1] + method_metadata + ] + ] + end + + @ast ctx ex [K"block" + if !isnothing(bare_func_name) + # Need the main function type created here before running any code + # in kw_func_method_defs + [K"function_decl"(bare_func_name) bare_func_name] + end + gen_func_method_defs + kw_func_method_defs + [K"scope_block"(scope_type=:hard) + [K"method_defs" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + [K"block" + new_typevar_stmts... + if !isnothing(method_table_val) + [K"=" method_table method_table_val] + end + method_stmts... + ] + ] + ] + [K"removable" + isnothing(bare_func_name) ? "nothing"::K"core" : bare_func_name + ] + ] +end + +#------------------------------------------------------------------------------- +# Anon function syntax +function expand_arrow_arglist(ctx, arglist, arrowname) + k = kind(arglist) + if k == K"where" + @ast ctx arglist [K"where" + expand_arrow_arglist(ctx, arglist[1], arrowname) + arglist[2] + ] + else + # The arglist can sometimes be parsed as a block, or something else, and + # fixing this is extremely awkward when nested inside `where`. 
See + # https://github.com/JuliaLang/JuliaSyntax.jl/pull/522 + if k == K"block" + @chk numchildren(arglist) == 2 + arglist = @ast ctx arglist [K"tuple" + arglist[1] + [K"parameters" arglist[2]] + ] + elseif k != K"tuple" + arglist = @ast ctx arglist [K"tuple" + arglist[1] + ] + end + @ast ctx arglist [K"call" + arrowname::K"Placeholder" + children(arglist)... + ] + end +end + +function expand_arrow(ctx, ex) + @chk numchildren(ex) == 2 + expand_forms_2(ctx, + @ast ctx ex [K"function" + expand_arrow_arglist(ctx, ex[1], string(kind(ex))) + ex[2] + ] + ) +end + +function expand_opaque_closure(ctx, ex) + arg_types_spec = ex[1] + return_lower_bound = ex[2] + return_upper_bound = ex[3] + allow_partial = ex[4] + func_expr = ex[5] + @chk kind(func_expr) == K"->" + @chk numchildren(func_expr) == 2 + args = func_expr[1] + @chk kind(args) == K"tuple" + check_no_parameters(ex, args) + + arg_names = SyntaxList(ctx) + arg_types = SyntaxList(ctx) + push!(arg_names, new_local_binding(ctx, args, "#self#"; kind=:argument)) + body_stmts = SyntaxList(ctx) + is_va = false + for (i, arg) in enumerate(children(args)) + (aname, atype, default, is_slurp) = expand_function_arg(ctx, body_stmts, arg, + i == numchildren(args), false) + is_va |= is_slurp + push!(arg_names, aname) + push!(arg_types, atype) + if !isnothing(default) + throw(LoweringError(default, "Default positional arguments cannot be used in an opaque closure")) + end + end + + nargs = length(arg_names) - 1 # ignoring #self# + + @ast ctx ex [K"_opaque_closure" + ssavar(ctx, ex, "opaque_closure_id") # only a placeholder. Must be :local + if is_core_nothing(arg_types_spec) + [K"curly" + "Tuple"::K"core" + arg_types... + ] + else + arg_types_spec + end + is_core_nothing(return_lower_bound) ? [K"curly" "Union"::K"core"] : return_lower_bound + is_core_nothing(return_upper_bound) ? 
"Any"::K"core" : return_upper_bound + allow_partial + nargs::K"Integer" + is_va::K"Bool" + ::K"SourceLocation"(func_expr) + [K"lambda"(func_expr, is_toplevel_thunk=false, toplevel_pure=false) + [K"block" arg_names...] + [K"block"] + [K"block" + body_stmts... + func_expr[2] + ] + ] + ] +end + +#------------------------------------------------------------------------------- +# Expand macro definitions + +function _make_macro_name(ctx, ex) + k = kind(ex) + if k == K"Identifier" || k == K"Symbol" + name = mapleaf(ctx, ex, k) + name.name_val = "@$(ex.name_val)" + name + elseif is_valid_modref(ex) + @chk numchildren(ex) == 2 + @ast ctx ex [K"." ex[1] _make_macro_name(ctx, ex[2])] + else + throw(LoweringError(ex, "invalid macro name")) + end +end + +# flisp: expand-macro-def +function expand_macro_def(ctx, ex) + @chk numchildren(ex) >= 1 (ex,"invalid macro definition") + if numchildren(ex) == 1 + name = ex[1] + # macro with zero methods + # `macro m end` + return @ast ctx ex [K"function" _make_macro_name(ctx, name)] + end + # TODO: Making this manual pattern matching robust is such a pain!!! + sig = ex[1] + @chk (kind(sig) == K"call" && numchildren(sig) >= 1) (sig, "invalid macro signature") + name = sig[1] + args = remove_empty_parameters(children(sig)) + @chk kind(args[end]) != K"parameters" (args[end], "macros cannot accept keyword arguments") + scope_ref = kind(name) == K"." ? name[1] : name + if ctx.expr_compat_mode + @ast ctx ex [K"function" + [K"call"(sig) + _make_macro_name(ctx, name) + [K"::" + # TODO: should we be adopting the scope of the K"macro" expression itself? + adopt_scope(@ast(ctx, sig, "__source__"::K"Identifier"), scope_ref) + LineNumberNode::K"Value" + ] + [K"::" + adopt_scope(@ast(ctx, sig, "__module__"::K"Identifier"), scope_ref) + Module::K"Value" + ] + map(e->_apply_nospecialize(ctx, e), args[2:end])... 
+ ] + ex[2] + ] + else + @ast ctx ex [K"function" + [K"call"(sig) + _make_macro_name(ctx, name) + [K"::" + adopt_scope(@ast(ctx, sig, "__context__"::K"Identifier"), scope_ref) + MacroContext::K"Value" + ] + # flisp: We don't mark these @nospecialize because all arguments to + # new macros will be of type SyntaxTree + args[2:end]... + ] + ex[2] + ] + end +end + +#------------------------------------------------------------------------------- +# Expand type definitions + +# Match `x<:T<:y` etc, returning `(name, lower_bound, upper_bound)` +# A bound is `nothing` if not specified +function analyze_typevar(ctx, ex) + k = kind(ex) + if k == K"Identifier" + (ex, nothing, nothing) + elseif k == K"comparison" && numchildren(ex) == 5 + kind(ex[3]) == K"Identifier" || throw(LoweringError(ex[3], "expected type name")) + if !((kind(ex[2]) == K"Identifier" && ex[2].name_val == "<:") && + (kind(ex[4]) == K"Identifier" && ex[4].name_val == "<:")) + throw(LoweringError(ex, "invalid type bounds")) + end + # a <: b <: c + (ex[3], ex[1], ex[5]) + elseif k == K"<:" && numchildren(ex) == 2 + kind(ex[1]) == K"Identifier" || throw(LoweringError(ex[1], "expected type name")) + (ex[1], nothing, ex[2]) + elseif k == K">:" && numchildren(ex) == 2 + kind(ex[2]) == K"Identifier" || throw(LoweringError(ex[2], "expected type name")) + (ex[1], ex[2], nothing) + else + throw(LoweringError(ex, "expected type name or type bounds")) + end +end + +function bounds_to_TypeVar(ctx, srcref, bounds) + name, lb, ub = bounds + # Generate call to one of + # TypeVar(name) + # TypeVar(name, ub) + # TypeVar(name, lb, ub) + @ast ctx srcref [K"call" + "TypeVar"::K"core" + name=>K"Symbol" + lb + if isnothing(ub) && !isnothing(lb) + "Any"::K"core" + else + ub + end + ] +end + +# Analyze type signatures such as `A{C} <: B where C` +# +# Return (name, typevar_names, typevar_stmts, supertype) where +# - `name` is the name of the type +# - `supertype` is the super type of the type +function analyze_type_sig(ctx, ex) + k 
= kind(ex) + if k == K"Identifier" + name = ex + type_params = () + supertype = @ast ctx ex "Any"::K"core" + elseif k == K"curly" && numchildren(ex) >= 1 && kind(ex[1]) == K"Identifier" + # name{type_params} + name = ex[1] + type_params = ex[2:end] + supertype = @ast ctx ex "Any"::K"core" + elseif k == K"<:" && numchildren(ex) == 2 + if kind(ex[1]) == K"Identifier" + name = ex[1] + type_params = () + supertype = ex[2] + elseif kind(ex[1]) == K"curly" && numchildren(ex[1]) >= 1 && kind(ex[1][1]) == K"Identifier" + name = ex[1][1] + type_params = ex[1][2:end] + supertype = ex[2] + end + end + @isdefined(name) || throw(LoweringError(ex, "invalid type signature")) + + return (name, type_params, supertype) +end + +# Expand type_params into (typevar_names, typevar_stmts) where +# - `typevar_names` are the names of the type's type parameters +# - `typevar_stmts` are a list of statements to define a `TypeVar` for each parameter +# name in `typevar_names`, to be emitted prior to uses of `typevar_names`. +# There is exactly one statement from each typevar. 
+function expand_typevars!(ctx, typevar_names, typevar_stmts, type_params) + for param in type_params + bounds = analyze_typevar(ctx, param) + n = bounds[1] + push!(typevar_names, n) + push!(typevar_stmts, @ast ctx param [K"block" + [K"local" n] + [K"=" n bounds_to_TypeVar(ctx, param, bounds)] + ]) + end + return nothing +end + +function expand_typevars(ctx, type_params) + typevar_names = SyntaxList(ctx) + typevar_stmts = SyntaxList(ctx) + expand_typevars!(ctx, typevar_names, typevar_stmts, type_params) + return (typevar_names, typevar_stmts) +end + +function expand_abstract_or_primitive_type(ctx, ex) + is_abstract = kind(ex) == K"abstract" + if is_abstract + @chk numchildren(ex) == 1 + else + @assert kind(ex) == K"primitive" + @chk numchildren(ex) == 2 + nbits = ex[2] + end + name, type_params, supertype = analyze_type_sig(ctx, ex[1]) + typevar_names, typevar_stmts = expand_typevars(ctx, type_params) + newtype_var = ssavar(ctx, ex, "new_type") + @ast ctx ex [K"block" + [K"scope_block"(scope_type=:hard) + [K"block" + [K"local" name] + [K"always_defined" name] + typevar_stmts... + [K"=" + newtype_var + [K"call" + (is_abstract ? "_abstracttype" : "_primitivetype")::K"core" + ctx.mod::K"Value" + name=>K"Symbol" + [K"call" "svec"::K"core" typevar_names...] 
+ if !is_abstract + nbits + end + ] + ] + [K"=" name newtype_var] + [K"call" "_setsuper!"::K"core" newtype_var supertype] + [K"call" "_typebody!"::K"core" false::K"Bool" name] + ] + ] + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] + [K"global" name] + [K"if" + [K"&&" + [K"call" + "isdefinedglobal"::K"core" + ctx.mod::K"Value" + name=>K"Symbol" + false::K"Bool"] + [K"call" "_equiv_typedef"::K"core" name newtype_var] + ] + nothing_(ctx, ex) + [K"constdecl" name newtype_var] + ] + nothing_(ctx, ex) + ] +end + +function _match_struct_field(x0) + type=nothing + docs=nothing + atomic=false + _const=false + x = x0 + while true + k = kind(x) + if k == K"Identifier" + return (name=x, type=type, atomic=atomic, _const=_const, docs=docs) + elseif k == K"::" && numchildren(x) == 2 + isnothing(type) || throw(LoweringError(x0, "multiple types in struct field")) + type = x[2] + x = x[1] + elseif k == K"atomic" + atomic = true + x = x[1] + elseif k == K"const" + _const = true + x = x[1] + elseif k == K"doc" + docs = x[1] + x = x[2] + else + return nothing + end + end +end + +function _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, inner_defs, exs) + for e in exs + if kind(e) == K"block" + _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, + inner_defs, children(e)) + elseif kind(e) == K"=" + throw(LoweringError(e, "assignment syntax in structure fields is reserved")) + else + m = _match_struct_field(e) + if !isnothing(m) + # Struct field + push!(field_names, m.name) + n = length(field_names) + push!(field_types, isnothing(m.type) ? 
@ast(ctx, e, "Any"::K"core") : m.type) + if m.atomic + push!(field_attrs, @ast ctx e n::K"Integer") + push!(field_attrs, @ast ctx e "atomic"::K"Symbol") + end + if m._const + push!(field_attrs, @ast ctx e n::K"Integer") + push!(field_attrs, @ast ctx e "const"::K"Symbol") + end + if !isnothing(m.docs) + push!(field_docs, @ast ctx e n::K"Integer") + push!(field_docs, @ast ctx e m.docs) + end + else + # Inner constructors and inner functions + # TODO: Disallow arbitrary expressions inside `struct`? + push!(inner_defs, e) + end + end + end +end + +# generate call to `convert()` for `(call new ...)` expressions +function _new_call_convert_arg(ctx, full_struct_type, field_type, field_index, val) + if is_core_Any(field_type) + return val + end + # kt = kind(field_type) + # TODO: Allow kt == K"Identifier" && kt in static_params to avoid fieldtype call? + @ast ctx field_type [K"block" + tmp_type := [K"call" + "fieldtype"::K"core" + full_struct_type + field_index::K"Integer" + ] + convert_for_type_decl(ctx, field_type, val, tmp_type, false) + ] +end + +function default_inner_constructors(ctx, srcref, global_struct_name, + typevar_names, typevar_stmts, field_names, field_types) + # TODO: Consider using srcref = @HERE ? + exact_ctor = if isempty(typevar_names) + # Definition with exact types for all arguments + field_decls = SyntaxList(ctx) + @ast ctx srcref [K"function" + [K"call" + [K"::" [K"curly" "Type"::K"core" global_struct_name]] + [[K"::" n t] for (n,t) in zip(field_names, field_types)]... + ] + [K"new" + global_struct_name + field_names... + ] + ] + end + maybe_non_Any_field_types = filter(!is_core_Any, field_types) + converting_ctor = if !isempty(typevar_names) || !isempty(maybe_non_Any_field_types) + # Definition which takes `Any` for all arguments and uses + # `Base.convert()` to convert those to the exact field type. Only + # defined if at least one field type is not Any. 
+ ctor_self = new_local_binding(ctx, srcref, "#ctor-self#"; kind=:argument) + @ast ctx srcref [K"function" + [K"call" + [K"::" + ctor_self + if isempty(typevar_names) + [K"curly" "Type"::K"core" global_struct_name] + else + [K"where" + [K"curly" + "Type"::K"core" + [K"curly" + global_struct_name + typevar_names... + ] + ] + [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]] + ] + end + ] + field_names... + ] + [K"block" + [K"new" + ctor_self + [_new_call_convert_arg(ctx, ctor_self, type, i, name) + for (i, (name,type)) in enumerate(zip(field_names, field_types))]... + ] + ] + ] + end + if isnothing(exact_ctor) + converting_ctor + else + if isnothing(converting_ctor) + exact_ctor + else + @ast ctx srcref [K"block" + [K"if" + # Only define converting_ctor if at least one field type is not Any. + mapfoldl(t -> [K"call" "==="::K"core" "Any"::K"core" t], + (t,u) -> [K"&&" u t], + maybe_non_Any_field_types) + [K"block"] + converting_ctor + ] + exact_ctor + ] + end + end +end + +# Generate outer constructor for structs with type parameters. Eg, for +# struct X{U,V} +# x::U +# y::V +# end +# +# We basically generate +# function (::Type{X})(x::U, y::V) where {U,V} +# new(X{U,V}, x, y) +# end +# +function default_outer_constructor(ctx, srcref, global_struct_name, + typevar_names, typevar_stmts, field_names, field_types) + @ast ctx srcref [K"function" + [K"where" + [K"call" + # We use `::Type{$global_struct_name}` here rather than just + # `struct_name` because global_struct_name is a binding to a + # type - we know we're not creating a new `Function` and + # there's no reason to emit the 1-arg `Expr(:method, name)` in + # the next phase of expansion. + [K"::" [K"curly" "Type"::K"core" global_struct_name]] + [[K"::" n t] for (n,t) in zip(field_names, field_types)]... + ] + [K"_typevars" [K"block" typevar_names...] [K"block" typevar_stmts...]] + ] + [K"new" [K"curly" global_struct_name typevar_names...] field_names...] 
+ ] +end + +function _is_new_call(ex) + kind(ex) == K"call" && + ((kind(ex[1]) == K"Identifier" && ex[1].name_val == "new") || + (kind(ex[1]) == K"curly" && kind(ex[1][1]) == K"Identifier" && ex[1][1].name_val == "new")) +end + +# Rewrite inner constructor signatures for struct `X` from `X(...)` +# to `(ctor_self::Type{X})(...)` +function _rewrite_ctor_sig(ctx, callex, struct_name, global_struct_name, struct_typevars, ctor_self) + @assert kind(callex) == K"call" + name = callex[1] + if is_same_identifier_like(struct_name, name) + # X(x,y) ==> (#ctor-self#::Type{X})(x,y) + ctor_self[] = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument) + @ast ctx callex [K"call" + [K"::" + ctor_self[] + [K"curly" "Type"::K"core" global_struct_name] + ] + callex[2:end]... + ] + elseif kind(name) == K"curly" && is_same_identifier_like(struct_name, name[1]) + # X{T}(x,y) ==> (#ctor-self#::Type{X{T}})(x,y) + self = new_local_binding(ctx, callex, "#ctor-self#"; kind=:argument) + if numchildren(name) - 1 == length(struct_typevars) + # Self fully parameterized - can be used as the full type to + # rewrite new() calls in constructor body. + ctor_self[] = self + end + @ast ctx callex [K"call" + [K"::" + self + [K"curly" + "Type"::K"core" + [K"curly" + global_struct_name + name[2:end]... + ] + ] + ] + callex[2:end]... + ] + else + callex + end +end + +# Rewrite calls to `new` in bodies of inner constructors and inner functions +# into `new` or `splatnew` expressions. 
For example: +# +# struct X{T,S} +# X() = new() +# X() = new{A,B}() +# X{T,S}() where {T,S} = new() +# X{A,B}() = new() +# X{A}() = new() +# (t::Type{X})() = new{A,B}() +# f() = new() +# f() = new{A,B}() +# f() = new{Ts...}() +# end +# +# Map to the following +# +# X() = ERROR +# (#ctor-self#::Type{X})() = (new X{A,B}) +# (Type{X{T,S}}() where {T,S} = (new #ctor-self#) +# X{A,B}() = (new #ctor-self#) +# X{A}() = ERROR +# (t::Type{X})() = (new X{A,B}) +# f() = ERROR +# f() = (new X{A,B}) +# f() = (new X{Ts...}) +# +# TODO: Arguably the following "could also work", but any symbolic match of +# this case would be heuristic and rely on assuming Type == Core.Type. So +# runtime checks would really be required and flisp lowering doesn't catch +# this case either. +# +# (t::Type{X{A,B}})() = new() +function _rewrite_ctor_new_calls(ctx, ex, struct_name, global_struct_name, ctor_self, + struct_typevars, field_types) + if is_leaf(ex) + return ex + elseif !_is_new_call(ex) + return mapchildren( + e->_rewrite_ctor_new_calls(ctx, e, struct_name, global_struct_name, + ctor_self, struct_typevars, field_types), + ctx, ex + ) + end + # Rewrite a call to new() + kw_arg_i = findfirst(e->(k = kind(e); k == K"=" || k == K"parameters"), children(ex)) + if !isnothing(kw_arg_i) + throw(LoweringError(ex[kw_arg_i], "`new` does not accept keyword arguments")) + end + full_struct_type = if kind(ex[1]) == K"curly" + # new{A,B}(...) + new_type_params = ex[1][2:end] + n_type_splat = sum(kind(t) == K"..." for t in new_type_params; init=0) + n_type_nonsplat = length(new_type_params) - n_type_splat + if n_type_splat == 0 && n_type_nonsplat < length(struct_typevars) + throw(LoweringError(ex[1], "too few type parameters specified in `new{...}`")) + elseif n_type_nonsplat > length(struct_typevars) + throw(LoweringError(ex[1], "too many type parameters specified in `new{...}`")) + end + @ast ctx ex[1] [K"curly" global_struct_name new_type_params...] + elseif !isnothing(ctor_self) + # new(...) 
in constructors + ctor_self + else + # new(...) inside non-constructor inner functions + if isempty(struct_typevars) + global_struct_name + else + throw(LoweringError(ex[1], "too few type parameters specified in `new`")) + end + end + new_args = ex[2:end] + n_splat = sum(kind(t) == K"..." for t in new_args; init=0) + n_nonsplat = length(new_args) - n_splat + n_fields = length(field_types) + function throw_n_fields_error(desc) + @ast ctx ex [K"call" + "throw"::K"core" + [K"call" + "ArgumentError"::K"top" + "too $desc arguments in `new` (expected $n_fields)"::K"String" + ] + ] + end + if n_nonsplat > n_fields + return throw_n_fields_error("many") + else + # "Too few" args are allowed in partially initialized structs + end + if n_splat == 0 + @ast ctx ex [K"block" + struct_type := full_struct_type + [K"new" + struct_type + [_new_call_convert_arg(ctx, struct_type, type, i, name) + for (i, (name,type)) in enumerate(zip(ex[2:end], field_types))]... + ] + ] + else + fields_all_Any = all(is_core_Any, field_types) + if fields_all_Any + @ast ctx ex [K"block" + struct_type := full_struct_type + [K"splatnew" + struct_type + # Note: `jl_new_structt` ensures length of this tuple is + # exactly the number of fields. + [K"call" "tuple"::K"core" ex[2:end]...] + ] + ] + else + # `new` with splatted args which are symbolically not `Core.Any` + # (might be `Any` at runtime but we can't know that here.) + @ast ctx ex [K"block" + args := [K"call" "tuple"::K"core" ex[2:end]...] + n_args := [K"call" "nfields"::K"core" args] + [K"if" + [K"call" "ult_int"::K"top" n_args n_fields::K"Integer"] + throw_n_fields_error("few") + ] + [K"if" + [K"call" "ult_int"::K"top" n_fields::K"Integer" n_args] + throw_n_fields_error("many") + ] + struct_type := full_struct_type + [K"new" + struct_type + [_new_call_convert_arg(ctx, struct_type, type, i, + [K"call" "getfield"::K"core" args i::K"Integer"]) + for (i, type) in enumerate(field_types)]... + ] + ] + end + end +end + +# Rewrite calls to `new( ... 
)` to `new` expressions on the appropriate +# type, determined by the containing type and constructor definitions. +# +# This is mainly for constructors, but also needs to work for inner functions +# which may call new() but are not constructors. +function rewrite_new_calls(ctx, ex, struct_name, global_struct_name, + typevar_names, field_names, field_types) + if kind(ex) == K"doc" + docs = ex[1] + ex = ex[2] + else + docs = nothing + end + if kind(ex) != K"function" + return ex + end + if !(numchildren(ex) == 2 && is_eventually_call(ex[1])) + throw(LoweringError(ex, "Expected constructor or named inner function")) + end + + ctor_self = Ref{Union{Nothing,SyntaxTree}}(nothing) + expand_function_def(ctx, ex, docs, + callex->_rewrite_ctor_sig(ctx, callex, struct_name, + global_struct_name, typevar_names, ctor_self), + body->_rewrite_ctor_new_calls(ctx, body, struct_name, global_struct_name, + ctor_self[], typevar_names, field_types) + ) +end + +function _constructor_min_initialized(ex::SyntaxTree) + if _is_new_call(ex) + if any(kind(e) == K"..." for e in ex[2:end]) + # Lowering ensures new with splats always inits all fields + # or in the case of splatnew this is enforced by the runtime. + typemax(Int) + else + numchildren(ex) - 1 + end + elseif !is_leaf(ex) + minimum((_constructor_min_initialized(e) for e in children(ex)), init=typemax(Int)) + else + typemax(Int) + end +end + +# Let S be a struct we're defining in module M. Below is a hack to allow its +# field types to refer to S as M.S. See #56497. +function insert_struct_shim(ctx, fieldtypes, name) + function replace_type(ex) + if kind(ex) == K"." 
&& + numchildren(ex) == 2 && + kind(ex[2]) == K"Symbol" && + ex[2].name_val == name.name_val + @ast ctx ex [K"call" "struct_name_shim"::K"core" ex[1] ex[2] ctx.mod::K"Value" name] + elseif numchildren(ex) > 0 + mapchildren(replace_type, ctx, ex) + else + ex + end + end + map(replace_type, fieldtypes) +end + +function expand_struct_def(ctx, ex, docs) + @chk numchildren(ex) == 2 + type_sig = ex[1] + type_body = ex[2] + if kind(type_body) != K"block" + throw(LoweringError(type_body, "expected block for `struct` fields")) + end + struct_name, type_params, supertype = analyze_type_sig(ctx, type_sig) + typevar_names, typevar_stmts = expand_typevars(ctx, type_params) + field_names = SyntaxList(ctx) + field_types = SyntaxList(ctx) + field_attrs = SyntaxList(ctx) + field_docs = SyntaxList(ctx) + inner_defs = SyntaxList(ctx) + _collect_struct_fields(ctx, field_names, field_types, field_attrs, field_docs, + inner_defs, children(type_body)) + is_mutable = has_flags(ex, JuliaSyntax.MUTABLE_FLAG) + min_initialized = minimum((_constructor_min_initialized(e) for e in inner_defs), + init=length(field_names)) + newtype_var = ssavar(ctx, ex, "struct_type") + hasprev = ssavar(ctx, ex, "hasprev") + prev = ssavar(ctx, ex, "prev") + newdef = ssavar(ctx, ex, "newdef") + layer = new_scope_layer(ctx, struct_name) + global_struct_name = adopt_scope(struct_name, layer) + if !isempty(typevar_names) + # Generate expression like `prev_struct.body.body.parameters` + prev_typevars = global_struct_name + for _ in 1:length(typevar_names) + prev_typevars = @ast ctx type_sig [K"." prev_typevars "body"::K"Symbol"] + end + prev_typevars = @ast ctx type_sig [K"." 
prev_typevars "parameters"::K"Symbol"] + end + + # New local variable names for constructor args to avoid clashing with any + # type names + if isempty(inner_defs) + field_names_2 = adopt_scope(field_names, layer) + end + + need_outer_constructor = false + if isempty(inner_defs) && !isempty(typevar_names) + # To generate an outer constructor each struct type parameter must be + # able to be inferred from the list of fields passed as constructor + # arguments. + # + # More precisely, it must occur in a field type, or in the bounds of a + # subsequent type parameter. For example the following won't work + # struct X{T} + # a::Int + # end + # X(a::Int) where T = #... construct X{T} ?? + # + # But the following does + # struct X{T} + # a::T + # end + # X(a::T) where {T} = # construct X{typeof(a)}(a) + need_outer_constructor = true + for i in 1:length(typevar_names) + typevar_name = typevar_names[i] + typevar_in_fields = any(contains_identifier(ft, typevar_name) for ft in field_types) + if !typevar_in_fields + typevar_in_bounds = any(type_params[i+1:end]) do param + # Check the bounds of subsequent type params + (_,lb,ub) = analyze_typevar(ctx, param) + # todo: flisp lowering tests `lb` here so we also do. But + # in practice this doesn't seem to constrain `typevar_name` + # and the generated constructor doesn't work? + (!isnothing(ub) && contains_identifier(ub, typevar_name)) || + (!isnothing(lb) && contains_identifier(lb, typevar_name)) + end + if !typevar_in_bounds + need_outer_constructor = false + break + end + end + end + end + + # The following lowering covers several subtle issues in the ordering of + # typevars when "redefining" structs. + # See https://github.com/JuliaLang/julia/pull/36121 + @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex] ] + [K"scope_block"(scope_type=:hard) + # Needed for later constdecl to work, though plain global form may be removed soon. 
+            [K"global" global_struct_name]
+            [K"block"
+                [K"local" struct_name]
+                [K"always_defined" struct_name]
+                typevar_stmts...
+                [K"="
+                    newtype_var
+                    [K"call"
+                        "_structtype"::K"core"
+                        ctx.mod::K"Value"
+                        struct_name=>K"Symbol"
+                        [K"call"(type_sig) "svec"::K"core" typevar_names...]
+                        [K"call"(type_body) "svec"::K"core" [n=>K"Symbol" for n in field_names]...]
+                        [K"call"(type_body) "svec"::K"core" field_attrs...]
+                        is_mutable::K"Bool"
+                        min_initialized::K"Integer"
+                    ]
+                ]
+                [K"=" struct_name newtype_var]
+                [K"call"(supertype) "_setsuper!"::K"core" newtype_var supertype]
+                [K"=" hasprev
+                    [K"&&" [K"call" "isdefinedglobal"::K"core"
+                        ctx.mod::K"Value"
+                        struct_name=>K"Symbol"
+                        false::K"Bool"]
+                        [K"call" "_equiv_typedef"::K"core" global_struct_name newtype_var]
+                    ]]
+                [K"=" prev [K"if" hasprev global_struct_name false::K"Bool"]]
+                [K"if" hasprev
+                    [K"block"
+                        # if this is compatible with an old definition, use the old parameters, but the
+                        # new object. This will fail to capture recursive cases, but the call to typebody!
+                        # below is permitted to choose either type definition to put into the binding table
+                        if !isempty(typevar_names)
+                            # And reassign the typevar_names - these may be
+                            # referenced in the definition of the field
+                            # types below
+                            [K"=" [K"tuple" typevar_names...] prev_typevars]
+                        end
+                    ]
+                ]
+                [K"=" newdef
+                    [K"call"(type_body)
+                        "_typebody!"::K"core"
+                        prev
+                        newtype_var
+                        [K"call" "svec"::K"core" insert_struct_shim(ctx, field_types, struct_name)...]
+                    ]]
+                [K"constdecl"
+                    global_struct_name
+                    newdef
+                ]
+                # Default constructors
+                if isempty(inner_defs)
+                    default_inner_constructors(ctx, ex, global_struct_name,
+                                               typevar_names, typevar_stmts, field_names_2, field_types)
+                else
+                    map!(inner_defs, inner_defs) do def
+                        rewrite_new_calls(ctx, def, struct_name, global_struct_name,
+                                          typevar_names, field_names, field_types)
+                    end
+                    [K"block" inner_defs...]
+ end + if need_outer_constructor + default_outer_constructor(ctx, ex, global_struct_name, + typevar_names, typevar_stmts, field_names_2, field_types) + end + ] + ] + + # Documentation + if !isnothing(docs) || !isempty(field_docs) + [K"call"(isnothing(docs) ? ex : docs) + bind_docs!::K"Value" + struct_name + isnothing(docs) ? nothing_(ctx, ex) : docs[1] + ::K"SourceLocation"(ex) + [K"=" + "field_docs"::K"Identifier" + [K"call" "svec"::K"core" field_docs...] + ] + ] + end + nothing_(ctx, ex) + ] +end + +#------------------------------------------------------------------------------- +# Expand `where` syntax + +function expand_where(ctx, srcref, lhs, rhs) + bounds = analyze_typevar(ctx, rhs) + v = bounds[1] + @ast ctx srcref [K"let" + [K"block" [K"=" v bounds_to_TypeVar(ctx, srcref, bounds)]] + [K"call" "UnionAll"::K"core" v lhs] + ] +end + +function expand_wheres(ctx, ex) + body = ex[1] + rhs = ex[2] + if kind(rhs) == K"braces" + # S{X,Y} where {X,Y} + for r in reverse(children(rhs)) + body = expand_where(ctx, ex, body, r) + end + elseif kind(rhs) == K"_typevars" + # Eg, `S{X,Y} where {X, Y}` but with X and Y + # already allocated `TypeVar`s + for r in reverse(children(rhs[1])) + body = @ast ctx ex [K"call" "UnionAll"::K"core" r body] + end + else + # S{X} where X + body = expand_where(ctx, ex, body, rhs) + end + body +end + +# Match implicit where parameters for `Foo{<:Bar}` ==> `Foo{T} where T<:Bar` +function expand_curly(ctx, ex) + @assert kind(ex) == K"curly" + check_no_parameters(ex, "unexpected semicolon in type parameter list") + check_no_assignment(children(ex), "misplace assignment in type parameter list") + + typevar_stmts = SyntaxList(ctx) + type_args = SyntaxList(ctx) + implicit_typevars = SyntaxList(ctx) + + i = 1 + for e in children(ex) + k = kind(e) + if (k == K"<:" || k == K">:") && numchildren(e) == 1 + # `X{<:A}` and `X{>:A}` + name = @ast ctx e "#T$i"::K"Placeholder" + i += 1 + typevar = k == K"<:" ? 
+ bounds_to_TypeVar(ctx, e, (name, nothing, e[1])) : + bounds_to_TypeVar(ctx, e, (name, e[1], nothing)) + arg = emit_assign_tmp(typevar_stmts, ctx, typevar) + push!(implicit_typevars, arg) + else + arg = e + end + push!(type_args, arg) + end + + type = @ast ctx ex [K"call" "apply_type"::K"core" type_args...] + if !isempty(implicit_typevars) + type = @ast ctx ex [K"block" + typevar_stmts... + [K"where" type [K"_typevars" [K"block" implicit_typevars...] [K"block" typevar_stmts...]]] + ] + end + + return type +end + +#------------------------------------------------------------------------------- +# Expand import / using / export + +function expand_importpath(path) + @chk kind(path) == K"importpath" + path_spec = Expr(:.) + prev_was_dot = true + for component in children(path) + k = kind(component) + if k == K"quote" + # Permit quoted path components as in + # import A.(:b).:c + component = component[1] + end + @chk kind(component) in (K"Identifier", K".") + name = component.name_val + is_dot = kind(component) == K"." 
+ if is_dot && !prev_was_dot + throw(LoweringError(component, "invalid import path: `.` in identifier path")) + end + prev_was_dot = is_dot + push!(path_spec.args, Symbol(name)) + end + return path_spec +end + +function expand_import_or_using(ctx, ex) + if kind(ex[1]) == K":" + # import M: x.y as z, w + # (import (: (importpath M) (as (importpath x y) z) (importpath w))) + # => + # (call module_import + # false + # (call core.svec "M") + # (call core.svec 2 "x" "y" "z" 1 "w" "w")) + @chk numchildren(ex[1]) >= 2 + from = ex[1][1] + from_path = @ast ctx from QuoteNode(expand_importpath(from))::K"Value" + paths = ex[1][2:end] + else + # import A.B + # (using (importpath A B)) + # (call eval_import true nothing (call core.svec 1 "w")) + @chk numchildren(ex) >= 1 + from_path = nothing + paths = children(ex) + end + # Here we represent the paths as quoted `Expr` data structures + path_specs = SyntaxList(ctx) + for spec in paths + as_name = nothing + if kind(spec) == K"as" + @chk numchildren(spec) == 2 + @chk kind(spec[2]) == K"Identifier" + as_name = Symbol(spec[2].name_val) + path = QuoteNode(Expr(:as, expand_importpath(spec[1]), as_name)) + else + path = QuoteNode(expand_importpath(spec)) + end + push!(path_specs, @ast ctx spec path::K"Value") + end + is_using = kind(ex) == K"using" + stmts = SyntaxList(ctx) + if isnothing(from_path) + for spec in path_specs + if is_using + push!(stmts, + @ast ctx spec [K"call" + eval_using ::K"Value" + ctx.mod ::K"Value" + spec + ] + ) + else + push!(stmts, + @ast ctx spec [K"call" + eval_import ::K"Value" + (!is_using) ::K"Bool" + ctx.mod ::K"Value" + "nothing" ::K"top" + spec + ] + ) + end + # latestworld required between imports so that previous symbols + # become visible + push!(stmts, @ast ctx spec (::K"latestworld")) + end + else + push!(stmts, @ast ctx ex [K"call" + eval_import ::K"Value" + (!is_using) ::K"Bool" + ctx.mod ::K"Value" + from_path + path_specs... 
+ ]) + push!(stmts, @ast ctx ex (::K"latestworld")) + end + @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + stmts... + [K"removable" "nothing"::K"core"] + ] +end + +# Expand `public` or `export` +function expand_public(ctx, ex) + identifiers = String[] + for e in children(ex) + @chk kind(e) == K"Identifier" (ex, "Expected identifier") + push!(identifiers, e.name_val) + end + (e.name_val::K"String" for e in children(ex)) + @ast ctx ex [K"call" + eval_public::K"Value" + ctx.mod::K"Value" + (kind(ex) == K"export")::K"Bool" + identifiers::K"Value" + ] +end + +#------------------------------------------------------------------------------- +# Expand docstring-annotated expressions + +function expand_doc(ctx, ex, docex, mod=ctx.mod) + if kind(ex) in (K"Identifier", K".") + expand_forms_2(ctx, @ast ctx docex [K"call" + bind_static_docs!::K"Value" + (kind(ex) === K"." ? ex[1] : ctx.mod::K"Value") + (kind(ex) === K"." ? ex[2] : ex).name_val::K"Symbol" + docex[1] + ::K"SourceLocation"(ex) + Union{}::K"Value" + ]) + elseif is_eventually_call(ex) + expand_function_def(ctx, @ast(ctx, ex, [K"function" ex [K"block"]]), + docex; doc_only=true) + else + expand_forms_2(ctx, ex, docex) + end +end + +#------------------------------------------------------------------------------- +# Desugaring's "big switch": expansion of some simple forms; dispatch to other +# expansion functions for the rest. + +""" +Lowering pass 2 - desugaring + +This pass simplifies expressions by expanding complicated syntax sugar into a +small set of core syntactic forms. For example, field access syntax `a.b` is +expanded to a function call `getproperty(a, :b)`. 
+""" +function expand_forms_2(ctx::DesugaringContext, ex::SyntaxTree, docs=nothing) + k = kind(ex) + if k == K"atomic" + throw(LoweringError(ex, "unimplemented or unsupported atomic declaration")) + elseif k == K"call" + expand_call(ctx, ex) + elseif k == K"dotcall" || k == K".&&" || k == K".||" || k == K".=" + expand_forms_2(ctx, expand_fuse_broadcast(ctx, ex)) + elseif k == K"." + expand_forms_2(ctx, expand_dot(ctx, ex)) + elseif k == K"?" + @chk numchildren(ex) == 3 + expand_forms_2(ctx, @ast ctx ex [K"if" children(ex)...]) + elseif k == K"&&" || k == K"||" + @chk numchildren(ex) > 1 + cs = expand_cond_children(ctx, ex) + # Attributing correct provenance for `cs[1:end-1]` is tricky in cases + # like `a && (b && c)` because the expression constructed here arises + # from the source fragment `a && (b` which doesn't follow the tree + # structure. For now we attribute to the parent node. + cond = length(cs) == 2 ? + cs[1] : + makenode(ctx, ex, k, cs[1:end-1]) + # This transformation assumes the type assertion `cond::Bool` will be + # added by a later compiler pass (currently done in codegen) + if k == K"&&" + @ast ctx ex [K"if" cond cs[end] false::K"Bool"] + else + @ast ctx ex [K"if" cond true::K"Bool" cs[end]] + end + elseif k == K"::" + @chk numchildren(ex) == 2 "`::` must be written `value::type` outside function argument lists" + @ast ctx ex [K"call" + "typeassert"::K"core" + expand_forms_2(ctx, ex[1]) + expand_forms_2(ctx, ex[2]) + ] + elseif k == K"<:" || k == K">:" || k == K"-->" + expand_forms_2(ctx, @ast ctx ex [K"call" + adopt_scope(string(k)::K"Identifier", ex) + children(ex)... + ]) + elseif k == K"op=" || k == K".op=" + expand_forms_2(ctx, expand_update_operator(ctx, ex)) + elseif k == K"=" + expand_assignment(ctx, ex) + elseif k == K"break" + numchildren(ex) > 0 ? 
ex : + @ast ctx ex [K"break" "loop_exit"::K"symbolic_label"] + elseif k == K"continue" + @ast ctx ex [K"break" "loop_cont"::K"symbolic_label"] + elseif k == K"comparison" + expand_forms_2(ctx, expand_compare_chain(ctx, ex)) + elseif k == K"doc" + @chk numchildren(ex) == 2 + expand_doc(ctx, ex[2], ex) + elseif k == K"for" + expand_forms_2(ctx, expand_for(ctx, ex)) + elseif k == K"comprehension" + @chk numchildren(ex) == 1 + @chk kind(ex[1]) == K"generator" + @ast ctx ex [K"call" + "collect"::K"top" + expand_forms_2(ctx, ex[1]) + ] + elseif k == K"typed_comprehension" + @chk numchildren(ex) == 2 + @chk kind(ex[2]) == K"generator" + if numchildren(ex[2]) == 2 && kind(ex[2][2]) == K"iteration" + # Hack to lower simple typed comprehensions to loops very early, + # greatly reducing the number of functions and load on the compiler + expand_forms_2(ctx, expand_comprehension_to_loops(ctx, ex)) + else + @ast ctx ex [K"call" + "collect"::K"top" + expand_forms_2(ctx, ex[1]) + expand_forms_2(ctx, ex[2]) + ] + end + elseif k == K"generator" + expand_forms_2(ctx, expand_generator(ctx, ex)) + elseif k == K"->" || k == K"do" + expand_forms_2(ctx, expand_arrow(ctx, ex)) + elseif k == K"function" + expand_forms_2(ctx, expand_function_def(ctx, ex, docs)) + elseif k == K"macro" + @ast ctx ex [K"block" + [K"assert" + "global_toplevel_only"::K"Symbol" + [K"inert" ex] + ] + expand_forms_2(ctx, expand_macro_def(ctx, ex)) + ] + elseif k == K"if" || k == K"elseif" + @chk numchildren(ex) >= 2 + @ast ctx ex [k + expand_condition(ctx, ex[1]) + expand_forms_2(ctx, ex[2:end])... 
+ ] + elseif k == K"let" + expand_forms_2(ctx, expand_let(ctx, ex)) + elseif k == K"const" + expand_const_decl(ctx, ex) + elseif k == K"local" || k == K"global" + if k == K"global" && kind(ex[1]) == K"const" + # Normalize `global const` to `const global` + expand_const_decl(ctx, @ast ctx ex [K"const" [K"global" ex[1][1]]]) + else + expand_decls(ctx, ex) + end + elseif k == K"where" + expand_forms_2(ctx, expand_wheres(ctx, ex)) + elseif k == K"braces" || k == K"bracescat" + throw(LoweringError(ex, "{ } syntax is reserved for future use")) + elseif k == K"string" + if numchildren(ex) == 1 && kind(ex[1]) == K"String" + ex[1] + else + @ast ctx ex [K"call" + "string"::K"top" + expand_forms_2(ctx, children(ex))... + ] + end + elseif k == K"try" + expand_forms_2(ctx, expand_try(ctx, ex)) + elseif k == K"tuple" + if has_parameters(ex) + if numchildren(ex) > 1 + throw(LoweringError(ex[end], "unexpected semicolon in tuple - use `,` to separate tuple elements")) + end + expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex[1]))) + elseif any_assignment(children(ex)) + expand_forms_2(ctx, expand_named_tuple(ctx, ex, children(ex))) + else + expand_forms_2(ctx, @ast ctx ex [K"call" + "tuple"::K"core" + children(ex)... + ]) + end + elseif k == K"$" + throw(LoweringError(ex, "`\$` expression outside string or quote block")) + elseif k == K"module" + throw(LoweringError(ex, "`module` is only allowed at top level")) + elseif k == K"import" || k == K"using" + expand_import_or_using(ctx, ex) + elseif k == K"export" || k == K"public" + expand_public(ctx, ex) + elseif k == K"abstract" || k == K"primitive" + expand_forms_2(ctx, expand_abstract_or_primitive_type(ctx, ex)) + elseif k == K"struct" + expand_forms_2(ctx, expand_struct_def(ctx, ex, docs)) + elseif k == K"ref" + sctx = with_stmts(ctx) + (arr, idxs) = expand_ref_components(sctx, ex) + expand_forms_2(ctx, + @ast ctx ex [K"block" + sctx.stmts... + [K"call" + "getindex"::K"top" + arr + idxs... 
+ ] + ] + ) + elseif k == K"curly" + expand_forms_2(ctx, expand_curly(ctx, ex)) + elseif k == K"toplevel" + # The toplevel form can't be lowered here - it needs to just be quoted + # and passed through to a call to eval. + ex2 = @ast ctx ex [K"block" + [K"assert" "toplevel_only"::K"Symbol" [K"inert" ex]] + [K"call" + eval ::K"Value" + ctx.mod ::K"Value" + [K"inert" ex] + [K"parameters" + [K"=" + "expr_compat_mode"::K"Identifier" + ctx.expr_compat_mode::K"Bool" + ] + ] + ] + ] + expand_forms_2(ctx, ex2) + elseif k == K"vect" + check_no_parameters(ex, "unexpected semicolon in array expression") + expand_array(ctx, ex, "vect") + elseif k == K"hcat" + expand_array(ctx, ex, "hcat") + elseif k == K"typed_hcat" + expand_array(ctx, ex, "typed_hcat") + elseif k == K"opaque_closure" + expand_forms_2(ctx, expand_opaque_closure(ctx, ex)) + elseif k == K"vcat" || k == K"typed_vcat" + expand_forms_2(ctx, expand_vcat(ctx, ex)) + elseif k == K"ncat" || k == K"typed_ncat" + expand_forms_2(ctx, expand_ncat(ctx, ex)) + elseif k == K"while" + @chk numchildren(ex) == 2 + @ast ctx ex [K"break_block" "loop_exit"::K"symbolic_label" + [K"_while" + expand_condition(ctx, ex[1]) + [K"break_block" "loop_cont"::K"symbolic_label" + [K"scope_block"(scope_type=:neutral) + expand_forms_2(ctx, ex[2]) + ] + ] + ] + ] + elseif k == K"inert" + ex + elseif k == K"gc_preserve" + s = ssavar(ctx, ex) + r = ssavar(ctx, ex) + @ast ctx ex [K"block" + s := [K"gc_preserve_begin" children(ex)[2:end]...] + r := expand_forms_2(ctx, children(ex)[1]) + [K"gc_preserve_end" s] + r + ] + elseif k == K"&" + throw(LoweringError(ex, "invalid syntax")) + elseif k == K"$" + throw(LoweringError(ex, "`\$` expression outside string or quote")) + elseif k == K"..." 
+ throw(LoweringError(ex, "`...` expression outside call")) + elseif is_leaf(ex) + ex + elseif k == K"return" + if numchildren(ex) == 0 + @ast ctx ex [K"return" "nothing"::K"core"] + elseif numchildren(ex) == 1 + mapchildren(e->expand_forms_2(ctx,e), ctx, ex) + else + throw(LoweringError(ex, "More than one argument to return")) + end + else + mapchildren(e->expand_forms_2(ctx,e), ctx, ex) + end +end + +function expand_forms_2(ctx::DesugaringContext, exs::Union{Tuple,AbstractVector}) + res = SyntaxList(ctx) + for e in exs + push!(res, expand_forms_2(ctx, e)) + end + res +end + +function expand_forms_2(ctx::StatementListCtx, args...) + expand_forms_2(ctx.ctx, args...) +end + +@fzone "JL: desugar" function expand_forms_2(ctx::MacroExpansionContext, ex::SyntaxTree) + ctx1 = DesugaringContext(ctx, ctx.expr_compat_mode) + ex1 = expand_forms_2(ctx1, reparent(ctx1, ex)) + ctx1, ex1 +end diff --git a/JuliaLowering/src/eval.jl b/JuliaLowering/src/eval.jl new file mode 100644 index 0000000000000..3add153881b9a --- /dev/null +++ b/JuliaLowering/src/eval.jl @@ -0,0 +1,568 @@ +# Non-incremental lowering API for non-toplevel non-module expressions. +# May be removed? + +function lower(mod::Module, ex0; expr_compat_mode=false, world=Base.get_world_counter()) + ctx1, ex1 = expand_forms_1( mod, ex0, expr_compat_mode, world) + ctx2, ex2 = expand_forms_2( ctx1, ex1) + ctx3, ex3 = resolve_scopes( ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir( ctx4, ex4) + ex5 +end + +function macroexpand(mod::Module, ex; expr_compat_mode=false, world=Base.get_world_counter()) + ctx1, ex1 = expand_forms_1(mod, ex, expr_compat_mode, world) + ex1 +end + +# Incremental lowering API which can manage toplevel and module expressions. +# +# This iteration API is oddly bespoke and arguably somewhat non-Julian for two +# reasons: +# +# * Lowering knows when new modules are required, and may request them with +# `:begin_module`. 
However `eval()` generates those modules so they need to +# be passed back into lowering. So we can't just use `Base.iterate()`. (Put a +# different way, we have a situation which is suited to coroutines but we +# don't want to use full Julia `Task`s for this.) +# * We might want to implement this `eval()` in Julia's C runtime code or early +# in bootstrap. Hence using SimpleVector and Symbol as the return values of +# `lower_step()` +# +# We might consider changing at least the second of these choices, depending on +# how we end up putting this into Base. + +struct LoweringIterator{GraphType} + ctx::MacroExpansionContext{GraphType} + todo::Vector{Tuple{SyntaxTree{GraphType}, Bool, Int}} +end + +function lower_init(ex::SyntaxTree, mod::Module, macro_world::UInt; expr_compat_mode::Bool=false) + graph = ensure_macro_attributes(syntax_graph(ex)) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) + ex = reparent(ctx, ex) + LoweringIterator{typeof(graph)}(ctx, [(ex, false, 0)]) +end + +function lower_step(iter, push_mod=nothing) + if !isnothing(push_mod) + push_layer!(iter.ctx, push_mod, false) + end + + if isempty(iter.todo) + return Core.svec(:done) + end + + ex, is_module_body, child_idx = pop!(iter.todo) + if child_idx > 0 + next_child = child_idx + 1 + if child_idx <= numchildren(ex) + push!(iter.todo, (ex, is_module_body, next_child)) + ex = ex[child_idx] + else + if is_module_body + pop_layer!(iter.ctx) + return Core.svec(:end_module) + else + return lower_step(iter) + end + end + end + + k = kind(ex) + if !(k in KSet"toplevel module") + ex = expand_forms_1(iter.ctx, ex) + k = kind(ex) + end + if k == K"toplevel" + push!(iter.todo, (ex, false, 1)) + return lower_step(iter) + elseif k == K"module" + name = ex[1] + if kind(name) != K"Identifier" + throw(LoweringError(name, "Expected module name")) + end + newmod_name = Symbol(name.name_val) + body = ex[2] + if kind(body) != K"block" + throw(LoweringError(body, "Expected block in module body")) 
+ end + std_defs = !has_flags(ex, JuliaSyntax.BARE_MODULE_FLAG) + loc = source_location(LineNumberNode, ex) + push!(iter.todo, (body, true, 1)) + return Core.svec(:begin_module, newmod_name, std_defs, loc) + else + # Non macro expansion parts of lowering + ctx2, ex2 = expand_forms_2(iter.ctx, ex) + ctx3, ex3 = resolve_scopes(ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir(ctx4, ex4) + thunk = to_lowered_expr(ex5) + return Core.svec(:thunk, thunk) + end +end + + +#------------------------------------------------------------------------------- + +function codeinfo_has_image_globalref(@nospecialize(e)) + if e isa GlobalRef + return 0x00 !== @ccall jl_object_in_image(e.mod::Any)::UInt8 + elseif e isa Core.CodeInfo + return any(codeinfo_has_image_globalref, e.code) + else + return false + end +end + +_CodeInfo_need_ver = v"1.12.0-DEV.512" +if VERSION < _CodeInfo_need_ver + function _CodeInfo(args...) + error("Constructing a CodeInfo using JuliaLowering currently requires Julia version $_CodeInfo_need_ver or greater") + end +else + # debuginfo changed completely as of https://github.com/JuliaLang/julia/pull/52415 + # nargs / isva was added as of https://github.com/JuliaLang/julia/pull/54341 + # field rettype added in https://github.com/JuliaLang/julia/pull/54655 + # field has_image_globalref added in https://github.com/JuliaLang/julia/pull/57433 + # CodeInfo constructor. 
TODO: Should be in Core + let + fns = fieldnames(Core.CodeInfo) + fts = fieldtypes(Core.CodeInfo) + conversions = [:(convert($t, $n)) for (t,n) in zip(fts, fns)] + + expected_fns = (:code, :debuginfo, :ssavaluetypes, :ssaflags, :slotnames, :slotflags, :slottypes, :rettype, :parent, :edges, :min_world, :max_world, :method_for_inference_limit_heuristics, :nargs, :propagate_inbounds, :has_fcall, :has_image_globalref, :nospecializeinfer, :isva, :inlining, :constprop, :purity, :inlining_cost) + expected_fts = (Vector{Any}, Core.DebugInfo, Any, Vector{UInt32}, Vector{Symbol}, Vector{UInt8}, Any, Any, Any, Any, UInt64, UInt64, Any, UInt64, Bool, Bool, Bool, Bool, Bool, UInt8, UInt8, UInt16, UInt16) + + code = if fns != expected_fns + unexpected_fns = collect(setdiff(Set(fns), Set(expected_fns))) + missing_fns = collect(setdiff(Set(expected_fns), Set(fns))) + :(function _CodeInfo(args...) + error("Unrecognized CodeInfo fields: Maybe version $VERSION is too new for this version of JuliaLowering?" + * isempty(unexpected_fns) ? "" : "\nUnexpected fields found: $($unexpected_fns)" + * isempty(missing_fns) ? "" : "\nMissing fields: $($missing_fns)") + end) + elseif fts != expected_fts + :(function _CodeInfo(args...) + error("Unrecognized CodeInfo field types: Maybe version $VERSION is too new for this version of JuliaLowering?") + end) + else + :(function _CodeInfo($(fns...)) + $(Expr(:new, :(Core.CodeInfo), conversions...)) + end) + end + + Core.eval(@__MODULE__, code) + end +end + +function _compress_debuginfo(info) + filename, edges, codelocs = info + edges = Core.svec(map(_compress_debuginfo, edges)...) 
+ codelocs = @ccall jl_compress_codelocs((-1)::Int32, codelocs::Any, + div(length(codelocs),3)::Csize_t)::String + Core.DebugInfo(Symbol(filename), nothing, edges, codelocs) +end + +function ir_debug_info_state(ex) + e1 = first(flattened_provenance(ex)) + topfile = filename(e1) + [(topfile, [], Vector{Int32}())] +end + +function add_ir_debug_info!(current_codelocs_stack, stmt) + locstk = [(filename(e), source_location(e)[1]) for e in flattened_provenance(stmt)] + for j in 1:length(locstk) + if j === 1 && current_codelocs_stack[j][1] != locstk[j][1] + # dilemma: the filename stack here shares no prefix with that of the + # previous statement, where differing filenames usually (j > 1) mean + # a different macro expansion has started at this statement. guess + # that both files are the same, and inherit the previous filename. + locstk[j] = (current_codelocs_stack[j][1], locstk[j][2]) + end + if j < length(current_codelocs_stack) && (j === length(locstk) || + current_codelocs_stack[j+1][1] != locstk[j+1][1]) + while j < length(current_codelocs_stack) + info = pop!(current_codelocs_stack) + push!(last(current_codelocs_stack)[2], info) + end + elseif j > length(current_codelocs_stack) + push!(current_codelocs_stack, (locstk[j][1], [], Vector{Int32}())) + end + end + @assert length(locstk) === length(current_codelocs_stack) + for (j, (file,line)) in enumerate(locstk) + fn, edges, codelocs = current_codelocs_stack[j] + @assert fn == file + if j < length(locstk) + edge_index = length(edges) + 1 + edge_codeloc_index = fld1(length(current_codelocs_stack[j+1][3]) + 1, 3) + else + edge_index = 0 + edge_codeloc_index = 0 + end + push!(codelocs, line) + push!(codelocs, edge_index) + push!(codelocs, edge_codeloc_index) + end +end + +function finish_ir_debug_info!(current_codelocs_stack) + while length(current_codelocs_stack) > 1 + info = pop!(current_codelocs_stack) + push!(last(current_codelocs_stack)[2], info) + end + + _compress_debuginfo(only(current_codelocs_stack)) +end + +# 
Convert SyntaxTree to the CodeInfo+Expr data structures understood by the +# Julia runtime +function to_code_info(ex::SyntaxTree, slots::Vector{Slot}, meta::CompileHints) + stmts = Any[] + + current_codelocs_stack = ir_debug_info_state(ex) + + nargs = sum((s.kind==:argument for s in slots), init=0) + slotnames = Vector{Symbol}(undef, length(slots)) + slot_rename_inds = Dict{String,Int}() + slotflags = Vector{UInt8}(undef, length(slots)) + for (i, slot) in enumerate(slots) + name = slot.name + # TODO: Do we actually want unique names here? The C code in + # `jl_new_code_info_from_ir` has logic to simplify gensym'd names and + # use the empty string for compiler-generated bindings. + ni = get(slot_rename_inds, name, 0) + slot_rename_inds[name] = ni + 1 + if ni > 0 + name = "$name@$ni" + end + sname = Symbol(name) + slotnames[i] = sname + slotflags[i] = # Inference | Codegen + slot.is_read << 3 | # SLOT_USED | jl_vinfo_sa + slot.is_single_assign << 4 | # SLOT_ASSIGNEDONCE | - + slot.is_maybe_undef << 5 | # SLOT_USEDUNDEF | jl_vinfo_usedundef + slot.is_called << 6 # SLOT_CALLED | - + if slot.is_nospecialize + # Ideally this should be a slot flag instead + add_ir_debug_info!(current_codelocs_stack, ex) + push!(stmts, Expr(:meta, :nospecialize, Core.SlotNumber(i))) + end + end + + stmt_offset = length(stmts) + for stmt in children(ex) + push!(stmts, _to_lowered_expr(stmt, stmt_offset)) + add_ir_debug_info!(current_codelocs_stack, stmt) + end + + debuginfo = finish_ir_debug_info!(current_codelocs_stack) + + has_image_globalref = any(codeinfo_has_image_globalref, stmts) + + # TODO: Set ssaflags based on call site annotations: + # - @inbounds annotations + # - call site @inline / @noinline + # - call site @assume_effects + ssaflags = zeros(UInt32, length(stmts)) + + propagate_inbounds = + get(meta, :propagate_inbounds, false) + # TODO: Set true if there's a foreigncall + has_fcall = false + nospecializeinfer = + get(meta, :nospecializeinfer, false) + inlining = + get(meta, 
:inline, false) ? 0x01 : + get(meta, :noinline, false) ? 0x02 : 0x00 + constprop = + get(meta, :aggressive_constprop, false) ? 0x01 : + get(meta, :no_constprop, false) ? 0x02 : 0x00 + purity = + let eo = get(meta, :purity, nothing) + isnothing(eo) ? 0x0000 : Base.encode_effects_override(eo) + end + + # The following CodeInfo fields always get their default values for + # uninferred code. + ssavaluetypes = length(stmts) # Why does the runtime code do this? + slottypes = nothing + parent = nothing + method_for_inference_limit_heuristics = nothing + edges = nothing + min_world = Csize_t(1) + max_world = typemax(Csize_t) + isva = false + inlining_cost = 0xffff + rettype = Any + + _CodeInfo( + stmts, + debuginfo, + ssavaluetypes, + ssaflags, + slotnames, + slotflags, + slottypes, + rettype, + parent, + edges, + min_world, + max_world, + method_for_inference_limit_heuristics, + nargs, + propagate_inbounds, + has_fcall, + has_image_globalref, + nospecializeinfer, + isva, + inlining, + constprop, + purity, + inlining_cost + ) +end + +@fzone "JL: to_lowered_expr" function to_lowered_expr(ex::SyntaxTree) + _to_lowered_expr(ex, 0) +end + +function _to_lowered_expr(ex::SyntaxTree, stmt_offset::Int) + k = kind(ex) + if is_literal(k) + ex.value + elseif k == K"core" + name = ex.name_val + if name == "cglobal" + # Inference expects cglobal as call argument to be `GlobalRef`, + # so we resolve that name as a symbol of `Core.Intrinsics` here. 
+ # https://github.com/JuliaLang/julia/blob/7a8cd6e202f1d1216a6c0c0b928fb43a123cada8/Compiler/src/validation.jl#L87 + GlobalRef(Core.Intrinsics, :cglobal) + elseif name == "nothing" + # Translate Core.nothing into literal `nothing`s (flisp uses a + # special form (null) for this during desugaring, etc) + nothing + else + GlobalRef(Core, Symbol(name)) + end + elseif k == K"top" + GlobalRef(Base, Symbol(ex.name_val)) + elseif k == K"globalref" + GlobalRef(ex.mod, Symbol(ex.name_val)) + elseif k == K"Identifier" + # Implicitly refers to name in parent module + # TODO: Should we even have plain identifiers at this point or should + # they all effectively be resolved into GlobalRef earlier? + Symbol(ex.name_val) + elseif k == K"SourceLocation" + QuoteNode(source_location(LineNumberNode, ex)) + elseif k == K"Symbol" + QuoteNode(Symbol(ex.name_val)) + elseif k == K"slot" + Core.SlotNumber(ex.var_id) + elseif k == K"static_parameter" + Expr(:static_parameter, ex.var_id) + elseif k == K"SSAValue" + Core.SSAValue(ex.var_id + stmt_offset) + elseif k == K"return" + Core.ReturnNode(_to_lowered_expr(ex[1], stmt_offset)) + elseif k == K"inert" + e1 = ex[1] + getmeta(ex, :as_Expr, false) ? QuoteNode(Expr(e1)) : e1 + elseif k == K"code_info" + ir = to_code_info(ex[1], ex.slots, ex.meta) + if ex.is_toplevel_thunk + Expr(:thunk, ir) # TODO: Maybe nice to just return a CodeInfo here? + else + ir + end + elseif k == K"Value" + ex.value + elseif k == K"goto" + Core.GotoNode(ex[1].id + stmt_offset) + elseif k == K"gotoifnot" + Core.GotoIfNot(_to_lowered_expr(ex[1], stmt_offset), ex[2].id + stmt_offset) + elseif k == K"enter" + catch_idx = ex[1].id + numchildren(ex) == 1 ? + Core.EnterNode(catch_idx) : + Core.EnterNode(catch_idx, _to_lowered_expr(ex[2], stmt_offset)) + elseif k == K"method" + cs = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) + # Ad-hoc unwrapping to satisfy `Expr(:method)` expectations + cs1 = cs[1] + c1 = cs1 isa QuoteNode ? 
cs1.value : cs1 + Expr(:method, c1, cs[2:end]...) + elseif k == K"newvar" + Core.NewvarNode(_to_lowered_expr(ex[1], stmt_offset)) + elseif k == K"opaque_closure_method" + args = map(e->_to_lowered_expr(e, stmt_offset), children(ex)) + # opaque_closure_method has special non-evaluated semantics for the + # `functionloc` line number node so we need to undo a level of quoting + arg4 = args[4] + @assert arg4 isa QuoteNode + args[4] = arg4.value + Expr(:opaque_closure_method, args...) + elseif k == K"meta" + args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)] + # Unpack K"Symbol" QuoteNode as `Expr(:meta)` requires an identifier here. + arg1 = args[1] + @assert arg1 isa QuoteNode + args[1] = arg1.value + Expr(:meta, args...) + elseif k == K"static_eval" + @assert numchildren(ex) == 1 + _to_lowered_expr(ex[1], stmt_offset) + elseif k == K"cfunction" + args = Any[_to_lowered_expr(e, stmt_offset) for e in children(ex)] + if kind(ex[2]) == K"static_eval" + args[2] = QuoteNode(args[2]) + end + Expr(:cfunction, args...) + else + # Allowed forms according to https://docs.julialang.org/en/v1/devdocs/ast/ + # + # call invoke static_parameter `=` method struct_type abstract_type + # primitive_type global const new splatnew isdefined + # enter leave pop_exception inbounds boundscheck loopinfo copyast meta + # lambda + head = k == K"call" ? :call : + k == K"new" ? :new : + k == K"splatnew" ? :splatnew : + k == K"=" ? :(=) : + k == K"leave" ? :leave : + k == K"isdefined" ? :isdefined : + k == K"latestworld" ? :latestworld : + k == K"pop_exception" ? :pop_exception : + k == K"captured_local" ? :captured_local : + k == K"gc_preserve_begin" ? :gc_preserve_begin : + k == K"gc_preserve_end" ? :gc_preserve_end : + k == K"foreigncall" ? :foreigncall : + k == K"new_opaque_closure" ? 
:new_opaque_closure : + nothing + if isnothing(head) + throw(LoweringError(ex, "Unhandled form for kind $k")) + end + ret = Expr(head) + for e in children(ex) + push!(ret.args, _to_lowered_expr(e, stmt_offset)) + end + return ret + end +end + +#------------------------------------------------------------------------------- +# Our version of eval - should be upstreamed though? +@fzone "JL: eval" function eval(mod::Module, ex::SyntaxTree; + macro_world::UInt=Base.get_world_counter(), + opts...) + iter = lower_init(ex, mod, macro_world; opts...) + _eval(mod, iter) +end + +# Version of eval() taking `Expr` (or Expr tree leaves of any type) +function eval(mod::Module, ex; opts...) + eval(mod, expr_to_syntaxtree(ex); opts...) +end + +if VERSION >= v"1.13.0-DEV.1199" # https://github.com/JuliaLang/julia/pull/59604 + +function _eval(mod, iter) + modules = Module[] + new_mod = nothing + result = nothing + while true + thunk = lower_step(iter, new_mod)::Core.SimpleVector + new_mod = nothing + type = thunk[1]::Symbol + if type == :done + break + elseif type == :begin_module + push!(modules, mod) + filename = something(thunk[4].file, :none) + mod = @ccall jl_begin_new_module(mod::Any, thunk[2]::Symbol, thunk[3]::Cint, + filename::Cstring, thunk[4].line::Cint)::Module + new_mod = mod + elseif type == :end_module + @ccall jl_end_new_module(mod::Module)::Cvoid + result = mod + mod = pop!(modules) + else + @assert type == :thunk + result = Core.eval(mod, thunk[2]) + end + end + @assert isempty(modules) + return result +end + +else + +function _eval(mod, iter, new_mod=nothing) + in_new_mod = !isnothing(new_mod) + result = nothing + while true + thunk = lower_step(iter, new_mod)::Core.SimpleVector + new_mod = nothing + type = thunk[1]::Symbol + if type == :done + @assert !in_new_mod + break + elseif type == :begin_module + name = thunk[2]::Symbol + std_defs = thunk[3] + result = Core.eval(mod, + Expr(:module, std_defs, name, + Expr(:block, thunk[4], Expr(:call, m->_eval(m, iter, m), 
name))) + ) + elseif type == :end_module + @assert in_new_mod + return mod + else + @assert type == :thunk + result = Core.eval(mod, thunk[2]) + end + end + return result +end + +end + +""" + include(mod::Module, path::AbstractString) + +Evaluate the contents of the input source file in the global scope of module +`mod`. Every module (except those defined with baremodule) has its own +definition of `include()` omitting the `mod` argument, which evaluates the file +in that module. Returns the result of the last evaluated expression of the +input file. During including, a task-local include path is set to the directory +containing the file. Nested calls to include will search relative to that path. +This function is typically used to load source interactively, or to combine +files in packages that are broken into multiple source files. +""" +function include(mod::Module, path::AbstractString) + path, prev = Base._include_dependency(mod, path) + code = read(path, String) + tls = task_local_storage() + tls[:SOURCE_PATH] = path + try + return include_string(mod, code, path) + finally + if prev === nothing + delete!(tls, :SOURCE_PATH) + else + tls[:SOURCE_PATH] = prev + end + end +end + +""" + include_string(mod::Module, code::AbstractString, filename::AbstractString="string") + +Like `include`, except reads code from the given string rather than from a file. +""" +function include_string(mod::Module, code::AbstractString, filename::AbstractString="string"; + expr_compat_mode=false) + eval(mod, parseall(SyntaxTree, code; filename=filename); expr_compat_mode) +end diff --git a/JuliaLowering/src/hooks.jl b/JuliaLowering/src/hooks.jl new file mode 100644 index 0000000000000..d4aef14f13773 --- /dev/null +++ b/JuliaLowering/src/hooks.jl @@ -0,0 +1,62 @@ +""" +Becomes `Core._lower()` upon activating JuliaLowering. 
+ +Returns an svec with the lowered code (usually expr) as its first element, and +(until integration is less experimental) whatever we want after it +""" +function core_lowering_hook(@nospecialize(code), mod::Module, + file="none", line=0, world=typemax(Csize_t), warn=false) + if !(code isa SyntaxTree || code isa Expr) + # e.g. LineNumberNode, integer... + return Core.svec(code) + end + + # TODO: fix in base + file = file isa Ptr{UInt8} ? unsafe_string(file) : file + line = !(line isa Int64) ? Int64(line) : line + + local st0 = nothing + try + st0 = code isa Expr ? expr_to_syntaxtree(code, LineNumberNode(line, file)) : code + if kind(st0) in KSet"toplevel module" + return Core.svec(code) + elseif kind(st0) === K"doc" && numchildren(st0) >= 2 && kind(st0[2]) === K"module" + # TODO: this ignores module docstrings for now + return Core.svec(Expr(st0[2])) + end + ctx1, st1 = expand_forms_1( mod, st0, true, world) + ctx2, st2 = expand_forms_2( ctx1, st1) + ctx3, st3 = resolve_scopes( ctx2, st2) + ctx4, st4 = convert_closures(ctx3, st3) + ctx5, st5 = linearize_ir( ctx4, st4) + ex = to_lowered_expr(st5) + return Core.svec(ex, st5, ctx5) + catch exc + @info("JuliaLowering threw given input:", code=code, st0=st0, file=file, line=line, mod=mod) + rethrow(exc) + + # TODO: Re-enable flisp fallback once we're done collecting errors + # @error("JuliaLowering failed — falling back to flisp!", + # exception=(exc,catch_backtrace()), + # code=code, file=file, line=line, mod=mod) + # return Base.fl_lower(code, mod, file, line, world, warn) + end +end + +# TODO: Write a parser hook here. The input to `core_lowering_hook` should +# eventually be a (convertible to) SyntaxTree, but we need to make updates to +# the parsing API to include a parameter for AST type. 
+ +const _has_v1_13_hooks = isdefined(Core, :_lower) + +function activate!(enable=true) + if !_has_v1_13_hooks + error("Cannot use JuliaLowering without `Core._lower` binding or in $VERSION < 1.13") + end + + if enable + Core._setlowerer!(core_lowering_hook) + else + Core._setlowerer!(Base.fl_lower) + end +end diff --git a/JuliaLowering/src/kinds.jl b/JuliaLowering/src/kinds.jl new file mode 100644 index 0000000000000..22a243f12f0d5 --- /dev/null +++ b/JuliaLowering/src/kinds.jl @@ -0,0 +1,172 @@ +# The following kinds are used in intermediate forms by lowering but are not +# part of the surface syntax +function _register_kinds() + JuliaSyntax.register_kinds!(JuliaLowering, 1, [ + # "Syntax extensions" - expression kinds emitted by macros or macro + # expansion, and known to lowering. These are part of the AST API but + # without having surface syntax. + "BEGIN_EXTENSION_KINDS" + # atomic fields or accesses (see `@atomic`) + "atomic" + # Flag for @generated parts of a function + "generated" + # Temporary rooting of identifiers (GC.@preserve) + "gc_preserve" + "gc_preserve_begin" + "gc_preserve_end" + # A literal Julia value of any kind, as might be inserted into the + # AST during macro expansion + "Value" + # A (quoted) `Symbol` + "Symbol" + # QuoteNode; not quasiquote + "inert" + # Compiler metadata hints + "meta" + # TODO: Use `meta` for inbounds and loopinfo etc? + "inbounds" + "boundscheck" + "inline" + "noinline" + "loopinfo" + # Call into foreign code. Emitted by `@ccall` + "foreigncall" + # Special form for constructing a function callable from C + "cfunction" + # Special form emitted by `Base.Experimental.@opaque` + "opaque_closure" + # Test whether a variable is defined + "isdefined" + # [K"throw_undef_if_not" var cond] + # This form is used internally in Core.Compiler but might be + # emitted by packages such as Diffractor. 
In principle it needs to + # be passed through lowering in a similar way to `isdefined` + "throw_undef_if_not" + # named labels for `@label` and `@goto` + "symbolic_label" + # Goto named label + "symbolic_goto" + # Internal initializer for struct types, for inner constructors/functions + "new" + "splatnew" + # Used for converting `esc()`'d expressions arising from old macro + # invocations during macro expansion (gone after macro expansion) + "escape" + # Used for converting the old-style macro hygienic-scope form (gone + # after macro expansion) + "hygienic_scope" + # An expression which will eventually be evaluated "statically" in + # the context of a CodeInfo and thus allows access only to globals + # and static parameters. Used for ccall, cfunction, cglobal + # TODO: Use this for GeneratedFunctionStub also? + "static_eval" + # Catch-all for additional syntax extensions without the need to + # extend `Kind`. Known extensions include: + # locals, islocal, isglobal + # The content of an assertion is not considered to be quoted, so + # use K"Symbol" or K"inert" inside where necessary. + "extension" + "END_EXTENSION_KINDS" + + # The following kinds are internal to lowering + "BEGIN_LOWERING_KINDS" + # Semantic assertions used by lowering. The content of an assertion + # is not considered to be quoted, so use K"Symbol" etc inside where necessary. + "assert" + # Unique identifying integer for bindings (of variables, constants, etc) + "BindingId" + # Various heads harvested from flisp lowering. + # (TODO: May or may not need all these - assess later) + "break_block" + # Like block, but introduces a lexical scope; used during scope resolution. 
+ "scope_block" + # [K"always_defined" x] is an assertion that variable `x` is assigned before use + # ('local-def in flisp implementation is K"local" plus K"always_defined" + "always_defined" + "_while" + "_do_while" + "_typevars" # used for supplying already-allocated `TypeVar`s to `where` + "with_static_parameters" + "top" + "core" + "lambda" + # "A source location literal" - a node which exists only to record + # a sourceref + "SourceLocation" + # [K"function_decl" name] + # Declare a zero-method generic function with global `name` or + # creates a closure object and assigns it to the local `name`. + "function_decl" + # [K"function_type name] + # Evaluates to the type of the function or closure with given `name` + "function_type" + # [K"method_defs" name block] + # The code in `block` defines methods for generic function `name` + "method_defs" + # The code in `block` defines methods for generic function `name` + "_opaque_closure" + # The enclosed statements must be executed at top level + "toplevel_butfirst" + "assign_or_constdecl_if_global" + "moved_local" + "label" + "trycatchelse" + "tryfinally" + # The contained block of code causes no side effects and can be + # removed by a later lowering pass if its value isn't used. + # (That is, it's removable in the same sense as + # `@assume_effects :removable`.) + "removable" + # Variable type declaration; `x::T = rhs` will be temporarily + # desugared to include `(decl x T)` + "decl" + # [K"captured_local" index] + # A local variable captured into a global method. Contains the + # `index` of the associated `Box` in the rewrite list. + "captured_local" + # Causes the linearization pass to conditionally emit a world age increment + "latestworld_if_toplevel" + # This has two forms: + # [K"constdecl" var val] => declare and assign constant + # [K"constdecl" var] => declare undefined constant + # var is GlobalRef Value or Identifier + "constdecl" + # Returned from statements that should error if the result is used. 
+ "unused_only" + "END_LOWERING_KINDS" + + # The following kinds are emitted by lowering and used in Julia's untyped IR + "BEGIN_IR_KINDS" + # Identifier for a value which is only assigned once + "SSAValue" + # Local variable in a `CodeInfo` code object (including lambda arguments) + "slot" + # Static parameter to a `CodeInfo` code object ("type parameters" to methods) + "static_parameter" + # References/declares a global variable within a module + "globalref" + # Unconditional goto + "goto" + # Conditional goto + "gotoifnot" + # Exception handling + "enter" + "leave" + "pop_exception" + # Lowering targets for method definitions arising from `function` etc + "method" + # (re-)initialize a slot to undef + # See Core.NewvarNode + "newvar" + # Result of lowering a `K"lambda"` after bindings have been + # converted to slot/globalref/SSAValue. + "code_info" + # Internal initializer for opaque closures + "new_opaque_closure" + # Wrapper for the lambda of around opaque closure methods + "opaque_closure_method" + # World age increment (TODO: use top level assertion and only one latestworld kind) + "latestworld" + "END_IR_KINDS" + ]) +end diff --git a/JuliaLowering/src/linear_ir.jl b/JuliaLowering/src/linear_ir.jl new file mode 100644 index 0000000000000..2b6838f9e97d3 --- /dev/null +++ b/JuliaLowering/src/linear_ir.jl @@ -0,0 +1,1140 @@ +#------------------------------------------------------------------------------- +# Lowering pass 5: Flatten to linear IR + +function is_valid_ir_argument(ctx, ex) + k = kind(ex) + if is_simple_atom(ctx, ex) || k in KSet"inert top core quote static_eval" + true + elseif k == K"BindingId" + binfo = lookup_binding(ctx, ex) + bk = binfo.kind + bk === :slot + # TODO: We should theoretically be able to allow `bk === + # :static_parameter` for slightly more compact IR, but it's uncertain + # what the compiler is built to tolerate. 
Notably, flisp allows + # static_parameter, but doesn't produce this form until a later pass, so + # it doesn't end up in the IR. + else + false + end +end + +function is_ssa(ctx, ex) + kind(ex) == K"BindingId" && lookup_binding(ctx, ex).is_ssa +end + +# Target to jump to, including info on try handler nesting and catch block +# nesting +struct JumpTarget{GraphType} + label::SyntaxTree{GraphType} + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} +end + +function JumpTarget(label::SyntaxTree{GraphType}, ctx) where {GraphType} + JumpTarget{GraphType}(label, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack)) +end + +struct JumpOrigin{GraphType} + goto::SyntaxTree{GraphType} + index::Int + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} +end + +function JumpOrigin(goto::SyntaxTree{GraphType}, index, ctx) where {GraphType} + JumpOrigin{GraphType}(goto, index, copy(ctx.handler_token_stack), copy(ctx.catch_token_stack)) +end + +struct FinallyHandler{GraphType} + tagvar::SyntaxTree{GraphType} + target::JumpTarget{GraphType} + exit_actions::Vector{Tuple{Symbol,Union{Nothing,SyntaxTree{GraphType}}}} +end + +function FinallyHandler(tagvar::SyntaxTree{GraphType}, target::JumpTarget) where {GraphType} + FinallyHandler{GraphType}(tagvar, target, + Vector{Tuple{Symbol, Union{Nothing,SyntaxTree{GraphType}}}}()) +end + + +""" +Context for creating linear IR. + +One of these is created per lambda expression to flatten the body down to +a sequence of statements (linear IR), which eventually becomes one CodeInfo. 
+""" +struct LinearIRContext{GraphType} <: AbstractLoweringContext + graph::GraphType + code::SyntaxList{GraphType, Vector{NodeId}} + bindings::Bindings + next_label_id::Ref{Int} + is_toplevel_thunk::Bool + lambda_bindings::LambdaBindings + return_type::Union{Nothing, SyntaxTree{GraphType}} + break_targets::Dict{String, JumpTarget{GraphType}} + handler_token_stack::SyntaxList{GraphType, Vector{NodeId}} + catch_token_stack::SyntaxList{GraphType, Vector{NodeId}} + finally_handlers::Vector{FinallyHandler{GraphType}} + symbolic_jump_targets::Dict{String,JumpTarget{GraphType}} + symbolic_jump_origins::Vector{JumpOrigin{GraphType}} + meta::Dict{Symbol, Any} + mod::Module +end + +function LinearIRContext(ctx, is_toplevel_thunk, lambda_bindings, return_type) + graph = syntax_graph(ctx) + rett = isnothing(return_type) ? nothing : reparent(graph, return_type) + GraphType = typeof(graph) + LinearIRContext(graph, SyntaxList(ctx), ctx.bindings, Ref(0), + is_toplevel_thunk, lambda_bindings, rett, + Dict{String,JumpTarget{GraphType}}(), SyntaxList(ctx), SyntaxList(ctx), + Vector{FinallyHandler{GraphType}}(), Dict{String,JumpTarget{GraphType}}(), + Vector{JumpOrigin{GraphType}}(), Dict{Symbol, Any}(), ctx.mod) +end + +function current_lambda_bindings(ctx::LinearIRContext) + ctx.lambda_bindings +end + +function is_valid_body_ir_argument(ctx, ex) + if is_valid_ir_argument(ctx, ex) + true + elseif kind(ex) == K"BindingId" + binfo = lookup_binding(ctx, ex) + # Arguments are always defined + # TODO: use equiv of vinfo:never-undef when we have it + binfo.kind == :argument + else + false + end +end + +function is_simple_arg(ctx, ex) + k = kind(ex) + return is_simple_atom(ctx, ex) || k == K"BindingId" || k == K"quote" || k == K"inert" || + k == K"top" || k == K"core" || k == K"globalref" || k == K"static_eval" +end + +function is_single_assign_var(ctx::LinearIRContext, ex) + kind(ex) == K"BindingId" || return false + binfo = lookup_binding(ctx, ex) + # Arguments are always single-assign + 
# TODO: Use equiv of vinfo:sa when we have it + return binfo.kind == :argument +end + +function is_const_read_arg(ctx, ex) + k = kind(ex) + # Even if we have side effects, we know that singly-assigned + # locals cannot be affected by them so we can inline them anyway. + # TODO from flisp: "We could also allow const globals here" + return k == K"inert" || k == K"top" || k == K"core" || k == K"static_eval" || + is_simple_atom(ctx, ex) || is_single_assign_var(ctx, ex) +end + +function is_valid_ir_rvalue(ctx, lhs, rhs) + return is_ssa(ctx, lhs) || + is_valid_ir_argument(ctx, rhs) || + (kind(lhs) == K"BindingId" && + # FIXME: add: invoke ? + kind(rhs) in KSet"new splatnew cfunction isdefined call foreigncall gc_preserve_begin foreigncall new_opaque_closure") +end + +function check_no_local_bindings(ctx, ex, msg) + contains_nonglobal_binding = contains_unquoted(ex) do e + kind(e) == K"BindingId" && lookup_binding(ctx, e).kind !== :global + end + if contains_nonglobal_binding + throw(LoweringError(ex, msg)) + end +end + +# evaluate the arguments of a call, creating temporary locations as needed +function compile_args(ctx, args) + # First check if all the arguments are simple (and therefore side-effect free). + # Otherwise, we need to use ssa values for all arguments to ensure proper + # left-to-right evaluation semantics. + all_simple = all(a->is_simple_arg(ctx, a), args) + args_out = SyntaxList(ctx) + for arg in args + arg_val = compile(ctx, arg, true, false) + if (all_simple || is_const_read_arg(ctx, arg_val)) && is_valid_body_ir_argument(ctx, arg_val) + push!(args_out, arg_val) + else + push!(args_out, emit_assign_tmp(ctx, arg_val)) + end + end + return args_out +end + +function emit(ctx::LinearIRContext, ex) + push!(ctx.code, ex) + return ex +end + +function emit(ctx::LinearIRContext, srcref, k, args...) 
+ emit(ctx, makenode(ctx, srcref, k, args...)) +end + +# Emit computation of ex, assigning the result to an ssavar and returning that +function emit_assign_tmp(ctx::LinearIRContext, ex, name="tmp") + tmp = ssavar(ctx, ex, name) + emit(ctx, @ast ctx ex [K"=" tmp ex]) + return tmp +end + +function compile_pop_exception(ctx, srcref, src_tokens, dest_tokens) + # It's valid to leave the context of src_tokens for the context of + # dest_tokens when src_tokens is the same or nested within dest_tokens. + # It's enough to check the token on the top of the dest stack. + n = length(dest_tokens) + jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) + jump_ok || throw(LoweringError(srcref, "Attempt to jump into catch block")) + if n < length(src_tokens) + @ast ctx srcref [K"pop_exception" src_tokens[n+1]] + else + nothing + end +end + +function compile_leave_handler(ctx, srcref, src_tokens, dest_tokens) + n = length(dest_tokens) + jump_ok = n == 0 || (n <= length(src_tokens) && dest_tokens[n].var_id == src_tokens[n].var_id) + jump_ok || throw(LoweringError(srcref, "Attempt to jump into try block")) + if n < length(src_tokens) + @ast ctx srcref [K"leave" src_tokens[n+1:end]...] 
+ else + nothing + end +end + +function emit_pop_exception(ctx::LinearIRContext, srcref, dest_tokens) + pexc = compile_pop_exception(ctx, srcref, ctx.catch_token_stack, dest_tokens) + if !isnothing(pexc) + emit(ctx, pexc) + end +end + +function emit_leave_handler(ctx::LinearIRContext, srcref, dest_tokens) + ex = compile_leave_handler(ctx, srcref, ctx.handler_token_stack, dest_tokens) + if !isnothing(ex) + emit(ctx, ex) + end +end + +function emit_jump(ctx, srcref, target::JumpTarget) + emit_pop_exception(ctx, srcref, target.catch_token_stack) + emit_leave_handler(ctx, srcref, target.handler_token_stack) + emit(ctx, @ast ctx srcref [K"goto" target.label]) +end + +# Enter the current finally block, either through the landing pad (on_exit == +# :rethrow) or via a jump (on_exit ∈ (:return, :break)). +# +# An integer tag is created to identify the current code path and select the +# on_exit action to be taken at finally handler exit. +function enter_finally_block(ctx, srcref, on_exit, value) + @assert on_exit ∈ (:rethrow, :break, :return) + handler = last(ctx.finally_handlers) + push!(handler.exit_actions, (on_exit, value)) + tag = length(handler.exit_actions) + emit(ctx, @ast ctx srcref [K"=" handler.tagvar tag::K"Integer"]) + if on_exit != :rethrow + emit_jump(ctx, srcref, handler.target) + end +end + +# Helper function for emit_return +function _actually_return(ctx, ex) + # TODO: Handle the implicit return coverage hack for #53354 ? + rett = ctx.return_type + if !isnothing(rett) + ex = compile(ctx, convert_for_type_decl(ctx, rett, ex, rett, true), true, false) + end + simple_ret_val = isempty(ctx.catch_token_stack) ? 
+ # returning lambda directly is needed for @generated + (is_valid_ir_argument(ctx, ex) || kind(ex) == K"lambda") : + is_simple_atom(ctx, ex) + if !simple_ret_val + ex = emit_assign_tmp(ctx, ex, "return_tmp") + end + emit_pop_exception(ctx, ex, ()) + emit(ctx, @ast ctx ex [K"return" ex]) + return nothing +end + +function emit_return(ctx, srcref, ex) + # todo: Mark implicit returns + if isnothing(ex) + return + elseif isempty(ctx.handler_token_stack) + _actually_return(ctx, ex) + return + end + # TODO: What's this !is_ssa(ctx, ex) here about? + x = if is_simple_atom(ctx, ex) && !(is_ssa(ctx, ex) && !isempty(ctx.finally_handlers)) + ex + elseif !isempty(ctx.finally_handlers) + # todo: Why does flisp lowering create a mutable variable here even + # though we don't mutate it? + # tmp = ssavar(ctx, srcref, "returnval_via_finally") # <- can we use this? + tmp = new_local_binding(ctx, srcref, "returnval_via_finally") + emit(ctx, @ast ctx srcref [K"=" tmp ex]) + tmp + else + emit_assign_tmp(ctx, ex, "returnval_via_finally") + end + if !isempty(ctx.finally_handlers) + enter_finally_block(ctx, srcref, :return, x) + else + emit(ctx, @ast ctx srcref [K"leave" ctx.handler_token_stack...]) + _actually_return(ctx, x) + end + return nothing +end + +function emit_return(ctx, ex) + emit_return(ctx, ex, ex) +end + +function emit_break(ctx, ex) + name = ex[1].name_val + target = get(ctx.break_targets, name, nothing) + if isnothing(target) + ty = name == "loop_exit" ? "break" : "continue" + throw(LoweringError(ex, "$ty must be used inside a `while` or `for` loop")) + end + if !isempty(ctx.finally_handlers) + handler = last(ctx.finally_handlers) + if length(target.handler_token_stack) < length(handler.target.handler_token_stack) + enter_finally_block(ctx, ex, :break, ex) + return + end + end + emit_jump(ctx, ex, target) +end + +# `op` may be either K"=" (where global assignments are converted to setglobal!) +# or K"constdecl". 
flisp: emit-assignment-or-setglobal +function emit_simple_assignment(ctx, srcref, lhs, rhs, op=K"=") + binfo = lookup_binding(ctx, lhs.var_id) + if binfo.kind == :global + emit(ctx, @ast ctx srcref [ + K"call" + op == K"constdecl" ? "declare_const"::K"core" : "setglobal!"::K"core" + binfo.mod::K"Value" + binfo.name::K"Symbol" + rhs + ]) + else + emit(ctx, srcref, op, lhs, rhs) + end +end + +function emit_assignment(ctx, srcref, lhs, rhs, op=K"=") + if !isnothing(rhs) + if is_valid_ir_rvalue(ctx, lhs, rhs) + emit_simple_assignment(ctx, srcref, lhs, rhs, op) + else + r = emit_assign_tmp(ctx, rhs) + emit_simple_assignment(ctx, srcref, lhs, r, op) + end + else + # in unreachable code (such as after return); still emit the assignment + # so that the structure of those uses is preserved + emit_simple_assignment(ctx, srcref, lhs, nothing_(ctx, srcref), op) + nothing + end +end + +function make_label(ctx, srcref) + id = ctx.next_label_id[] + ctx.next_label_id[] += 1 + makeleaf(ctx, srcref, K"label", id=id) +end + +# flisp: make&mark-label +function emit_label(ctx, srcref) + if !isempty(ctx.code) + # Use current label if available + e = ctx.code[end] + if kind(e) == K"label" + return e + end + end + l = make_label(ctx, srcref) + emit(ctx, l) + l +end + +function emit_latestworld(ctx, srcref) + (isempty(ctx.code) || kind(last(ctx.code)) != K"latestworld") && + emit(ctx, makeleaf(ctx, srcref, K"latestworld")) +end + +function compile_condition_term(ctx, ex) + cond = compile(ctx, ex, true, false) + if !is_valid_body_ir_argument(ctx, cond) + cond = emit_assign_tmp(ctx, cond) + end + return cond +end + +# flisp: emit-cond +function compile_conditional(ctx, ex, false_label) + if kind(ex) == K"block" + for i in 1:numchildren(ex)-1 + compile(ctx, ex[i], false, false) + end + test = ex[end] + else + test = ex + end + k = kind(test) + if k == K"||" + true_label = make_label(ctx, test) + for (i,e) in enumerate(children(test)) + c = compile_condition_term(ctx, e) + if i < 
numchildren(test) + next_term_label = make_label(ctx, test) + # Jump over short circuit + emit(ctx, @ast ctx e [K"gotoifnot" c next_term_label]) + # Short circuit to true + emit(ctx, @ast ctx e [K"goto" true_label]) + emit(ctx, next_term_label) + else + emit(ctx, @ast ctx e [K"gotoifnot" c false_label]) + end + end + emit(ctx, true_label) + elseif k == K"&&" + for e in children(test) + c = compile_condition_term(ctx, e) + emit(ctx, @ast ctx e [K"gotoifnot" c false_label]) + end + else + c = compile_condition_term(ctx, test) + emit(ctx, @ast ctx test [K"gotoifnot" c false_label]) + end +end + +# Lowering of exception handling must ensure that +# +# * Each `enter` is matched with a `leave` on every possible non-exceptional +# program path (including implicit returns generated in tail position). +# * Each catch block which is entered and handles the exception - by exiting +# via a non-exceptional program path - leaves the block with `pop_exception`. +# * Each `finally` block runs, regardless of any early `return` or jumps +# via `break`/`continue`/`goto` etc. +# +# These invariants are upheld by tracking the nesting using +# `handler_token_stack` and `catch_token_stack` and using these when emitting +# any control flow (return / goto) which leaves the associated block. +# +# The following special forms are emitted into the IR: +# +# (= tok (enter catch_label dynscope)) +# push exception handler with catch block at `catch_label` and dynamic +# scope `dynscope`, yielding a token which is used by `leave` and +# `pop_exception`. `dynscope` is only used in the special `tryfinally` form +# without associated source level syntax (see the `@with` macro) +# +# (leave tok) +# pop exception handler back to the state of the `tok` from the associated +# `enter`. Multiple tokens can be supplied to pop multiple handlers using +# `(leave tok1 tok2 ...)`. +# +# (pop_exception tok) - pop exception stack back to state of associated enter +# +# See the devdocs for further discussion. 
+function compile_try(ctx::LinearIRContext, ex, needs_value, in_tail_pos) + @chk numchildren(ex) <= 3 + try_block = ex[1] + if kind(ex) == K"trycatchelse" + catch_block = ex[2] + else_block = numchildren(ex) == 2 ? nothing : ex[3] + finally_block = nothing + catch_label = make_label(ctx, catch_block) + else + catch_block = nothing + else_block = nothing + finally_block = ex[2] + catch_label = make_label(ctx, finally_block) + end + + end_label = !in_tail_pos || !isnothing(finally_block) ? make_label(ctx, ex) : nothing + try_result = needs_value && !in_tail_pos ? new_local_binding(ctx, ex, "try_result") : nothing + + # Exception handler block prefix + handler_token = ssavar(ctx, ex, "handler_token") + emit(ctx, @ast ctx ex [K"=" + handler_token + [K"enter" catch_label] # TODO: dynscope + ]) + if !isnothing(finally_block) + # TODO: Trivial finally block optimization from JuliaLang/julia#52593 (or + # support a special form for @with)? + finally_handler = FinallyHandler(new_local_binding(ctx, finally_block, "finally_tag"), + JumpTarget(end_label, ctx)) + push!(ctx.finally_handlers, finally_handler) + emit(ctx, @ast ctx finally_block [K"=" finally_handler.tagvar (-1)::K"Integer"]) + end + push!(ctx.handler_token_stack, handler_token) + + # Try block code. + try_val = compile(ctx, try_block, needs_value, false) + # Exception handler block postfix + if isnothing(else_block) + if in_tail_pos + if !isnothing(try_val) + emit_return(ctx, try_val) + end + else + if needs_value && !isnothing(try_val) + emit_assignment(ctx, ex, try_result, try_val) + end + emit(ctx, @ast ctx ex [K"leave" handler_token]) + end + pop!(ctx.handler_token_stack) + else + if !isnothing(try_val) && (in_tail_pos || needs_value) + emit(ctx, try_val) # TODO: Only for any side effects ? 
+ end + emit(ctx, @ast ctx ex [K"leave" handler_token]) + pop!(ctx.handler_token_stack) + # Else block code + else_val = compile(ctx, else_block, needs_value, in_tail_pos) + if !in_tail_pos + if needs_value && !isnothing(else_val) + emit_assignment(ctx, ex, try_result, else_val) + end + end + end + if !in_tail_pos + emit(ctx, @ast ctx ex [K"goto" end_label]) + end + + # Catch pad + # Emit either catch or finally block. A combined try/catch/finally block + # was split into separate trycatchelse and tryfinally blocks earlier. + emit(ctx, catch_label) # <- Exceptional control flow enters here + if !isnothing(finally_block) + # Attribute the postfix and prefix to the finally block as a whole. + srcref = finally_block + enter_finally_block(ctx, srcref, :rethrow, nothing) + emit(ctx, end_label) # <- Non-exceptional control flow enters here + pop!(ctx.finally_handlers) + compile(ctx, finally_block, false, false) + # Finally block postfix: Emit a branch for every code path which enters + # the block to dynamically decide which return/break/rethrow exit action to take + for (tag, (on_exit, value)) in Iterators.reverse(enumerate(finally_handler.exit_actions)) + next_action_label = !in_tail_pos || tag != 1 || on_exit != :return ? 
+ make_label(ctx, srcref) : nothing + if !isnothing(next_action_label) + next_action_label = make_label(ctx, srcref) + tmp = ssavar(ctx, srcref, "do_finally_action") + emit(ctx, @ast ctx srcref [K"=" tmp + [K"call" + "==="::K"core" + finally_handler.tagvar + tag::K"Integer" + ] + ]) + emit(ctx, @ast ctx srcref [K"gotoifnot" tmp next_action_label]) + end + if on_exit === :return + emit_return(ctx, value) + elseif on_exit === :break + emit_break(ctx, value) + elseif on_exit === :rethrow + emit(ctx, @ast ctx srcref [K"call" "rethrow"::K"top"]) + else + @assert false + end + if !isnothing(next_action_label) + emit(ctx, next_action_label) + end + end + else + push!(ctx.catch_token_stack, handler_token) + catch_val = compile(ctx, catch_block, needs_value, in_tail_pos) + if !isnothing(try_result) && !isnothing(catch_val) + emit_assignment(ctx, ex, try_result, catch_val) + end + if !in_tail_pos + emit(ctx, @ast ctx ex [K"pop_exception" handler_token]) + emit(ctx, end_label) + else + # (pop_exception done in emit_return) + end + pop!(ctx.catch_token_stack) + end + try_result +end + +# This pass behaves like an interpreter on the given code. +# To perform stateful operations, it calls `emit` to record that something +# needs to be done. In value position, it returns an expression computing +# the needed value. 
+function compile(ctx::LinearIRContext, ex, needs_value, in_tail_pos) + k = kind(ex) + if k == K"BindingId" || is_literal(k) || k == K"quote" || k == K"inert" || + k == K"top" || k == K"core" || k == K"Value" || k == K"Symbol" || + k == K"SourceLocation" || k == K"static_eval" + if in_tail_pos + emit_return(ctx, ex) + elseif needs_value + ex + else + if k == K"BindingId" && !is_ssa(ctx, ex) + emit(ctx, ex) # keep identifiers for undefined-var checking + end + nothing + end + elseif k == K"Placeholder" + if needs_value + throw(LoweringError(ex, "all-underscore identifiers are write-only and their values cannot be used in expressions")) + end + nothing + elseif k == K"TOMBSTONE" + @chk !needs_value (ex,"TOMBSTONE encountered in value position") + nothing + elseif k == K"call" || k == K"new" || k == K"splatnew" || k == K"foreigncall" || + k == K"new_opaque_closure" || k == K"cfunction" + callex = makenode(ctx, ex, k, compile_args(ctx, children(ex))) + if in_tail_pos + emit_return(ctx, ex, callex) + elseif needs_value + callex + else + emit(ctx, callex) + nothing + end + elseif k == K"=" || k == K"constdecl" + lhs = ex[1] + res = if kind(lhs) == K"Placeholder" + compile(ctx, ex[2], needs_value, in_tail_pos) + elseif k == K"constdecl" && numchildren(ex) == 1 + # No RHS - make undefined constant + mod, name = if kind(ex[1]) == K"BindingId" + binfo = lookup_binding(ctx, ex[1]) + binfo.mod, binfo.name + else + @assert kind(ex[1]) == K"Value" && typeof(ex[1].value) === GlobalRef + gr = ex[1].value + gr.mod, String(gr.name) + end + emit(ctx, @ast ctx ex [K"call" "declare_const"::K"core" + mod::K"Value" name::K"Symbol"]) + else + rhs = compile(ctx, ex[2], true, false) + # TODO look up arg-map for renaming if lhs was reassigned + if needs_value && !isnothing(rhs) + r = emit_assign_tmp(ctx, rhs) + emit_simple_assignment(ctx, ex, lhs, r, k) + if in_tail_pos + emit_return(ctx, ex, r) + else + r + end + else + emit_assignment(ctx, ex, lhs, rhs, k) + end + end + k == K"constdecl" 
&& emit_latestworld(ctx, ex) + res + elseif k == K"block" || k == K"scope_block" + nc = numchildren(ex) + if nc == 0 + if in_tail_pos + emit_return(ctx, nothing_(ctx, ex)) + elseif needs_value + nothing_(ctx, ex) + else + nothing + end + else + res = nothing + for i in 1:nc + islast = i == nc + res = compile(ctx, ex[i], islast && needs_value, islast && in_tail_pos) + end + res + end + elseif k == K"break_block" + end_label = make_label(ctx, ex) + name = ex[1].name_val + outer_target = get(ctx.break_targets, name, nothing) + ctx.break_targets[name] = JumpTarget(end_label, ctx) + compile(ctx, ex[2], false, false) + if isnothing(outer_target) + delete!(ctx.break_targets, name) + else + ctx.break_targets[name] = outer_target + end + emit(ctx, end_label) + if needs_value + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + end + elseif k == K"break" + emit_break(ctx, ex) + elseif k == K"symbolic_label" + label = emit_label(ctx, ex) + name = ex.name_val + if haskey(ctx.symbolic_jump_targets, name) + throw(LoweringError(ex, "Label `$name` defined multiple times")) + end + push!(ctx.symbolic_jump_targets, name=>JumpTarget(label, ctx)) + if in_tail_pos + emit_return(ctx, ex, nothing_(ctx, ex)) + elseif needs_value + throw(LoweringError(ex, "misplaced label in value position")) + end + elseif k == K"symbolic_goto" + push!(ctx.symbolic_jump_origins, JumpOrigin(ex, length(ctx.code)+1, ctx)) + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? pop_exception + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? leave + emit(ctx, makeleaf(ctx, ex, K"TOMBSTONE")) # ? 
goto + nothing + elseif k == K"return" + compile(ctx, ex[1], true, true) + nothing + elseif k == K"removable" + if needs_value + compile(ctx, ex[1], needs_value, in_tail_pos) + else + nothing + end + elseif k == K"if" || k == K"elseif" + @chk numchildren(ex) <= 3 + has_else = numchildren(ex) > 2 + else_label = make_label(ctx, ex) + compile_conditional(ctx, ex[1], else_label) + if in_tail_pos + compile(ctx, ex[2], needs_value, in_tail_pos) + emit(ctx, else_label) + if has_else + compile(ctx, ex[3], needs_value, in_tail_pos) + else + emit_return(ctx, ex, nothing_(ctx, ex)) + end + nothing + else + val = needs_value && new_local_binding(ctx, ex, "if_val") + v1 = compile(ctx, ex[2], needs_value, in_tail_pos) + if needs_value + emit_assignment(ctx, ex, val, v1) + end + if has_else || needs_value + end_label = make_label(ctx, ex) + emit(ctx, @ast ctx ex [K"goto" end_label]) + else + end_label = nothing + end + emit(ctx, else_label) + v2 = if has_else + compile(ctx, ex[3], needs_value, in_tail_pos) + elseif needs_value + nothing_(ctx, ex) + end + if needs_value + emit_assignment(ctx, ex, val, v2) + end + if !isnothing(end_label) + emit(ctx, end_label) + end + val + end + elseif k == K"trycatchelse" || k == K"tryfinally" + compile_try(ctx, ex, needs_value, in_tail_pos) + elseif k == K"method" + # TODO + # throw(LoweringError(ex, + # "Global method definition needs to be placed at the top level, or use `eval`")) + res = if numchildren(ex) == 1 + if in_tail_pos + emit_return(ctx, ex) + elseif needs_value + ex + else + emit(ctx, ex) + end + else + @chk numchildren(ex) == 3 + fname = ex[1] + sig = compile(ctx, ex[2], true, false) + if !is_valid_ir_argument(ctx, sig) + sig = emit_assign_tmp(ctx, sig) + end + lam = ex[3] + if kind(lam) == K"lambda" + lam = compile_lambda(ctx, lam) + else + lam = emit_assign_tmp(ctx, compile(ctx, lam, true, false)) + end + emit(ctx, ex, K"method", fname, sig, lam) + @assert !needs_value && !in_tail_pos + nothing + end + emit_latestworld(ctx, ex) 
+ res + elseif k == K"opaque_closure_method" + @ast ctx ex [K"opaque_closure_method" + ex[1] + ex[2] + ex[3] + ex[4] + compile_lambda(ctx, ex[5]) + ] + elseif k == K"lambda" + lam = compile_lambda(ctx, ex) + if in_tail_pos + emit_return(ctx, lam) + elseif needs_value + lam + else + emit(ctx, lam) + end + elseif k == K"gc_preserve_begin" + makenode(ctx, ex, k, compile_args(ctx, children(ex))) + elseif k == K"gc_preserve_end" + if needs_value + throw(LoweringError(ex, "misplaced kind $k in value position")) + end + emit(ctx, ex) + nothing + elseif k == K"meta" + @chk numchildren(ex) >= 1 + if ex[1].name_val in ("inline", "noinline", "propagate_inbounds", + "nospecializeinfer", "aggressive_constprop", "no_constprop") + for c in children(ex) + ctx.meta[Symbol(c.name_val)] = true + end + elseif ex[1].name_val === "purity" + ctx.meta[Symbol(ex[1].name_val)] = ex[2].value::Base.EffectsOverride + else + emit(ctx, ex) + end + if needs_value + val = @ast ctx ex "nothing"::K"core" + if in_tail_pos + emit_return(ctx, val) + else + val + end + end + elseif k == K"_while" + end_label = make_label(ctx, ex) + top_label = emit_label(ctx, ex) + compile_conditional(ctx, ex[1], end_label) + compile(ctx, ex[2], false, false) + emit(ctx, @ast ctx ex [K"goto" top_label]) + emit(ctx, end_label) + if needs_value + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + end + elseif k == K"_do_while" + end_label = make_label(ctx, ex) + top_label = emit_label(ctx, ex) + compile(ctx, ex[1], false, false) + compile_conditional(ctx, ex[2], end_label) + emit(ctx, @ast ctx ex [K"goto" top_label]) + emit(ctx, end_label) + if needs_value + compile(ctx, nothing_(ctx, ex), needs_value, in_tail_pos) + end + elseif k == K"isdefined" || k == K"captured_local" || k == K"throw_undef_if_not" || + k == K"boundscheck" + if in_tail_pos + emit_return(ctx, ex) + elseif needs_value + ex + end + elseif k == K"newvar" + @assert !needs_value + is_duplicate = !isempty(ctx.code) && + (e = last(ctx.code); kind(e) 
== K"newvar" && e[1].var_id == ex[1].var_id) + if !is_duplicate + # TODO: also exclude deleted vars + emit(ctx, ex) + end + elseif k == K"latestworld" + if needs_value + throw(LoweringError(ex, "misplaced latestsworld")) + end + emit_latestworld(ctx, ex) + elseif k == K"latestworld_if_toplevel" + ctx.is_toplevel_thunk && emit_latestworld(ctx, ex) + elseif k == K"unused_only" + if needs_value && !(in_tail_pos && ctx.is_toplevel_thunk) + throw(LoweringError(ex, + "global declaration doesn't read the variable and can't return a value")) + end + compile(ctx, ex[1], needs_value, in_tail_pos) + else + throw(LoweringError(ex, "Invalid syntax; $(repr(k))")) + end +end + +function _remove_vars_with_isdefined_check!(vars, ex) + if is_leaf(ex) || is_quoted(ex) || kind(ex) == K"static_eval" + return + elseif kind(ex) == K"isdefined" + delete!(vars, ex[1].var_id) + else + for e in children(ex) + _remove_vars_with_isdefined_check!(vars, e) + end + end +end + +# Find newvar nodes that are unnecessary because +# 1. The variable is not captured and +# 2. The variable is assigned before any branches. +# +# This is used to remove newvar nodes that are not needed for re-initializing +# variables to undefined (see Julia issue #11065). It doesn't look for variable +# *uses*, because any variables used-before-def that also pass this test are +# *always* used undefined, and therefore don't need to be reinitialized. The +# one exception to that is `@isdefined`, which can observe an undefined +# variable without throwing an error. 
+function unnecessary_newvar_ids(ctx, stmts) + vars = Set{IdTag}() + ids_assigned_before_branch = Set{IdTag}() + for ex in stmts + _remove_vars_with_isdefined_check!(vars, ex) + k = kind(ex) + if k == K"newvar" + id = ex[1].var_id + if !lookup_binding(ctx, id).is_captured + push!(vars, id) + end + elseif k == K"goto" || k == K"gotoifnot" || (k == K"=" && kind(ex[2]) == K"enter") + empty!(vars) + elseif k == K"=" + id = ex[1].var_id + if id in vars + delete!(vars, id) + push!(ids_assigned_before_branch, id) + end + end + end + ids_assigned_before_branch +end + +# flisp: compile-body +function compile_body(ctx, ex) + compile(ctx, ex, true, true) + + # Fix up any symbolic gotos. (We can't do this earlier because the goto + # might precede the label definition in unstructured control flow.) + for origin in ctx.symbolic_jump_origins + name = origin.goto.name_val + target = get(ctx.symbolic_jump_targets, name, nothing) + if isnothing(target) + throw(LoweringError(origin.goto, "label `$name` referenced but not defined")) + end + i = origin.index + pop_ex = compile_pop_exception(ctx, origin.goto, origin.catch_token_stack, + target.catch_token_stack) + if !isnothing(pop_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = pop_ex + i += 1 + end + leave_ex = compile_leave_handler(ctx, origin.goto, origin.handler_token_stack, + target.handler_token_stack) + if !isnothing(leave_ex) + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = leave_ex + i += 1 + end + @assert kind(ctx.code[i]) == K"TOMBSTONE" + ctx.code[i] = @ast ctx origin.goto [K"goto" target.label] + end + + # Filter out unnecessary newvar nodes + ids_assigned_before_branch = unnecessary_newvar_ids(ctx, ctx.code) + filter!(ctx.code) do ex + !(kind(ex) == K"newvar" && ex[1].var_id in ids_assigned_before_branch) + end +end + +#------------------------------------------------------------------------------- + +# Recursively renumber an expression within linear IR +# flisp: renumber-stuff +function 
_renumber(ctx, ssa_rewrites, slot_rewrites, label_table, ex) + k = kind(ex) + if k == K"BindingId" + id = ex.var_id + if haskey(ssa_rewrites, id) + makeleaf(ctx, ex, K"SSAValue"; var_id=ssa_rewrites[id]) + else + new_id = get(slot_rewrites, id, nothing) + binfo = lookup_binding(ctx, id) + if !isnothing(new_id) + sk = binfo.kind == :local || binfo.kind == :argument ? K"slot" : + binfo.kind == :static_parameter ? K"static_parameter" : + throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)")) + makeleaf(ctx, ex, sk; var_id=new_id) + else + if binfo.kind !== :global + throw(LoweringError(ex, "Found unexpected binding of kind $(binfo.kind)")) + end + makeleaf(ctx, ex, K"globalref", binfo.name, mod=binfo.mod) + end + end + elseif k == K"meta" || k == K"static_eval" + # Somewhat-hack for Expr(:meta, :generated, gen) which has + # weird top-level semantics for `gen`, but we still need to translate + # the binding it contains to a globalref. (TODO: use + # static_eval for this meta, somehow) + mapchildren(ctx, ex) do e + _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) + end + elseif is_literal(k) || is_quoted(k) + ex + elseif k == K"label" + @ast ctx ex label_table[ex.id]::K"label" + elseif k == K"code_info" + ex + else + mapchildren(ctx, ex) do e + _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, e) + end + end +end + +# flisp: renumber-lambda, compact-ir +function renumber_body(ctx, input_code, slot_rewrites) + # Step 1: Remove any assignments to SSA variables, record the indices of labels + ssa_rewrites = Dict{IdTag,IdTag}() + label_table = Dict{Int,Int}() + code = SyntaxList(ctx) + for ex in input_code + k = kind(ex) + ex_out = nothing + if k == K"=" && is_ssa(ctx, ex[1]) + lhs_id = ex[1].var_id + if is_ssa(ctx, ex[2]) + # For SSA₁ = SSA₂, record that all uses of SSA₁ should be replaced by SSA₂ + ssa_rewrites[lhs_id] = ssa_rewrites[ex[2].var_id] + else + # Otherwise, record which `code` index this SSA value refers to + 
ssa_rewrites[lhs_id] = length(code) + 1
+                ex_out = ex[2]
+            end
+        elseif k == K"label"
+            label_table[ex.id] = length(code) + 1
+        elseif k == K"TOMBSTONE"
+            # remove statement
+        else
+            ex_out = ex
+        end
+        if !isnothing(ex_out)
+            push!(code, ex_out)
+        end
+    end
+
+    # Step 2:
+    # * Translate any SSA uses and labels into indices in the code table
+    # * Translate locals into slot indices
+    for i in 1:length(code)
+        code[i] = _renumber(ctx, ssa_rewrites, slot_rewrites, label_table, code[i])
+    end
+    code
+end
+
+struct Slot
+    name::String
+    kind::Symbol
+    is_nospecialize::Bool
+    is_read::Bool
+    is_single_assign::Bool
+    is_maybe_undef::Bool
+    is_called::Bool
+end
+
+function compile_lambda(outer_ctx, ex)
+    lambda_args = ex[1]
+    static_parameters = ex[2]
+    ret_var = numchildren(ex) == 4 ? ex[4] : nothing
+    # TODO: Add assignments for reassigned arguments to body
+    lambda_bindings = ex.lambda_bindings
+    ctx = LinearIRContext(outer_ctx, ex.is_toplevel_thunk, lambda_bindings, ret_var)
+    compile_body(ctx, ex[3])
+    slots = Vector{Slot}()
+    slot_rewrites = Dict{IdTag,Int}()
+    for arg in children(lambda_args)
+        if kind(arg) == K"Placeholder"
+            # Unused function arguments like: `_` or `::T`
+            push!(slots, Slot(arg.name_val, :argument, false, false, false, false, false))
+        else
+            @assert kind(arg) == K"BindingId"
+            id = arg.var_id
+            binfo = lookup_binding(ctx, id)
+            lbinfo = lookup_lambda_binding(ctx, id)
+            @assert binfo.kind == :local || binfo.kind == :argument
+            # FIXME: is_single_assign, is_maybe_undef
+            push!(slots, Slot(binfo.name, :argument, binfo.is_nospecialize,
+                              lbinfo.is_read, false, false, lbinfo.is_called))
+            slot_rewrites[id] = length(slots)
+        end
+    end
+    # Sorting the lambda locals is required to remove dependence on Dict iteration order.
+    for (id, lbinfo) in sort(collect(pairs(lambda_bindings.bindings)), by=first)
+        if !lbinfo.is_captured
+            binfo = lookup_binding(ctx.bindings, id)
+            if binfo.kind == :local
+                # FIXME: is_single_assign, is_maybe_undef
+                push!(slots, Slot(binfo.name, :local, false,
+                                  lbinfo.is_read, false, false, lbinfo.is_called))
+                slot_rewrites[id] = length(slots)
+            end
+        end
+    end
+    for (i,arg) in enumerate(children(static_parameters))
+        @assert kind(arg) == K"BindingId"
+        id = arg.var_id
+        info = lookup_binding(ctx.bindings, id)
+        @assert info.kind == :static_parameter
+        slot_rewrites[id] = i
+    end
+    code = renumber_body(ctx, ctx.code, slot_rewrites)
+    @ast ctx ex [K"code_info"(is_toplevel_thunk=ex.is_toplevel_thunk,
+                              slots=slots, meta=CompileHints(ctx.meta))
+        [K"block"(ex[3])
+            code...
+        ]
+    ]
+end
+
+"""
+This pass converts nested ASTs in the body of a lambda into a list of
+statements (ie, Julia's linear/untyped IR).
+
+Most of the complexity of this pass is in lowering structured control flow (if,
+loops, etc) to gotos and exception handling to enter/leave. We also convert
+`K"BindingId"` into `K"slot"`, `K"globalref"` or `K"SSAValue"` as appropriate.
+"""
+@fzone "JL: linearize" function linearize_ir(ctx, ex)
+    graph = ensure_attributes(ctx.graph,
+                              slots=Vector{Slot},
+                              mod=Module,
+                              id=Int)
+    # TODO: Cleanup needed - `_ctx` is just a dummy context here. But currently
+    # required to call reparent() ...
+ GraphType = typeof(graph) + _ctx = LinearIRContext(graph, SyntaxList(graph), ctx.bindings, + Ref(0), false, LambdaBindings(), nothing, + Dict{String,JumpTarget{typeof(graph)}}(), + SyntaxList(graph), SyntaxList(graph), + Vector{FinallyHandler{GraphType}}(), + Dict{String, JumpTarget{GraphType}}(), + Vector{JumpOrigin{GraphType}}(), + Dict{Symbol, Any}(), ctx.mod) + res = compile_lambda(_ctx, reparent(_ctx, ex)) + _ctx, res +end diff --git a/JuliaLowering/src/macro_expansion.jl b/JuliaLowering/src/macro_expansion.jl new file mode 100644 index 0000000000000..d66205548e7df --- /dev/null +++ b/JuliaLowering/src/macro_expansion.jl @@ -0,0 +1,560 @@ +# Lowering pass 1: Macro expansion, simple normalizations and quote expansion + +struct MacroExpansionContext{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + scope_layers::Vector{ScopeLayer} + scope_layer_stack::Vector{LayerId} + expr_compat_mode::Bool + macro_world::UInt +end + +function MacroExpansionContext(graph::SyntaxGraph, mod::Module, expr_compat_mode::Bool, world::UInt) + layers = ScopeLayer[ScopeLayer(1, mod, 0, false)] + MacroExpansionContext(graph, Bindings(), layers, LayerId[length(layers)], expr_compat_mode, world) +end + +function push_layer!(ctx::MacroExpansionContext, mod::Module, is_macro_expansion::Bool) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod, + current_layer_id(ctx), is_macro_expansion) + push!(ctx.scope_layers, new_layer) + push!(ctx.scope_layer_stack, new_layer.id) +end +function pop_layer!(ctx::MacroExpansionContext) + pop!(ctx.scope_layer_stack) +end + +current_layer(ctx::MacroExpansionContext) = ctx.scope_layers[last(ctx.scope_layer_stack)] +current_layer_id(ctx::MacroExpansionContext) = last(ctx.scope_layer_stack) + +#-------------------------------------------------- +# Expansion of quoted expressions +function collect_unquoted!(ctx, unquoted, ex, depth) + if kind(ex) == K"$" && depth == 0 + # children(ex) is usually length 1, but for double 
interpolation it may + # be longer and the children may contain K"..." expressions. Wrapping + # in a tuple groups the arguments together correctly in those cases. + push!(unquoted, @ast ctx ex [K"tuple" children(ex)...]) + else + inner_depth = kind(ex) == K"quote" ? depth + 1 : + kind(ex) == K"$" ? depth - 1 : + depth + for e in children(ex) + collect_unquoted!(ctx, unquoted, e, inner_depth) + end + end + return unquoted +end + +function expand_quote(ctx, ex) + unquoted = SyntaxList(ctx) + collect_unquoted!(ctx, unquoted, ex, 0) + # Unlike user-defined macro expansion, we don't call append_sourceref for + # the entire expression produced by `quote` expansion. We could, but it + # seems unnecessary for `quote` because the surface syntax is a transparent + # representation of the expansion process. However, it's useful to add the + # extra srcref in a more targeted way for $ interpolations inside + # interpolate_ast, so we do that there. + # + # In principle, particular user-defined macros could opt into a similar + # mechanism. + # + # TODO: Should we try adding a srcref to the `quote` node only for the + # extra syntax generated by expand_quote so srcref essentially becomes + # (ex, @HERE) ? + @ast ctx ex [K"call" + interpolate_ast::K"Value" + (ctx.expr_compat_mode ? Expr : SyntaxTree)::K"Value" + [K"inert"(meta=CompileHints(:as_Expr, ctx.expr_compat_mode)) ex] + unquoted... 
+ ] +end + +#-------------------------------------------------- +struct MacroContext <: AbstractLoweringContext + graph::SyntaxGraph + macrocall::Union{SyntaxTree,LineNumberNode,SourceRef} + scope_layer::ScopeLayer +end + +function adopt_scope(ex, ctx::MacroContext) + adopt_scope(ex, ctx.scope_layer.id) +end + +struct MacroExpansionError <: Exception + context::Union{Nothing,MacroContext} + ex::SyntaxTree + msg::String + "The source position relative to the node - may be `:begin` or `:end` or `:all`" + position::Symbol + "Error that occurred inside the macro function call (`nothing` if no inner exception)" + err + MacroExpansionError( + context::Union{Nothing,MacroContext}, ex::SyntaxTree, msg::AbstractString, position::Symbol, + @nospecialize err = nothing + ) = new(context, ex, msg, position, err) +end + +function MacroExpansionError(ex::SyntaxTree, msg::AbstractString; position=:all) + MacroExpansionError(nothing, ex, msg, position) +end + +function Base.showerror(io::IO, exc::MacroExpansionError) + print(io, "MacroExpansionError") + ctx = exc.context + if !isnothing(ctx) + # Use `Expr` formatting to pretty print the macro name for now - + # there's quite a lot of special cases. We could alternatively consider + # calling sourcetext() though that won't work well if it's a + # synthetically-generated macro name path. + macname_str = string(Expr(:macrocall, Expr(ctx.macrocall[1]), nothing)) + print(io, " while expanding ", macname_str, + " in module ", ctx.scope_layer.mod) + end + print(io, ":\n") + # TODO: Display niceties: + # * Show the full provenance tree somehow, in addition to the primary + # source location we're showing here? + # * What if the expression doesn't arise from a source file? + # * How to deal with highlighting trivia? Could provide a token kind or + # child position within the raw tree? How to abstract this?? 
+ src = sourceref(exc.ex) + if src isa LineNumberNode + highlight(io, src, note=exc.msg) + else + fb = first_byte(src) + lb = last_byte(src) + pos = exc.position + byterange = pos == :all ? (fb:lb) : + pos == :begin ? (fb:fb-1) : + pos == :end ? (lb+1:lb) : + error("Unknown position $pos") + highlight(io, src.file, byterange, note=exc.msg) + end + if !isnothing(exc.err) + print(io, "\nCaused by:\n") + showerror(io, exc.err) + end +end + +function fixup_macro_name(ctx::MacroExpansionContext, ex::SyntaxTree) + k = kind(ex) + if k == K"StrMacroName" || k == K"CmdMacroName" + layerid = get(ex, :scope_layer, current_layer_id(ctx)) + newname = JuliaSyntax.lower_identifier_name(ex.name_val, k) + makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid, name_val=newname) + elseif k == K"macro_name" + @chk numchildren(ex) === 1 + if kind(ex[1]) === K"." + @ast ctx ex [K"." ex[1][1] [K"macro_name" ex[1][2]]] + else + layerid = get(ex, :scope_layer, current_layer_id(ctx)) + newname = JuliaSyntax.lower_identifier_name(ex[1].name_val, K"macro_name") + makeleaf(ctx, ex[1], ex[1], kind=kind(ex[1]), name_val=newname) + end + else + mapchildren(e->fixup_macro_name(ctx,e), ctx, ex) + end +end + +function eval_macro_name(ctx::MacroExpansionContext, mctx::MacroContext, ex::SyntaxTree) + # `ex1` might contain a nontrivial mix of scope layers so we can't just + # `eval()` it, as it's already been partially lowered by this point. + # Instead, we repeat the latter parts of `lower()` here. + ex1 = expand_forms_1(ctx, fixup_macro_name(ctx, ex)) + ctx2, ex2 = expand_forms_2(ctx, ex1) + ctx3, ex3 = resolve_scopes(ctx2, ex2) + ctx4, ex4 = convert_closures(ctx3, ex3) + ctx5, ex5 = linearize_ir(ctx4, ex4) + mod = current_layer(ctx).mod + expr_form = to_lowered_expr(ex5) + try + # Using Core.eval here fails when precompiling packages since we hit the + # user-facing error (in `jl_check_top_level_effect`) that warns that + # effects won't persist when eval-ing into a closed module. 
+ # `jl_invoke_julia_macro` bypasses this by calling `jl_toplevel_eval` on + # the macro name. This is fine assuming the first argument to the + # macrocall is effect-free. + ccall(:jl_toplevel_eval, Any, (Any, Any), mod, expr_form) + catch err + throw(MacroExpansionError(mctx, ex, "Macro not found", :all, err)) + end +end + +# Record scope layer information for symbols passed to a macro by setting +# scope_layer for each expression and also processing any K"escape" arising +# from previous expansion of old-style macros. +# +# See also set_scope_layer() +function set_macro_arg_hygiene(ctx, ex, layer_ids, layer_idx) + k = kind(ex) + scope_layer = get(ex, :scope_layer, layer_ids[layer_idx]) + if is_leaf(ex) + makeleaf(ctx, ex, ex; scope_layer=scope_layer) + else + inner_layer_idx = layer_idx + if k == K"escape" + inner_layer_idx = layer_idx - 1 + if inner_layer_idx < 1 + # If we encounter too many escape nodes, there's probably been + # an error in the previous macro expansion. + # todo: The error here isn't precise about that - maybe we + # should record that macro call expression with the scope layer + # if we want to report the error against the macro call? + throw(MacroExpansionError(ex, "`escape` node in outer context")) + end + end + mapchildren(e->set_macro_arg_hygiene(ctx, e, layer_ids, inner_layer_idx), + ctx, ex; scope_layer=scope_layer) + end +end + +function prepare_macro_args(ctx, mctx, raw_args) + macro_args = Any[mctx] + for arg in raw_args + # Add hygiene information to be carried along with macro arguments. + # + # Macro call arguments may be either + # * Unprocessed by the macro expansion pass + # * Previously processed, but spliced into a further macro call emitted by + # a macro expansion. + # In either case, we need to set scope layers before passing the + # arguments to the macro call. 
+ push!(macro_args, set_macro_arg_hygiene(ctx, arg, ctx.scope_layer_stack, + length(ctx.scope_layer_stack))) + end + return macro_args +end + +# TODO: Do we need to handle :scope_layer or multiple escapes here? +# See https://github.com/c42f/JuliaLowering.jl/issues/39 +""" +Insert a hygienic-scope around each arg of K"toplevel" returned from a macro. + +It isn't correct for macro expansion to recurse into a K"toplevel" expression +since one child may define a macro and the next may use it. However, not +recursing now means we lose some important context: the module of the macro we +just expanded, which is necessary for resolving the identifiers in the +K"toplevel" AST. The solution implemented in JuliaLang/julia#53515 was to save +our place and expand later using `Expr(:hygienic-scope toplevel_child mod)`. + +Of course, these hygienic-scopes are also necessary because existing user code +contains the corresponding escaping, which would otherwise cause errors. We +already consumed the hygienic-scope that comes with every expansion, but won't +be looking for escapes under :toplevel, so push hygienic-scope under toplevel +""" +function fix_toplevel_expansion(ctx, ex::SyntaxTree, mod::Module, lnn::LineNumberNode) + if kind(ex) === K"toplevel" + mapchildren(ctx, ex) do e + @ast ctx ex [K"hygienic_scope" e mod::K"Value" lnn::K"Value"] + end + else + mapchildren(e->fix_toplevel_expansion(ctx, e, mod, lnn), ctx, ex) + end +end + +function expand_macro(ctx, ex) + @assert kind(ex) == K"macrocall" + + macname = ex[1] + mctx = MacroContext(ctx.graph, ex, current_layer(ctx)) + macfunc = eval_macro_name(ctx, mctx, macname) + raw_args = ex[2:end] + macro_loc = let loc = source_location(LineNumberNode, ex) + # Some macros, e.g. @cmd, don't play nicely with file == nothing + isnothing(loc.file) ? LineNumberNode(loc.line, :none) : loc + end + # We use a specific well defined world age for the next checks and macro + # expansion invocations. 
This avoids inconsistencies if the latest world + # age changes concurrently. + # + # TODO: Allow this to be passed in + if hasmethod(macfunc, Tuple{typeof(mctx), typeof.(raw_args)...}; world=ctx.macro_world) + macro_args = prepare_macro_args(ctx, mctx, raw_args) + expanded = try + Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...) + catch exc + newexc = exc isa MacroExpansionError ? + MacroExpansionError(mctx, exc.ex, exc.msg, exc.position, exc.err) : + MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc) + # TODO: We can delete this rethrow when we move to AST-based error propagation. + rethrow(newexc) + end + if expanded isa SyntaxTree + if !is_compatible_graph(ctx, expanded) + # If the macro has produced syntax outside the macro context, + # copy it over. TODO: Do we expect this always to happen? What + # is the API for access to the macro expansion context? + expanded = copy_ast(ctx, expanded) + end + else + expanded = @ast ctx ex expanded::K"Value" + end + else + # Compat: attempt to invoke an old-style macro if there's no applicable + # method for new-style macro arguments. + macro_args = Any[macro_loc, current_layer(ctx).mod] + for arg in raw_args + # For hygiene in old-style macros, we omit any additional scope + # layer information from macro arguments. Old-style macros will + # handle that using manual escaping in the macro itself. + # + # Note that there's one slight incompatibility here for identifiers + # interpolated into the `raw_args` from outer macro expansions of + # new-style macros which call old-style macros. Instead of seeing + # `Expr(:escape)` in such situations, old-style macros will now see + # `Expr(:scope_layer)` inside `macro_args`. + push!(macro_args, Expr(arg)) + end + expanded = try + Base.invoke_in_world(ctx.macro_world, macfunc, macro_args...) 
+ catch exc + if exc isa MethodError && exc.f === macfunc + if !isempty(methods_in_world(macfunc, Tuple{typeof(mctx), Vararg{Any}}, ctx.macro_world)) + # If the macro has at least some methods implemented in the + # new style, assume the user meant to call one of those + # rather than any old-style macro methods which might exist + exc = MethodError(macfunc, (prepare_macro_args(ctx, mctx, raw_args)..., ), ctx.macro_world) + end + end + rethrow(MacroExpansionError(mctx, ex, "Error expanding macro", :all, exc)) + end + expanded = expr_to_syntaxtree(ctx, expanded, macro_loc) + end + + if kind(expanded) != K"Value" + expanded = append_sourceref(ctx, expanded, ex) + # Module scope for the returned AST is the module where this particular + # method was defined (may be different from `parentmodule(macfunc)`) + mod_for_ast = lookup_method_instance(macfunc, macro_args, + ctx.macro_world).def.module + expanded = fix_toplevel_expansion(ctx, expanded, mod_for_ast, macro_loc) + new_layer = ScopeLayer(length(ctx.scope_layers)+1, mod_for_ast, + current_layer_id(ctx), true) + push_layer!(ctx, mod_for_ast, true) + expanded = expand_forms_1(ctx, expanded) + pop_layer!(ctx) + end + return expanded +end + +# Add a secondary source of provenance to each expression in the tree `ex`. +function append_sourceref(ctx, ex, secondary_prov) + srcref = (ex, secondary_prov) + if !is_leaf(ex) + if kind(ex) == K"macrocall" + makenode(ctx, srcref, ex, children(ex)...) + else + makenode(ctx, srcref, ex, + map(e->append_sourceref(ctx, e, secondary_prov), children(ex))...) 
+ end + else + makeleaf(ctx, srcref, ex) + end +end + +function remove_scope_layer!(ex) + if !is_leaf(ex) + for c in children(ex) + remove_scope_layer!(c) + end + end + deleteattr!(ex, :scope_layer) + ex +end + +function remove_scope_layer(ctx, ex) + remove_scope_layer!(copy_ast(ctx, ex)) +end + +""" +Lowering pass 1 + +This pass contains some simple expansion to make the rest of desugaring easier +to write and expands user defined macros. Macros see the surface syntax, so +need to be dealt with before other lowering. + +* Does identifier normalization +* Strips semantically irrelevant "container" nodes like parentheses +* Expands macros +* Processes quoted syntax turning `K"quote"` into `K"inert"` (eg, expanding + interpolations) +""" +function expand_forms_1(ctx::MacroExpansionContext, ex::SyntaxTree) + k = kind(ex) + if k == K"Identifier" + name_str = ex.name_val + if all(==('_'), name_str) + @ast ctx ex ex=>K"Placeholder" + elseif is_ccall_or_cglobal(name_str) + # Lower special identifiers `cglobal` and `ccall` to `K"core"` + # pseudo-refs very early so that cglobal and ccall can never be + # turned into normal bindings (eg, assigned to) + @ast ctx ex name_str::K"core" + else + layerid = get(ex, :scope_layer, current_layer_id(ctx)) + makeleaf(ctx, ex, ex, kind=K"Identifier", scope_layer=layerid) + end + elseif k == K"StrMacroName" || k == K"CmdMacroName" || k == K"macro_name" + # These can appear outside of a macrocall, e.g. 
 in `import`
+        e2 = fixup_macro_name(ctx, ex)
+        expand_forms_1(ctx, e2)
+    elseif k == K"var" || k == K"char" || k == K"parens"
+        # Strip "container" nodes
+        @chk numchildren(ex) == 1
+        expand_forms_1(ctx, ex[1])
+    elseif k == K"escape"
+        # For processing of old-style macros
+        @chk numchildren(ex) >= 1 "`escape` requires an argument"
+        if length(ctx.scope_layer_stack) === 1
+            throw(MacroExpansionError(ex, "`escape` node in outer context"))
+        end
+        top_layer = pop!(ctx.scope_layer_stack)
+        escaped_ex = expand_forms_1(ctx, ex[1])
+        push!(ctx.scope_layer_stack, top_layer)
+        escaped_ex
+    elseif k == K"hygienic_scope"
+        @chk numchildren(ex) >= 2 && ex[2].value isa Module (ex,"`hygienic_scope` requires an AST and a module")
+        new_layer = ScopeLayer(length(ctx.scope_layers)+1, ex[2].value,
+                               current_layer_id(ctx), true)
+        push!(ctx.scope_layers, new_layer)
+        push!(ctx.scope_layer_stack, new_layer.id)
+        hyg_ex = expand_forms_1(ctx, ex[1])
+        pop!(ctx.scope_layer_stack)
+        hyg_ex
+    elseif k == K"juxtapose"
+        layerid = get(ex, :scope_layer, current_layer_id(ctx))
+        @chk numchildren(ex) == 2
+        @ast ctx ex [K"call"
+            "*"::K"Identifier"(scope_layer=layerid)
+            expand_forms_1(ctx, ex[1])
+            expand_forms_1(ctx, ex[2])
+        ]
+    elseif k == K"quote"
+        @chk numchildren(ex) == 1
+        # TODO: Upstream should set a general flag for detecting parenthesized
+        # expressions so we don't need to dig into `green_tree` here. Ugh!
+        plain_symbol = has_flags(ex, JuliaSyntax.COLON_QUOTE) &&
+                       kind(ex[1]) == K"Identifier" &&
+                       (sr = sourceref(ex); sr isa SourceRef && kind(sr.green_tree[2]) != K"parens")
+        if plain_symbol
+            # As a compromise for compatibility, we treat non-parenthesized
+            # colon quoted identifiers like `:x` as plain Symbol literals
+            # because these are ubiquitously used in Julia programs as ad hoc
+            # enum-like entities rather than pieces of AST.
+ @ast ctx ex[1] ex[1]=>K"Symbol" + else + expand_forms_1(ctx, expand_quote(ctx, ex[1])) + end + elseif k == K"macrocall" + expand_macro(ctx, ex) + elseif k == K"module" || k == K"toplevel" || k == K"inert" + # Remove scope layer information from any inert syntax which survives + # macro expansion so that it doesn't contaminate lowering passes which + # are later run against the quoted code. TODO: This works as a first + # approximation but is incorrect in general. We need to revisit such + # "deferred hygiene" situations (see https://github.com/c42f/JuliaLowering.jl/issues/111) + remove_scope_layer(ctx, ex) + elseif k == K"." && numchildren(ex) == 2 + # Handle quoted property access like `x.:(foo)` or `Core.:(!==)` + # Unwrap the quote to get the identifier before expansion + rhs = ex[2] + if kind(rhs) == K"quote" && numchildren(rhs) == 1 + rhs = rhs[1] + end + e2 = expand_forms_1(ctx, rhs) + if kind(e2) == K"Identifier" || kind(e2) == K"Placeholder" + # FIXME: Do the K"Symbol" transformation in the parser?? + e2 = @ast ctx e2 e2=>K"Symbol" + end + @ast ctx ex [K"." 
expand_forms_1(ctx, ex[1]) e2] + elseif k == K"cmdstring" + @chk numchildren(ex) == 1 + e2 = @ast ctx ex [K"macrocall" [K"macro_name" "cmd"::K"core"] ex[1]] + expand_macro(ctx, e2) + elseif (k == K"call" || k == K"dotcall") + # Do some initial desugaring of call and dotcall here to simplify + # the later desugaring pass + args = SyntaxList(ctx) + if is_infix_op_call(ex) || is_postfix_op_call(ex) + @chk numchildren(ex) >= 2 "Postfix/infix operators must have at least two positional arguments" + farg = ex[2] + push!(args, ex[1]) + append!(args, ex[3:end]) + else + @chk numchildren(ex) > 0 "Call expressions must have a function name" + farg = ex[1] + append!(args, ex[2:end]) + end + if !isempty(args) + if kind(args[end]) == K"do" + # move do block into first argument location + pushfirst!(args, pop!(args)) + end + end + if length(args) == 2 && is_same_identifier_like(farg, "^") && kind(args[2]) == K"Integer" + # Do literal-pow expansion here as it's later used in both call and + # dotcall expansion. + @ast ctx ex [k + "literal_pow"::K"top" + expand_forms_1(ctx, farg) + expand_forms_1(ctx, args[1]) + [K"call" + [K"call" + "apply_type"::K"core" + "Val"::K"top" + args[2] + ] + ] + ] + else + if kind(farg) == K"." && numchildren(farg) == 1 + # (.+)(x,y) is treated as a dotcall + k = K"dotcall" + farg = farg[1] + end + # Preserve call type flags (mostly ignored in the next pass as + # we've already reordered arguments.) + callflags = JuliaSyntax.call_type_flags(ex) + @ast ctx ex [k(syntax_flags=(callflags == 0 ? nothing : callflags)) + expand_forms_1(ctx, farg) + (expand_forms_1(ctx, a) for a in args)... + ] + end + elseif is_leaf(ex) + ex + elseif k == K"<:" || k == K">:" || k == K"-->" + # TODO: Should every form get layerid systematically? Or only the ones + # which expand_forms_2 needs? 
+ layerid = get(ex, :scope_layer, current_layer_id(ctx)) + mapchildren(e->expand_forms_1(ctx,e), ctx, ex; scope_layer=layerid) + else + mapchildren(e->expand_forms_1(ctx,e), ctx, ex) + end +end + +function ensure_macro_attributes(graph) + ensure_attributes(graph, + var_id=IdTag, + scope_layer=LayerId, + __macro_ctx__=Nothing, + meta=CompileHints) +end + +@fzone "JL: macroexpand" function expand_forms_1(mod::Module, ex::SyntaxTree, expr_compat_mode::Bool, macro_world::UInt) + if kind(ex) == K"local" + # This error assumes we're expanding the body of a top level thunk but + # we might want to make that more explicit in the pass system. + throw(LoweringError(ex, "local declarations have no effect outside a scope")) + end + graph = ensure_macro_attributes(syntax_graph(ex)) + ctx = MacroExpansionContext(graph, mod, expr_compat_mode, macro_world) + ex2 = expand_forms_1(ctx, reparent(ctx, ex)) + graph2 = delete_attributes(graph, :__macro_ctx__) + # TODO: Returning the context with pass-specific mutable data is a bad way + # to carry state into the next pass. We might fix this by attaching such + # data to the graph itself as global attributes? 
+ ctx2 = MacroExpansionContext(graph2, ctx.bindings, ctx.scope_layers, ctx.scope_layer_stack, + expr_compat_mode, macro_world) + return ctx2, reparent(ctx2, ex2) +end diff --git a/JuliaLowering/src/precompile.jl b/JuliaLowering/src/precompile.jl new file mode 100644 index 0000000000000..7a5fccaded4b5 --- /dev/null +++ b/JuliaLowering/src/precompile.jl @@ -0,0 +1,27 @@ +# exercise the whole lowering pipeline +if Base.get_bool_env("JULIA_LOWERING_PRECOMPILE", true) + thunks = String[ + """ + function foo(xxx, yyy) + @nospecialize xxx + return Pair{Any,Any}(typeof(xxx), typeof(yyy)) + end + """ + + """ + struct Foo + x::Int + Foo(x::Int) = new(x) + # Foo() = new() + end + """ + ] + for thunk in thunks + stream = JuliaSyntax.ParseStream(thunk) + JuliaSyntax.parse!(stream; rule=:all) + st0 = JuliaSyntax.build_tree(SyntaxTree, stream; filename=@__FILE__) + lwrst = lower(@__MODULE__, st0[1]) + lwr = to_lowered_expr(lwrst) + @assert Meta.isexpr(lwr, :thunk) && only(lwr.args) isa Core.CodeInfo + end +end diff --git a/JuliaLowering/src/runtime.jl b/JuliaLowering/src/runtime.jl new file mode 100644 index 0000000000000..d5a908aef1772 --- /dev/null +++ b/JuliaLowering/src/runtime.jl @@ -0,0 +1,455 @@ +# Runtime support for +# 1. Functions called by the code emitted from lowering +# 2. Introspecting Julia's state during lowering +# +# These should probably all move to `Core` at some point. + +#------------------------------------------------------------------------------- +# Functions/types used by code emitted from lowering, but not called by it directly + +# Return the current exception. In JuliaLowering we use this rather than the +# special form `K"the_exception"` to reduces the number of special forms. 
# Return the current exception. In JuliaLowering we use this rather than the
# special form `K"the_exception"` to reduce the number of special forms.
Base.@assume_effects :removable function current_exception()
    @ccall jl_current_exception(current_task()::Any)::Any
end

#--------------------------------------------------
# Supporting functions for AST interpolation (`quote`)

# Interpolation state for SyntaxTree-based quoting: the destination graph,
# the tuple-of-tuples of spliced values, and a cursor into `values`.
struct InterpolationContext{Graph} <: AbstractLoweringContext
    graph::Graph
    values::Tuple
    current_index::Ref{Int}
end

# Context for `Expr`-based AST interpolation in compat mode
struct ExprInterpolationContext <: AbstractLoweringContext
    values::Tuple
    current_index::Ref{Int}
end

# Helper functions to make shared interpolation code which works with both
# SyntaxTree and Expr data structures.
_interp_kind(ex::SyntaxTree) = kind(ex)
function _interp_kind(@nospecialize(ex))
    # Map Expr heads onto the two kinds interpolation cares about.
    return (ex isa Expr && ex.head === :quote) ? K"quote" :
           (ex isa Expr && ex.head === :$)     ? K"$"     :
           K"None" # Other cases irrelevant to interpolation
end

_children(ex::SyntaxTree) = children(ex)
_children(@nospecialize(ex)) = ex isa Expr ? ex.args : ()

_numchildren(ex::SyntaxTree) = numchildren(ex)
_numchildren(@nospecialize(ex)) = ex isa Expr ? length(ex.args) : 0

_syntax_list(ctx::InterpolationContext) = SyntaxList(ctx)
_syntax_list(ctx::ExprInterpolationContext) = Any[]

_interp_makenode(ctx::InterpolationContext, ex, args) = makenode(ctx, ex, ex, args)
_interp_makenode(ctx::ExprInterpolationContext, ex, args) = Expr((ex::Expr).head, args...)

_is_leaf(ex::SyntaxTree) = is_leaf(ex)
_is_leaf(ex::Expr) = false
_is_leaf(@nospecialize(ex)) = true

# Produce interpolated node for `$x` syntax
function _interpolated_value(ctx::InterpolationContext, srcref, ex)
    if ex isa SyntaxTree
        if !is_compatible_graph(ctx, ex)
            ex = copy_ast(ctx, ex)
        end
        append_sourceref(ctx, ex, srcref)
    elseif ex isa Symbol
        # Plain symbols become identifiers. This is an accommodation for
        # compatibility to allow `:x` (a Symbol) and `:(x)` (a SyntaxTree) to
        # be used interchangeably in macros.
        makeleaf(ctx, srcref, K"Identifier", string(ex))
    else
        # Any other Julia value is embedded as a literal.
        makeleaf(ctx, srcref, K"Value", ex)
    end
end

# In Expr compat mode interpolated values are spliced in unchanged.
function _interpolated_value(::ExprInterpolationContext, _, ex)
    ex
end

# QuoteNode contents are inert: recurse via a temporary `Expr(:inert, ...)`
# wrapper so the shared traversal applies, then rewrap.
function _interpolate_ast(ctx::ExprInterpolationContext, ex::QuoteNode, depth)
    out = _interpolate_ast(ctx, Expr(:inert, ex.value), depth)
    QuoteNode(only(out.args))
end

# Shared recursive interpolation. `depth` tracks quote nesting so only `$`
# at the current quoting level is spliced; nested quotes are left intact.
function _interpolate_ast(ctx, @nospecialize(ex), depth)
    _is_leaf(ex) && return ex
    k = _interp_kind(ex)
    inner_depth = k == K"quote" ? depth + 1 :
                  k == K"$"     ? depth - 1 :
                  depth
    expanded_children = _syntax_list(ctx)

    for e in _children(ex)
        if _interp_kind(e) == K"$" && inner_depth == 0
            # Splice the next group of values; multi-value `$` forms like
            # `($ a b)` map each value onto the corresponding child srcref.
            vals = ctx.values[ctx.current_index[]]::Tuple
            ctx.current_index[] += 1
            for (i,v) in enumerate(vals)
                srcref = _numchildren(e) == 1 ? e : _children(e)[i]
                push!(expanded_children, _interpolated_value(ctx, srcref, v))
            end
        else
            push!(expanded_children, _interpolate_ast(ctx, e, inner_depth))
        end
    end

    _interp_makenode(ctx, ex, expanded_children)
end

# Produced by expanding K"quote". Must create a copy of the AST. Note that
# wrapping `ex` in an extra node handles the edge case where the root `ex` is
# `$` (our recursion is one step removed due to forms like `($ a b)`.)
function interpolate_ast(::Type{SyntaxTree}, ex::SyntaxTree, values...)
    # Construct graph for interpolation context. We inherit this from the macro
    # context where possible by detecting it using __macro_ctx__. This feels
    # hacky though.
    #
    # Perhaps we should use a ScopedValue for this instead or get it from
    # the macro __context__? None of the options feel great here.
    graph = nothing
    for vals in values
        for v in vals
            if v isa SyntaxTree && hasattr(syntax_graph(v), :__macro_ctx__)
                graph = syntax_graph(v)
                break
            end
        end
    end
    if isnothing(graph)
        graph = ensure_attributes(
            SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType,
            value=Any, name_val=String, scope_layer=LayerId)
    end
    ctx = InterpolationContext(graph, values, Ref(1))

    # We must copy the AST into our context to use it as the source reference of
    # generated expressions.
    ex1 = copy_ast(ctx, ex)
    out = _interpolate_ast(ctx, @ast(ctx, ex1, [K"None" ex1]), 0)
    length(children(out)) === 1 || throw(
        LoweringError(ex1, "More than one value in bare `\$` expression"))
    return only(children(out))
end

# Expr compat mode version of the above.
function interpolate_ast(::Type{Expr}, @nospecialize(ex), values...)
    ctx = ExprInterpolationContext(values, Ref(1))
    if ex isa Expr && ex.head === :$
        # Bare `$x` at the root: splice the single value directly.
        @assert length(values) === 1
        if length(ex.args) !== 1
            throw(LoweringError(
                expr_to_syntaxtree(ex), "More than one value in bare `\$` expression"))
        end
        only(values[1])
    else
        _interpolate_ast(ctx, ex, 0)
    end
end

#--------------------------------------------------
# Functions called by closure conversion

# Create and register the closure type `closure_type_name` in `mod`. Unboxed
# captures get one TypeVar parameter each so call sites can specialize;
# boxed captures are stored as `Core.Box`.
function eval_closure_type(mod::Module, closure_type_name::Symbol, field_names, field_is_box)
    type_params = Core.TypeVar[]
    field_types = []
    for (name, isbox) in zip(field_names, field_is_box)
        if !isbox
            T = Core.TypeVar(Symbol(name, "_type"))
            push!(type_params, T)
            push!(field_types, T)
        else
            push!(field_types, Core.Box)
        end
    end
    type = Core._structtype(mod, closure_type_name,
                            Core.svec(type_params...),
                            Core.svec(field_names...),
                            Core.svec(),
                            false,
                            length(field_names))
    Core._setsuper!(type, Core.Function)
    Core.declare_const(mod, closure_type_name, type)
    Core._typebody!(false, type, Core.svec(field_types...))
    type
end

# Interpolate captured local variables into the CodeInfo for a global method.
# `:captured_local` placeholder statements carry a 1-based index into `locals`.
function replace_captured_locals!(codeinfo::Core.CodeInfo, locals::Core.SimpleVector)
    for (i, ex) in enumerate(codeinfo.code)
        if Meta.isexpr(ex, :captured_local)
            codeinfo.code[i] = locals[ex.args[1]::Int]
        end
    end
    codeinfo
end

#--------------------------------------------------
# Functions which create modules or mutate their bindings

const _Base_has_eval_import = isdefined(Base, :_eval_import)

# Evaluate an `import`/`using`-with-`:` statement in module `to`.
# `imported=true` means `import`, otherwise `using`. Falls back to
# `Core.eval` of a reconstructed Expr on Julia versions without
# `Base._eval_import`.
function eval_import(imported::Bool, to::Module, from::Union{Expr, Nothing}, paths::Expr...)
    if _Base_has_eval_import
        Base._eval_import(imported, to, from, paths...)
    else
        head = imported ? :import : :using
        ex = isnothing(from) ?
             Expr(head, paths...) :
             Expr(head, Expr(Symbol(":"), from, paths...))
        Core.eval(to, ex)
    end
end

# Evaluate a plain `using path` in module `to` (same fallback strategy as
# `eval_import`).
function eval_using(to::Module, path::Expr)
    if _Base_has_eval_import
        Base._eval_using(to, path)
    else
        Core.eval(to, Expr(:using, path))
    end
end

# Mark `identifiers` as `export`ed (is_exported=true) or `public` in `mod`.
function eval_public(mod::Module, is_exported::Bool, identifiers)
    # symbol jl_module_public is no longer exported as of #57765
    Core.eval(mod, Expr((is_exported ? :export : :public), map(Symbol, identifiers)...))
end

#--------------------------------------------------
# Docsystem integration

# Register `docstr` for callable `f` under the method signature described by
# `method_metadata` (svec of (full signature, ..., LineNumberNode) as emitted
# by lowering's method definition code).
function _bind_func_docs!(f, docstr, method_metadata::Core.SimpleVector)
    mod = parentmodule(f)
    bind = Base.Docs.Binding(mod, nameof(f))
    full_sig = method_metadata[1]
    # Drop the function's own type from the front to get the argument tuple.
    arg_sig = Tuple{full_sig[2:end]...}
    lineno = method_metadata[3]
    metadata = Dict{Symbol, Any}(
        :linenumber => lineno.line,
        :module => mod,
    )
    if !isnothing(lineno.file)
        push!(metadata, :path => string(lineno.file))
    end
    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), arg_sig)
end

function bind_docs!(f::Function, docstr, method_metadata::Core.SimpleVector)
    _bind_func_docs!(f, docstr, method_metadata)
end

# Document constructors
function bind_docs!(::Type{Type{T}}, docstr, method_metadata::Core.SimpleVector) where T
    _bind_func_docs!(T, docstr, method_metadata)
end

function bind_docs!(type::Type, docstr, method_metadata::Core.SimpleVector)
    _bind_func_docs!(type, docstr, method_metadata)
end

# Register a docstring for a type definition itself, including optional
# per-field docs given as an svec of alternating (field index, docstring).
function bind_docs!(type::Type, docstr, lineno::LineNumberNode; field_docs=Core.svec())
    mod = parentmodule(type)
    bind = Base.Docs.Binding(mod, nameof(type))
    # NOTE(review): here :linenumber holds the LineNumberNode itself, whereas
    # _bind_func_docs! stores `lineno.line` (an Int) — confirm whether the
    # docsystem accepts both or this should be normalized.
    metadata = Dict{Symbol, Any}(
        :linenumber => lineno,
        :module => mod,
    )
    if !isnothing(lineno.file)
        push!(metadata, :path => string(lineno.file))
    end
    if !isempty(field_docs)
        fd = Dict{Symbol, Any}()
        fns = fieldnames(type)
        for i = 1:2:length(field_docs)
            fd[fns[field_docs[i]]] = field_docs[i+1]
        end
        metadata[:fields] = fd
    end
    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), Union{})
end

"""
Called in the unfortunate cases (K"call", K".", K"Identifier") where docstrings
change the semantics of the expressions they annotate, no longer requiring the
expression to execute.
"""
function bind_static_docs!(mod::Module, name::Symbol, docstr, lnn::LineNumberNode, sigtypes::Type)
    # NOTE(review): :path stores a Symbol here (`lnn.file`) while the other
    # doc bindings store a String — verify consumers accept both.
    metadata = Dict{Symbol, Any}(
        :linenumber => lnn.line,
        :module => mod,
        :path => something(lnn.file, "none"),
    )
    bind = Base.Docs.Binding(mod, name)
    Docs.doc!(mod, bind, Base.Docs.docstr(docstr, metadata), sigtypes)
end

#--------------------------------------------------
# Runtime support infrastructure for `@generated`

# An alternative to Core.GeneratedFunctionStub which works on SyntaxTree rather
# than Expr.
struct GeneratedFunctionStub
    gen
    srcref
    argnames::Core.SimpleVector
    spnames::Core.SimpleVector
end

# Call the `@generated` code generator function and wrap the results of the
# expression into a CodeInfo.
#
# `args` passed into stub by the Julia runtime are (parent_func, static_params..., arg_types...)
function (g::GeneratedFunctionStub)(world::UInt, source::Method, @nospecialize args...)
    # Some of the lowering pipeline from lower() and the pass-specific setup is
    # re-implemented here because generated functions are very much (but not
    # entirely) like macro expansion.
    #
    # TODO: Reduce duplication where possible.

    # Attributes from parsing
    graph = ensure_attributes(SyntaxGraph(), kind=Kind, syntax_flags=UInt16, source=SourceAttrType,
                              value=Any, name_val=String)
    # Attributes for macro expansion
    graph = ensure_attributes(ensure_macro_attributes(graph),
                              # Additional attribute for resolve_scopes, for
                              # adding our custom lambda below
                              is_toplevel_thunk=Bool,
                              toplevel_pure=Bool,
                              )

    __module__ = source.module

    # Macro expansion. Looking at Core.GeneratedFunctionStub, it seems that
    # macros emitted by the generator are currently expanded in the latest
    # world, so do that for compatibility.
    macro_world = typemax(UInt)
    ctx1 = MacroExpansionContext(graph, __module__, false, macro_world)

    layer = only(ctx1.scope_layers)

    # Run code generator - this acts like a macro expander and like a macro
    # expander it gets a MacroContext.
    mctx = MacroContext(syntax_graph(ctx1), g.srcref, layer)
    ex0 = g.gen(mctx, args...)
    if ex0 isa SyntaxTree
        if !is_compatible_graph(ctx1, ex0)
            # If the macro has produced syntax outside the macro context, copy it over.
            # TODO: Do we expect this always to happen? What is the API for access
            # to the macro expansion context?
            ex0 = copy_ast(ctx1, ex0)
        end
    else
        # Generator returned a plain value rather than syntax: embed it as a
        # K"Value" literal. (Fixed: this branch previously referenced the
        # undefined names `ctx`, `ex` and `expanded`, so it threw
        # UndefVarError whenever a generator returned a non-SyntaxTree.)
        ex0 = @ast ctx1 g.srcref ex0::K"Value"
    end
    # Expand any macros emitted by the generator
    ex1 = expand_forms_1(ctx1, reparent(ctx1, ex0))
    ctx1 = MacroExpansionContext(delete_attributes(graph, :__macro_ctx__),
                                 ctx1.bindings, ctx1.scope_layers,
                                 ctx1.scope_layer_stack, false, macro_world)
    ex1 = reparent(ctx1, ex1)

    # Desugaring
    ctx2, ex2 = expand_forms_2(ctx1, ex1)

    # Wrap expansion in a non-toplevel lambda and run scope resolution
    ex2 = @ast ctx2 ex0 [K"lambda"(is_toplevel_thunk=false, toplevel_pure=true)
        [K"block"
            (adopt_scope(string(n)::K"Identifier", layer) for n in g.argnames)...
        ]
        [K"block"
            (adopt_scope(string(n)::K"Identifier", layer) for n in g.spnames)...
        ]
        ex2
    ]
    ctx3, ex3 = resolve_scopes(ctx2, ex2)

    # Rest of lowering
    ctx4, ex4 = convert_closures(ctx3, ex3)
    ctx5, ex5 = linearize_ir(ctx4, ex4)
    ci = to_lowered_expr(ex5)
    @assert ci isa Core.CodeInfo

    # See GeneratedFunctionStub code in base/expr.jl
    ci.isva = source.isva
    code = ci.code
    bindings = IdSet{Core.Binding}()
    for i = 1:length(code)
        stmt = code[i]
        if isa(stmt, GlobalRef)
            push!(bindings, convert(Core.Binding, stmt))
        end
    end
    if !isempty(bindings)
        ci.edges = Core.svec(bindings...)
    end

    return ci
end


#-------------------------------------------------------------------------------
# The following functions are called directly by lowering to inspect Julia's state.

# Get the binding for `name` if one is already resolved in module `mod`. Note
# that we cannot use `isdefined(::Module, ::Symbol)` here, because that causes
# binding resolution which is a massive side effect we must avoid in lowering.
function _get_module_binding(mod, name; create=false)
    b = @ccall jl_get_module_binding(mod::Module, name::Symbol, create::Cint)::Ptr{Core.Binding}
    b == C_NULL ? nothing : unsafe_pointer_to_objref(b)
end

# Return true if a `name` is defined in and *by* the module `mod`.
# Has no side effects, unlike isdefined()
#
# (This should do what fl_defined_julia_global does for flisp lowering)
function is_defined_and_owned_global(mod, name)
    Base.binding_kind(mod, name) === Base.PARTITION_KIND_GLOBAL
end

# "Reserve" a binding: create the binding if it doesn't exist but do not assign
# to it. Returns true when a fresh binding was created, false when one existed.
function reserve_module_binding(mod, name)
    # TODO: Fix the race condition here: We should really hold the Module's
    # binding lock during this test-and-set type operation. But the binding
    # lock is only accessible from C. See also the C code in
    # `fl_module_unique_name`.
    if _get_module_binding(mod, name; create=false) === nothing
        _get_module_binding(mod, name; create=true) !== nothing
    else
        return false
    end
end

# Reserve a global binding named "$basename#$i" in module `mod` for the
# smallest `i` starting at `0`.
#
# TODO: Remove the use of this where possible. Currently this is used within
# lowering to create unique global names for keyword function bodies and
# closure types as a more local alternative to current-julia-module-counter.
# However, we should ideally defer it to eval-time to make lowering itself
# completely non-mutating.
+function reserve_module_binding_i(mod, basename) + i = 0 + while true + name = "$basename$i" + if reserve_module_binding(mod, Symbol(name)) + return name + end + i += 1 + end +end + +function lookup_method_instance(func, args, world::Integer) + allargs = Vector{Any}(undef, length(args) + 1) + allargs[1] = func + allargs[2:end] = args + mi = @ccall jl_method_lookup(allargs::Ptr{Any}, length(allargs)::Csize_t, + world::Csize_t)::Ptr{Cvoid} + return mi == C_NULL ? nothing : unsafe_pointer_to_objref(mi) +end + +# Like `Base.methods()` but with world age support +function methods_in_world(func, arg_sig, world) + Base._methods(func, arg_sig, -1, world) +end diff --git a/JuliaLowering/src/scope_analysis.jl b/JuliaLowering/src/scope_analysis.jl new file mode 100644 index 0000000000000..ce3f0fba23b76 --- /dev/null +++ b/JuliaLowering/src/scope_analysis.jl @@ -0,0 +1,814 @@ +# Lowering pass 3: scope and variable analysis + +""" +Key to use when transforming names into bindings +""" +struct NameKey + name::String + layer::LayerId +end + +function Base.isless(a::NameKey, b::NameKey) + (a.name, a.layer) < (b.name, b.layer) +end + +function NameKey(ex::SyntaxTree) + @chk kind(ex) == K"Identifier" + NameKey(ex.name_val, ex.scope_layer) +end + +#------------------------------------------------------------------------------- +_insert_if_not_present!(dict, key, val) = get!(dict, key, val) + +function _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex) + k = kind(ex) + if k == K"Identifier" + _insert_if_not_present!(used_names, NameKey(ex), ex) + elseif k == K"BindingId" + push!(used_bindings, ex.var_id) + elseif is_leaf(ex) || is_quoted(k) || + k in KSet"scope_block lambda module toplevel" + return + elseif k == K"local" + if getmeta(ex, :is_destructured_arg, false) + push!(destructured_args, ex[1]) + else + _insert_if_not_present!(locals, NameKey(ex[1]), ex) + end + elseif k == K"global" + if !(kind(ex[1]) == K"Value" && 
ex[1].value isa GlobalRef) + _insert_if_not_present!(globals, NameKey(ex[1]), ex) + end + elseif k == K"assign_or_constdecl_if_global" + # like v = val, except that if `v` turns out global(either implicitly or + # by explicit `global`), it gains an implicit `const` + _insert_if_not_present!(assignments, NameKey(ex[1]), ex) + elseif k == K"=" || k == K"constdecl" + v = decl_var(ex[1]) + if !(kind(v) in KSet"BindingId globalref Value Placeholder") + _insert_if_not_present!(assignments, NameKey(v), v) + end + if k != K"constdecl" || numchildren(ex) == 2 + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, ex[2]) + end + elseif k == K"function_decl" + v = ex[1] + kv = kind(v) + if kv == K"Identifier" + _insert_if_not_present!(assignments, NameKey(v), v) + elseif kv == K"BindingId" + binfo = lookup_binding(ctx, v) + if !binfo.is_ssa && binfo.kind != :global + @assert false "allow local BindingId as function name?" + end + else + @assert false + end + else + for e in children(ex) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e) + end + end +end + +# Find names of all identifiers used in the given expression, grouping them +# into sets by type of usage. 
+# +# NB: This only works properly after desugaring +function find_scope_vars(ctx, ex) + ExT = typeof(ex) + assignments = Dict{NameKey,ExT}() + locals = Dict{NameKey,ExT}() + destructured_args = Vector{ExT}() + globals = Dict{NameKey,ExT}() + used_names = Dict{NameKey,ExT}() + used_bindings = Set{IdTag}() + for e in children(ex) + _find_scope_vars!(ctx, assignments, locals, destructured_args, globals, used_names, used_bindings, e) + end + + # Sort by key so that id generation is deterministic + assignments = sort!(collect(pairs(assignments)), by=first) + locals = sort!(collect(pairs(locals)), by=first) + globals = sort!(collect(pairs(globals)), by=first) + used_names = sort!(collect(pairs(used_names)), by=first) + used_bindings = sort!(collect(used_bindings)) + + return assignments, locals, destructured_args, globals, used_names, used_bindings +end + +struct ScopeInfo + # True if scope is the global top level scope + is_toplevel_global_scope::Bool + # True if scope is part of top level code, or a non-lambda scope nested + # inside top level code. Thus requiring special scope resolution rules. + in_toplevel_thunk::Bool + # Soft/hard scope. For top level thunks only + is_soft::Bool + is_hard::Bool + # Map from variable names to IDs which appear in this scope but not in the + # parent scope + # TODO: Rename to `locals` or local_bindings? + var_ids::Dict{NameKey,IdTag} + # Bindings used by the enclosing lambda + lambda_bindings::LambdaBindings +end + +struct ScopeResolutionContext{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + mod::Module + scope_layers::Vector{ScopeLayer} + # name=>id mappings for all discovered global vars + global_vars::Dict{NameKey,IdTag} + # Stack of name=>id mappings for each scope, innermost scope last. 
+ scope_stack::Vector{ScopeInfo} + # Variables which were implicitly global due to being assigned to in top + # level code + implicit_toplevel_globals::Set{NameKey} +end + +function ScopeResolutionContext(ctx) + graph = ensure_attributes(ctx.graph, lambda_bindings=LambdaBindings) + ScopeResolutionContext(graph, + ctx.bindings, + ctx.mod, + ctx.scope_layers, + Dict{NameKey,IdTag}(), + Vector{ScopeInfo}(), + Set{NameKey}()) +end + +function current_lambda_bindings(ctx::ScopeResolutionContext) + last(ctx.scope_stack).lambda_bindings +end + +function lookup_var(ctx, varkey::NameKey, exclude_toplevel_globals=false) + for i in lastindex(ctx.scope_stack):-1:1 + ids = ctx.scope_stack[i].var_ids + id = get(ids, varkey, nothing) + if !isnothing(id) && (!exclude_toplevel_globals || + i > 1 || lookup_binding(ctx, id).kind != :global) + return id + end + end + return exclude_toplevel_globals ? nothing : get(ctx.global_vars, varkey, nothing) +end + +function var_kind(ctx, id::IdTag) + lookup_binding(ctx, id).kind +end + +function var_kind(ctx, varkey::NameKey, exclude_toplevel_globals=false) + id = lookup_var(ctx, varkey, exclude_toplevel_globals) + isnothing(id) ? nothing : lookup_binding(ctx, id).kind +end + +function init_binding(ctx, srcref, varkey::NameKey, kind::Symbol; kws...) + id = kind === :global ? get(ctx.global_vars, varkey, nothing) : nothing + if isnothing(id) + mod = kind === :global ? ctx.scope_layers[varkey.layer].mod : nothing + ex = new_binding(ctx, srcref, varkey.name, kind; mod=mod, kws...) + id = ex.var_id + end + if kind === :global + ctx.global_vars[varkey] = id + end + id +end + +# Add lambda arguments and static parameters +function add_lambda_args(ctx, var_ids, args, args_kind) + for arg in args + ka = kind(arg) + if ka == K"Identifier" + varkey = NameKey(arg) + if haskey(var_ids, varkey) + vk = lookup_binding(ctx, var_ids[varkey]).kind + _is_arg(k) = k == :argument || k == :local + msg = _is_arg(vk) && _is_arg(args_kind) ? 
"function argument name not unique" : + vk == :static_parameter && args_kind == :static_parameter ? "function static parameter name not unique" : + "static parameter name not distinct from function argument" + throw(LoweringError(arg, msg)) + end + is_always_defined = args_kind == :argument + id = init_binding(ctx, arg, varkey, args_kind; + is_nospecialize=getmeta(arg, :nospecialize, false), + is_always_defined=is_always_defined) + var_ids[varkey] = id + elseif ka != K"BindingId" && ka != K"Placeholder" + throw(LoweringError(arg, "Unexpected lambda arg kind")) + end + end +end + +# Analyze identifier usage within a scope +# * Allocate a new binding for each identifier which the scope introduces. +# * Record the identifier=>binding mapping in a lookup table +# * Return a `ScopeInfo` with the mapping plus additional scope metadata +function analyze_scope(ctx, ex, scope_type, is_toplevel_global_scope=false, + lambda_args=nothing, lambda_static_parameters=nothing) + parentscope = isempty(ctx.scope_stack) ? 
nothing : ctx.scope_stack[end] + is_outer_lambda_scope = kind(ex) == K"lambda" + in_toplevel_thunk = is_toplevel_global_scope || + (!is_outer_lambda_scope && parentscope.in_toplevel_thunk) + + assignments, locals, destructured_args, globals, + used_names, used_bindings = find_scope_vars(ctx, ex) + + # Construct a mapping from identifiers to bindings + # + # This will contain a binding ID for each variable which is introduced by + # the scope, including + # * Explicit locals + # * Explicit globals + # * Implicit locals created by assignment + var_ids = Dict{NameKey,IdTag}() + + if !isnothing(lambda_args) + add_lambda_args(ctx, var_ids, lambda_args, :argument) + add_lambda_args(ctx, var_ids, lambda_static_parameters, :static_parameter) + add_lambda_args(ctx, var_ids, destructured_args, :local) + end + + # Add explicit locals + for (varkey,e) in locals + if haskey(var_ids, varkey) + vk = lookup_binding(ctx, var_ids[varkey]).kind + if vk === :argument && is_outer_lambda_scope + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with an argument")) + elseif vk === :static_parameter + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) + end + elseif var_kind(ctx, varkey) === :static_parameter + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) + else + var_ids[varkey] = init_binding(ctx, e[1], varkey, :local) + end + end + + # Add explicit globals + for (varkey,e) in globals + if haskey(var_ids, varkey) + vk = lookup_binding(ctx, var_ids[varkey]).kind + if vk === :local + throw(LoweringError(e, "Variable `$(varkey.name)` declared both local and global")) + elseif vk === :argument && is_outer_lambda_scope + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with an argument")) + elseif vk === :static_parameter + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) + end + elseif var_kind(ctx, 
varkey) === :static_parameter + throw(LoweringError(e, "global variable name `$(varkey.name)` conflicts with a static parameter")) + end + var_ids[varkey] = init_binding(ctx, e[1], varkey, :global) + end + + # Compute implicit locals and globals + if is_toplevel_global_scope + is_hard_scope = false + is_soft_scope = false + + # Assignments are implicitly global at top level, unless they come from + # a macro expansion + for (varkey,e) in assignments + vk = haskey(var_ids, varkey) ? + lookup_binding(ctx, var_ids[varkey]).kind : + var_kind(ctx, varkey, true) + if vk === nothing + if ctx.scope_layers[varkey.layer].is_macro_expansion + var_ids[varkey] = init_binding(ctx, e, varkey, :local) + else + init_binding(ctx, e, varkey, :global) + push!(ctx.implicit_toplevel_globals, varkey) + end + end + end + else + is_hard_scope = in_toplevel_thunk && (parentscope.is_hard || scope_type === :hard) + is_soft_scope = in_toplevel_thunk && !is_hard_scope && + (scope_type === :neutral ? parentscope.is_soft : scope_type === :soft) + + # Outside top level code, most assignments create local variables implicitly + for (varkey,e) in assignments + vk = haskey(var_ids, varkey) ? + lookup_binding(ctx, var_ids[varkey]).kind : + var_kind(ctx, varkey, true) + if vk === :static_parameter + throw(LoweringError(e, "local variable name `$(varkey.name)` conflicts with a static parameter")) + elseif vk !== nothing + continue + end + # Assignment is to a newly discovered variable name + is_ambiguous_local = false + if in_toplevel_thunk && !is_hard_scope + # In a top level thunk but *inside* a nontrivial scope + layer = ctx.scope_layers[varkey.layer] + if !layer.is_macro_expansion && (varkey in ctx.implicit_toplevel_globals || + is_defined_and_owned_global(layer.mod, Symbol(varkey.name))) + # Special scope rules to make assignments to globals work + # like assignments to locals do inside a function. 
+ if is_soft_scope + # Soft scope (eg, for loop in REPL) => treat as a global + init_binding(ctx, e, varkey, :global) + continue + else + # Ambiguous case (eg, nontrivial scopes in package top level code) + # => Treat as local but generate warning when assigned to + is_ambiguous_local = true + end + end + end + var_ids[varkey] = init_binding(ctx, e, varkey, :local; + is_ambiguous_local=is_ambiguous_local) + end + end + + #-------------------------------------------------- + # At this point we've discovered all the bindings defined in this scope and + # added them to `var_ids`. + # + # Next we record information about how the new bindings relate to the + # enclosing lambda + # * All non-globals are recorded (kind :local and :argument will later be turned into slots) + # * Captured variables are detected and recorded + # + # TODO: Move most or-all of this to the VariableAnalysis sub-pass + lambda_bindings = if is_outer_lambda_scope + if isempty(lambda_args) + LambdaBindings() + else + selfarg = first(lambda_args) + selfid = kind(selfarg) == K"BindingId" ? + selfarg.var_id : var_ids[NameKey(selfarg)] + LambdaBindings(selfid) + end + else + parentscope.lambda_bindings + end + + for id in values(var_ids) + binfo = lookup_binding(ctx, id) + if !binfo.is_ssa && binfo.kind !== :global + init_lambda_binding(lambda_bindings, id) + end + end + + # FIXME: This assumes used bindings are internal to the lambda and cannot + # be from the environment, and also assumes they are assigned. That's + # correct for now but in general we should go by the same code path that + # identifiers do. + for id in used_bindings + binfo = lookup_binding(ctx, id) + if (binfo.kind === :local && !binfo.is_ssa) || binfo.kind === :argument || + binfo.kind === :static_parameter + if !has_lambda_binding(lambda_bindings, id) + init_lambda_binding(lambda_bindings, id) + end + end + end + + for (varkey, e) in used_names + id = haskey(var_ids, varkey) ? 
var_ids[varkey] : lookup_var(ctx, varkey) + if id === nothing + # Identifiers which are used but not defined in some scope are + # newly discovered global bindings + init_binding(ctx, e, varkey, :global) + elseif !in_toplevel_thunk + binfo = lookup_binding(ctx, id) + if binfo.kind !== :global + if !has_lambda_binding(lambda_bindings, id) + # Used vars from a scope *outside* the current lambda are captured + init_lambda_binding(lambda_bindings, id, is_captured=true) + update_binding!(ctx, id; is_captured=true) + end + end + end + end + + if !in_toplevel_thunk + for (varkey,_) in assignments + id = haskey(var_ids, varkey) ? var_ids[varkey] : lookup_var(ctx, varkey) + binfo = lookup_binding(ctx, id) + if binfo.kind !== :global + if !has_lambda_binding(lambda_bindings, id) + # Assigned vars from a scope *outside* the current lambda are captured + init_lambda_binding(lambda_bindings, id, is_captured=true) + update_binding!(ctx, id; is_captured=true) + end + end + end + end + + return ScopeInfo(is_toplevel_global_scope, in_toplevel_thunk, is_soft_scope, + is_hard_scope, var_ids, lambda_bindings) +end + +function add_local_decls!(ctx, stmts, srcref, scope) + # Add local decls to start of block so that closure conversion can + # initialize if necessary. + for id in sort!(collect(values(scope.var_ids))) + binfo = lookup_binding(ctx, id) + if binfo.kind == :local + push!(stmts, @ast ctx srcref [K"local" binding_ex(ctx, id)]) + end + end +end + +function _resolve_scopes(ctx, ex::SyntaxTree) + k = kind(ex) + if k == K"Identifier" + @ast ctx ex lookup_var(ctx, NameKey(ex))::K"BindingId" + elseif is_leaf(ex) || is_quoted(ex) || k == K"toplevel" + ex + # elseif k == K"global" + # ex + elseif k == K"local" + # Local declarations have a value of `nothing` according to flisp + # lowering. + # TODO: Should local decls be disallowed in value position? 
+ @ast ctx ex "nothing"::K"core" + elseif k == K"decl" + ex_out = mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + name = ex_out[1] + if kind(name) != K"Placeholder" + binfo = lookup_binding(ctx, name) + if binfo.kind == :global && !ctx.scope_stack[end].in_toplevel_thunk + throw(LoweringError(ex, "type declarations for global variables must be at top level, not inside a function")) + end + end + id = ex_out[1] + if kind(id) != K"Placeholder" + binfo = lookup_binding(ctx, id) + if !isnothing(binfo.type) + throw(LoweringError(ex, "multiple type declarations found for `$(binfo.name)`")) + end + update_binding!(ctx, id; type=ex_out[2]) + end + ex_out + elseif k == K"always_defined" + id = lookup_var(ctx, NameKey(ex[1])) + update_binding!(ctx, id; is_always_defined=true) + makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"lambda" + is_toplevel_thunk = ex.is_toplevel_thunk + scope = analyze_scope(ctx, ex, nothing, is_toplevel_thunk, + children(ex[1]), children(ex[2])) + + push!(ctx.scope_stack, scope) + arg_bindings = _resolve_scopes(ctx, ex[1]) + sparm_bindings = _resolve_scopes(ctx, ex[2]) + body_stmts = SyntaxList(ctx) + add_local_decls!(ctx, body_stmts, ex, scope) + body = _resolve_scopes(ctx, ex[3]) + if kind(body) == K"block" + append!(body_stmts, children(body)) + else + push!(body_stmts, body) + end + ret_var = numchildren(ex) == 4 ? _resolve_scopes(ctx, ex[4]) : nothing + pop!(ctx.scope_stack) + + @ast ctx ex [K"lambda"(lambda_bindings=scope.lambda_bindings, + is_toplevel_thunk=is_toplevel_thunk, + toplevel_pure=false) + arg_bindings + sparm_bindings + [K"block" + body_stmts... + ] + ret_var + ] + elseif k == K"scope_block" + scope = analyze_scope(ctx, ex, ex.scope_type) + push!(ctx.scope_stack, scope) + stmts = SyntaxList(ctx) + add_local_decls!(ctx, stmts, ex, scope) + for e in children(ex) + push!(stmts, _resolve_scopes(ctx, e)) + end + pop!(ctx.scope_stack) + @ast ctx ex [K"block" stmts...] 
+ elseif k == K"extension" + etype = extension_type(ex) + if etype == "islocal" + id = lookup_var(ctx, NameKey(ex[2])) + islocal = !isnothing(id) && var_kind(ctx, id) != :global + @ast ctx ex islocal::K"Bool" + elseif etype == "isglobal" + e2 = ex[2] + @chk kind(e2) in KSet"Identifier Placeholder" + isglobal = if kind(e2) == K"Identifier" + id = lookup_var(ctx, NameKey(e2)) + isnothing(id) || var_kind(ctx, id) == :global + else + false + end + @ast ctx ex isglobal::K"Bool" + elseif etype == "locals" + stmts = SyntaxList(ctx) + locals_dict = ssavar(ctx, ex, "locals_dict") + push!(stmts, @ast ctx ex [K"=" + locals_dict + [K"call" + [K"call" + "apply_type"::K"core" + "Dict"::K"top" + "Symbol"::K"core" + "Any"::K"core" + ] + ] + ]) + for scope in ctx.scope_stack + for id in values(scope.var_ids) + binfo = lookup_binding(ctx, id) + if binfo.kind == :global || binfo.is_internal + continue + end + binding = binding_ex(ctx, id) + push!(stmts, @ast ctx ex [K"if" + [K"isdefined" binding] + [K"call" + "setindex!"::K"top" + locals_dict + binding + binfo.name::K"Symbol" + ] + ]) + end + end + push!(stmts, locals_dict) + makenode(ctx, ex, K"block", stmts) + end + elseif k == K"assert" + etype = extension_type(ex) + if etype == "require_existing_locals" + for v in ex[2:end] + vk = var_kind(ctx, NameKey(v)) + if vk !== :local + throw(LoweringError(v, "`outer` annotations must match with a local variable in an outer scope but no such variable was found")) + end + end + elseif etype == "global_toplevel_only" + if !ctx.scope_stack[end].is_toplevel_global_scope + e = ex[2][1] + throw(LoweringError(e, "$(kind(e)) is only allowed in global scope")) + end + elseif etype == "toplevel_only" + if !ctx.scope_stack[end].in_toplevel_thunk + e = ex[2][1] + throw(LoweringError(e, "this syntax is only allowed in top level code")) + end + else + throw(LoweringError(ex, "Unknown syntax assertion")) + end + makeleaf(ctx, ex, K"TOMBSTONE") + elseif k == K"function_decl" + resolved = 
mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + name = resolved[1] + if kind(name) == K"BindingId" + bk = lookup_binding(ctx, name).kind + if bk == :argument + throw(LoweringError(name, "Cannot add method to a function argument")) + elseif bk == :global && !ctx.scope_stack[end].in_toplevel_thunk + throw(LoweringError(name, + "Global method definition needs to be placed at the top level, or use `eval()`")) + end + end + resolved + elseif k == K"assign_or_constdecl_if_global" + id = _resolve_scopes(ctx, ex[1]) + bk = lookup_binding(ctx, id).kind + @assert numchildren(ex) === 2 + assignment_kind = bk == :global ? K"constdecl" : K"=" + @ast ctx ex _resolve_scopes(ctx, [assignment_kind ex[1] ex[2]]) + else + mapchildren(e->_resolve_scopes(ctx, e), ctx, ex) + end +end + +function _resolve_scopes(ctx, exs::AbstractVector) + out = SyntaxList(ctx) + for e in exs + push!(out, _resolve_scopes(ctx, e)) + end + out +end + +#------------------------------------------------------------------------------- +# Sub-pass to compute additional information about variable usage as required +# by closure conversion, etc +struct ClosureBindings + name_stack::Vector{String} # Names of functions the closure is nested within + lambdas::Vector{LambdaBindings} # Bindings for each method of the closure +end + +ClosureBindings(name_stack) = ClosureBindings(name_stack, Vector{LambdaBindings}()) + +struct VariableAnalysisContext{GraphType} <: AbstractLoweringContext + graph::GraphType + bindings::Bindings + mod::Module + lambda_bindings::LambdaBindings + # Stack of method definitions for closure naming + method_def_stack::SyntaxList{GraphType} + # Collection of information about each closure, principally which methods + # are part of the closure (and hence captures). 
+ closure_bindings::Dict{IdTag,ClosureBindings} +end + +function VariableAnalysisContext(graph, bindings, mod, lambda_bindings) + VariableAnalysisContext(graph, bindings, mod, lambda_bindings, + SyntaxList(graph), Dict{IdTag,ClosureBindings}()) +end + +function current_lambda_bindings(ctx::VariableAnalysisContext) + ctx.lambda_bindings +end + +function init_closure_bindings!(ctx, fname) + func_name_id = fname.var_id + @assert lookup_binding(ctx, func_name_id).kind === :local + get!(ctx.closure_bindings, func_name_id) do + name_stack = Vector{String}() + for parentname in ctx.method_def_stack + if kind(parentname) == K"BindingId" + push!(name_stack, lookup_binding(ctx, parentname).name) + end + end + push!(name_stack, lookup_binding(ctx, func_name_id).name) + ClosureBindings(name_stack) + end +end + +function find_any_local_binding(ctx, ex) + k = kind(ex) + if k == K"BindingId" + bkind = lookup_binding(ctx, ex.var_id).kind + if bkind != :global && bkind != :static_parameter + return ex + end + elseif !is_leaf(ex) && !is_quoted(ex) + for e in children(ex) + r = find_any_local_binding(ctx, e) + if !isnothing(r) + return r + end + end + end + return nothing +end + +# Update ctx.bindings and ctx.lambda_bindings metadata based on binding usage +function analyze_variables!(ctx, ex) + k = kind(ex) + if k == K"BindingId" + if has_lambda_binding(ctx, ex) + # TODO: Move this after closure conversion so that we don't need + # to model the closure conversion transformations here. + update_lambda_binding!(ctx, ex, is_read=true) + else + binfo = lookup_binding(ctx, ex.var_id) + if !binfo.is_ssa && binfo.kind != :global + # The type of typed locals is invisible in the previous pass, + # but is filled in here. 
+ init_lambda_binding(ctx.lambda_bindings, ex.var_id, is_captured=true, is_read=true) + update_binding!(ctx, ex, is_captured=true) + end + end + elseif is_leaf(ex) || is_quoted(ex) + return + elseif k == K"static_eval" + badvar = find_any_local_binding(ctx, ex[1]) + if !isnothing(badvar) + name_hint = getmeta(ex, :name_hint, "syntax") + throw(LoweringError(badvar, "$(name_hint) cannot reference local variable")) + end + return + elseif k == K"local" || k == K"global" + # Presence of BindingId within local/global is ignored. + return + elseif k == K"=" + lhs = ex[1] + if kind(lhs) != K"Placeholder" + update_binding!(ctx, lhs, add_assigned=1) + if has_lambda_binding(ctx, lhs) + update_lambda_binding!(ctx, lhs, is_assigned=true) + end + lhs_binfo = lookup_binding(ctx, lhs) + if !isnothing(lhs_binfo.type) + # Assignments introduce a variable's type later during closure + # conversion, but we must model that explicitly here. + analyze_variables!(ctx, lhs_binfo.type) + end + end + analyze_variables!(ctx, ex[2]) + elseif k == K"function_decl" + name = ex[1] + if lookup_binding(ctx, name.var_id).kind === :local + init_closure_bindings!(ctx, name) + end + update_binding!(ctx, name, add_assigned=1) + if has_lambda_binding(ctx, name) + update_lambda_binding!(ctx, name, is_assigned=true) + end + elseif k == K"function_type" + if kind(ex[1]) != K"BindingId" || lookup_binding(ctx, ex[1]).kind !== :local + analyze_variables!(ctx, ex[1]) + end + elseif k == K"constdecl" + id = ex[1] + if kind(id) == K"BindingId" + if lookup_binding(ctx, id).kind == :local + throw(LoweringError(ex, "unsupported `const` declaration on local variable")) + end + update_binding!(ctx, id; is_const=true) + end + elseif k == K"call" + name = ex[1] + if kind(name) == K"BindingId" + id = name.var_id + if has_lambda_binding(ctx, id) + # TODO: Move this after closure conversion so that we don't need + # to model the closure conversion transformations. 
+ update_lambda_binding!(ctx, id, is_called=true) + end + end + foreach(e->analyze_variables!(ctx, e), children(ex)) + elseif k == K"method_defs" + push!(ctx.method_def_stack, ex[1]) + analyze_variables!(ctx, ex[2]) + pop!(ctx.method_def_stack) + elseif k == K"_opaque_closure" + name = ex[1] + init_closure_bindings!(ctx, name) + push!(ctx.method_def_stack, name) + analyze_variables!(ctx, ex[2]) + analyze_variables!(ctx, ex[3]) + analyze_variables!(ctx, ex[4]) + analyze_variables!(ctx, ex[9]) + pop!(ctx.method_def_stack) + elseif k == K"lambda" + lambda_bindings = ex.lambda_bindings + if !ex.is_toplevel_thunk && !isempty(ctx.method_def_stack) + # Record all lambdas for the same closure type in one place + func_name = last(ctx.method_def_stack) + if kind(func_name) == K"BindingId" + func_name_id = func_name.var_id + if lookup_binding(ctx, func_name_id).kind === :local + push!(ctx.closure_bindings[func_name_id].lambdas, lambda_bindings) + end + end + end + ctx2 = VariableAnalysisContext(ctx.graph, ctx.bindings, ctx.mod, lambda_bindings, + ctx.method_def_stack, ctx.closure_bindings) + foreach(e->analyze_variables!(ctx2, e), ex[3:end]) # body & return type + for (id,lbinfo) in pairs(lambda_bindings.bindings) + if lbinfo.is_captured + # Add any captured bindings to the enclosing lambda, if necessary. + outer_lbinfo = lookup_lambda_binding(ctx.lambda_bindings, id) + if isnothing(outer_lbinfo) + # Inner lambda captures a variable. If it's not yet present + # in the outer lambda, the outer lambda must capture it as + # well so that the closure associated to the inner lambda + # can be initialized when `function_decl` is hit. + init_lambda_binding(ctx.lambda_bindings, id, is_captured=true, is_read=true) + end + end + end + else + foreach(e->analyze_variables!(ctx, e), children(ex)) + end + nothing +end + +function resolve_scopes(ctx::ScopeResolutionContext, ex) + if kind(ex) != K"lambda" + # Wrap in a top level thunk if we're not already expanding a lambda. 
+ # (Maybe this should be done elsewhere?) + ex = @ast ctx ex [K"lambda"(is_toplevel_thunk=true, toplevel_pure=false) + [K"block"] + [K"block"] + ex + ] + end + _resolve_scopes(ctx, ex) +end + +""" +This pass analyzes scopes and the names (locals/globals etc) used within them. + +Names of kind `K"Identifier"` are transformed into binding identifiers of +kind `K"BindingId"`. The associated `Bindings` table in the context records +metadata about each binding. + +This pass also records the set of binding IDs used locally within the +enclosing lambda form and information about variables captured by closures. +""" +@fzone "JL: resolve_scopes" function resolve_scopes(ctx::DesugaringContext, ex) + ctx2 = ScopeResolutionContext(ctx) + ex2 = resolve_scopes(ctx2, reparent(ctx2, ex)) + ctx3 = VariableAnalysisContext(ctx2.graph, ctx2.bindings, ctx2.mod, ex2.lambda_bindings) + analyze_variables!(ctx3, ex2) + ctx3, ex2 +end diff --git a/JuliaLowering/src/syntax_graph.jl b/JuliaLowering/src/syntax_graph.jl new file mode 100644 index 0000000000000..c8145aa1b93c3 --- /dev/null +++ b/JuliaLowering/src/syntax_graph.jl @@ -0,0 +1,828 @@ +const NodeId = Int + +""" +Directed graph with arbitrary attributes on nodes. Used here for representing +one or several syntax trees. + +TODO: Global attributes! +""" +mutable struct SyntaxGraph{Attrs} + edge_ranges::Vector{UnitRange{Int}} + edges::Vector{NodeId} + attributes::Attrs +end + +SyntaxGraph() = SyntaxGraph{Dict{Symbol,Any}}(Vector{UnitRange{Int}}(), + Vector{NodeId}(), Dict{Symbol,Any}()) + +# "Freeze" attribute names and types, encoding them in the type of the returned +# SyntaxGraph. +function freeze_attrs(graph::SyntaxGraph) + frozen_attrs = (; pairs(graph.attributes)...) + SyntaxGraph(graph.edge_ranges, graph.edges, frozen_attrs) +end + +# Create a copy of `graph` where the attribute list is mutable +function unfreeze_attrs(graph::SyntaxGraph) + unfrozen_attrs = Dict{Symbol,Any}(pairs(graph.attributes)...) 
+ SyntaxGraph(graph.edge_ranges, graph.edges, unfrozen_attrs) +end + +function _show_attrs(io, attributes::Dict) + show(io, MIME("text/plain"), attributes) +end +function _show_attrs(io, attributes::NamedTuple) + show(io, MIME("text/plain"), Dict(pairs(attributes)...)) +end + +function attrnames(graph::SyntaxGraph) + keys(graph.attributes) +end + +function attrdefs(graph::SyntaxGraph) + [(k=>typeof(v).parameters[2]) for (k, v) in pairs(graph.attributes)] +end + +function Base.show(io::IO, ::MIME"text/plain", graph::SyntaxGraph) + print(io, typeof(graph), + " with $(length(graph.edge_ranges)) vertices, $(length(graph.edges)) edges, and attributes:\n") + _show_attrs(io, graph.attributes) +end + +function ensure_attributes!(graph::SyntaxGraph; kws...) + for (k,v) in pairs(kws) + @assert k isa Symbol + @assert v isa Type + if haskey(graph.attributes, k) + v0 = valtype(graph.attributes[k]) + v == v0 || throw(ErrorException("Attribute type mismatch $v != $v0")) + elseif graph.attributes isa NamedTuple + throw(ErrorException(""" + ensure_attributes!: $k is not an existing attribute, and the graph's attributes are frozen. \ + Consider calling non-mutating `ensure_attributes` instead.""")) + else + graph.attributes[k] = Dict{NodeId,v}() + end + end + graph +end + +function ensure_attributes(graph::SyntaxGraph{<:Dict}; kws...) + g = unfreeze_attrs(graph) + ensure_attributes!(g; kws...) +end + +function ensure_attributes(graph::SyntaxGraph{<:NamedTuple}; kws...) + g = unfreeze_attrs(graph) + ensure_attributes!(g; kws...) + freeze_attrs(g) +end + +function delete_attributes!(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...) + for name in attr_names + delete!(graph.attributes, name) + end + graph +end + +function delete_attributes(graph::SyntaxGraph{<:Dict}, attr_names::Symbol...) + delete_attributes!(unfreeze_attrs(graph), attr_names...) +end + +function delete_attributes(graph::SyntaxGraph{<:NamedTuple}, attr_names::Symbol...) 
+ g = delete_attributes!(unfreeze_attrs(graph), attr_names...) + freeze_attrs(g) +end + +function newnode!(graph::SyntaxGraph) + push!(graph.edge_ranges, 0:-1) # Invalid range start => leaf node + return length(graph.edge_ranges) +end + +function setchildren!(graph::SyntaxGraph, id, children::NodeId...) + setchildren!(graph, id, children) +end + +function setchildren!(graph::SyntaxGraph, id, children) + n = length(graph.edges) + graph.edge_ranges[id] = n+1:(n+length(children)) + # TODO: Reuse existing edges if possible + append!(graph.edges, children) +end + +function JuliaSyntax.is_leaf(graph::SyntaxGraph, id) + first(graph.edge_ranges[id]) == 0 +end + +function JuliaSyntax.numchildren(graph::SyntaxGraph, id) + length(graph.edge_ranges[id]) +end + +function JuliaSyntax.children(graph::SyntaxGraph, id) + @view graph.edges[graph.edge_ranges[id]] +end + +function JuliaSyntax.children(graph::SyntaxGraph, id, r::UnitRange) + @view graph.edges[graph.edge_ranges[id][r]] +end + +function child(graph::SyntaxGraph, id::NodeId, i::Integer) + graph.edges[graph.edge_ranges[id][i]] +end + +function getattr(graph::SyntaxGraph{<:Dict}, name::Symbol) + getfield(graph, :attributes)[name] +end + +function getattr(graph::SyntaxGraph{<:NamedTuple}, name::Symbol) + getfield(getfield(graph, :attributes), name) +end + +function getattr(graph::SyntaxGraph, name::Symbol, default) + get(getfield(graph, :attributes), name, default) +end + +function hasattr(graph::SyntaxGraph, name::Symbol) + getattr(graph, name, nothing) !== nothing +end + +# TODO: Probably terribly non-inferable? +function setattr!(graph::SyntaxGraph, id; attrs...) + for (k,v) in pairs(attrs) + if !isnothing(v) + getattr(graph, k)[id] = v + end + end +end + +function deleteattr!(graph::SyntaxGraph, id::NodeId, name::Symbol) + delete!(getattr(graph, name), id) +end + +function Base.getproperty(graph::SyntaxGraph, name::Symbol) + # TODO: Remove access to internals? 
+ name === :edge_ranges && return getfield(graph, :edge_ranges) + name === :edges && return getfield(graph, :edges) + name === :attributes && return getfield(graph, :attributes) + return getattr(graph, name) +end + +function sethead!(graph, id::NodeId, h::JuliaSyntax.SyntaxHead) + sethead!(graph, id, kind(h)) + setflags!(graph, id, flags(h)) +end + +function sethead!(graph, id::NodeId, k::Kind) + graph.kind[id] = k +end + +function setflags!(graph, id::NodeId, f::UInt16) + graph.syntax_flags[id] = f +end + +function _convert_nodes(graph::SyntaxGraph, node::SyntaxNode) + id = newnode!(graph) + sethead!(graph, id, head(node)) + if !isnothing(node.val) + v = node.val + if v isa Symbol + # TODO: Fixes in JuliaSyntax to avoid ever converting to Symbol + setattr!(graph, id, name_val=string(v)) + else + setattr!(graph, id, value=v) + end + end + setattr!(graph, id, source=SourceRef(node.source, node.position, node.raw)) + if !is_leaf(node) + cs = map(children(node)) do n + _convert_nodes(graph, n) + end + setchildren!(graph, id, cs) + end + return id +end + +""" + syntax_graph(ctx) + +Return `SyntaxGraph` associated with `ctx` +""" +syntax_graph(graph::SyntaxGraph) = graph + +function check_same_graph(x, y) + if syntax_graph(x) !== syntax_graph(y) + error("Mismatching syntax graphs") + end +end + +function check_compatible_graph(x, y) + if !is_compatible_graph(x, y) + error("Incompatible syntax graphs") + end +end + +function is_compatible_graph(x, y) + syntax_graph(x).edges === syntax_graph(y).edges +end + +#------------------------------------------------------------------------------- +struct SyntaxTree{GraphType} + _graph::GraphType + _id::NodeId +end + +function Base.getproperty(ex::SyntaxTree, name::Symbol) + name === :_graph && return getfield(ex, :_graph) + name === :_id && return getfield(ex, :_id) + _id = getfield(ex, :_id) + return get(getproperty(getfield(ex, :_graph), name), _id) do + attrstr = join(["\n $n = $(getproperty(ex, n))" + for n in attrnames(ex)], 
",") + error("Property `$name[$_id]` not found. Available attributes:$attrstr") + end +end + +function Base.setproperty!(ex::SyntaxTree, name::Symbol, val) + return setattr!(ex._graph, ex._id; name=>val) +end + +function Base.propertynames(ex::SyntaxTree) + attrnames(ex) +end + +function Base.get(ex::SyntaxTree, name::Symbol, default) + attr = getattr(getfield(ex, :_graph), name, nothing) + return isnothing(attr) ? default : + get(attr, getfield(ex, :_id), default) +end + +function Base.getindex(ex::SyntaxTree, i::Integer) + SyntaxTree(ex._graph, child(ex._graph, ex._id, i)) +end + +function Base.getindex(ex::SyntaxTree, r::UnitRange) + SyntaxList(ex._graph, children(ex._graph, ex._id, r)) +end + +Base.firstindex(ex::SyntaxTree) = 1 +Base.lastindex(ex::SyntaxTree) = numchildren(ex) + +function hasattr(ex::SyntaxTree, name::Symbol) + attr = getattr(ex._graph, name, nothing) + return !isnothing(attr) && haskey(attr, ex._id) +end + +function attrnames(ex::SyntaxTree) + attrs = ex._graph.attributes + [name for (name, value) in pairs(attrs) if haskey(value, ex._id)] +end + +function copy_node(ex::SyntaxTree) + graph = syntax_graph(ex) + id = newnode!(graph) + if !is_leaf(ex) + setchildren!(graph, id, _node_ids(graph, children(ex)...)) + end + ex2 = SyntaxTree(graph, id) + copy_attrs!(ex2, ex, true) + ex2 +end + +function setattr(ex::SyntaxTree; extra_attrs...) + ex2 = copy_node(ex) + setattr!(ex2; extra_attrs...) + ex2 +end + +function setattr!(ex::SyntaxTree; attrs...) + setattr!(ex._graph, ex._id; attrs...) 
+end
+
+function deleteattr!(ex::SyntaxTree, name::Symbol)
+    deleteattr!(ex._graph, ex._id, name)
+end
+
+# JuliaSyntax tree API
+
+function JuliaSyntax.is_leaf(ex::SyntaxTree)
+    is_leaf(ex._graph, ex._id)
+end
+
+function JuliaSyntax.numchildren(ex::SyntaxTree)
+    numchildren(ex._graph, ex._id)
+end
+
+function JuliaSyntax.children(ex::SyntaxTree)
+    SyntaxList(ex._graph, children(ex._graph, ex._id))
+end
+
+function JuliaSyntax.head(ex::SyntaxTree)
+    JuliaSyntax.SyntaxHead(kind(ex), flags(ex))
+end
+
+function JuliaSyntax.kind(ex::SyntaxTree)
+    ex.kind::JuliaSyntax.Kind
+end
+
+function JuliaSyntax.flags(ex::SyntaxTree)
+    get(ex, :syntax_flags, 0x0000)
+end
+
+
+# Reference to bytes within a source file
+struct SourceRef
+    file::SourceFile
+    first_byte::Int
+    # TODO: Do we need the green node, or would last_byte suffice?
+    green_tree::JuliaSyntax.GreenNode
+end
+
+JuliaSyntax.sourcefile(src::SourceRef) = src.file
+JuliaSyntax.byte_range(src::SourceRef) = src.first_byte:(src.first_byte + span(src.green_tree) - 1)
+
+# TODO: Adding these methods to support LineNumberNode is kind of hacky but we
+# can remove these after JuliaLowering becomes self-bootstrapping for macros
+# and we have a proper SourceRef for @ast's @HERE form.
+JuliaSyntax.byte_range(src::LineNumberNode) = 0:0
+JuliaSyntax.source_location(src::LineNumberNode) = (src.line, 0)
+JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode) = src
+JuliaSyntax.source_line(src::LineNumberNode) = src.line
+# The following somewhat strange cases are for where LineNumberNode is standing in
+# for SourceFile because we've only got Expr-based provenance info
+JuliaSyntax.sourcefile(src::LineNumberNode) = src
+JuliaSyntax.sourcetext(src::LineNumberNode) = SubString("")
+JuliaSyntax.source_location(src::LineNumberNode, byte_index::Integer) = (src.line, 0)
+JuliaSyntax.source_location(::Type{LineNumberNode}, src::LineNumberNode, byte_index::Integer) = src
+JuliaSyntax.filename(src::LineNumberNode) = string(src.file)
+
+function JuliaSyntax.highlight(io::IO, src::LineNumberNode; note="")
+    print(io, src, " - ", note)
+end
+
+function JuliaSyntax.highlight(io::IO, src::SourceRef; kws...)
+    highlight(io, src.file, first_byte(src):last_byte(src); kws...)
+end
+
+function Base.show(io::IO, ::MIME"text/plain", src::SourceRef)
+    highlight(io, src; note="these are the bytes you're looking for 😊", context_lines_inner=20)
+end
+
+
+function provenance(ex::SyntaxTree)
+    s = ex.source
+    if s isa NodeId
+        return (SyntaxTree(ex._graph, s),)
+    elseif s isa Tuple
+        return SyntaxTree.((ex._graph,), s)
+    else
+        return (s,)
+    end
+end
+
+
+function _sourceref(sources, id)
+    i = 1
+    while true
+        i += 1
+        s = sources[id]
+        if s isa NodeId
+            id = s
+        else
+            return s, id
+        end
+    end
+end
+
+function sourceref(ex::SyntaxTree)
+    sources = ex._graph.source
+    id::NodeId = ex._id
+    while true
+        s, _ = _sourceref(sources, id)
+        if s isa Tuple
+            s = s[1]
+        end
+        if s isa NodeId
+            id = s
+        else
+            return s
+        end
+    end
+end
+
+function _flattened_provenance(refs, graph, sources, id)
+    # TODO: Implement in terms of `provenance()`?
+ s, id2 = _sourceref(sources, id) + if s isa Tuple + for i in s + _flattened_provenance(refs, graph, sources, i) + end + else + push!(refs, SyntaxTree(graph, id2)) + end +end + +function flattened_provenance(ex::SyntaxTree) + refs = SyntaxList(ex) + _flattened_provenance(refs, ex._graph, ex._graph.source, ex._id) + return reverse(refs) +end + + +function is_ancestor(ex, ancestor) + if !is_compatible_graph(ex, ancestor) + return false + end + sources = ex._graph.source + id::NodeId = ex._id + while true + s = get(sources, id, nothing) + if s isa NodeId + id = s + if id == ancestor._id + return true + end + else + return false + end + end +end + +const SourceAttrType = Union{SourceRef,LineNumberNode,NodeId,Tuple} + +function SyntaxTree(graph::SyntaxGraph, node::SyntaxNode) + ensure_attributes!(graph, kind=Kind, syntax_flags=UInt16, source=SourceAttrType, + value=Any, name_val=String) + id = _convert_nodes(graph, node) + return SyntaxTree(graph, id) +end + +function SyntaxTree(node::SyntaxNode) + return SyntaxTree(SyntaxGraph(), node) +end + +attrsummary(name, value) = string(name) +attrsummary(name, value::Number) = "$name=$value" + +function _value_string(ex) + k = kind(ex) + str = k in KSet"Identifier StrMacroName CmdMacroName" || is_operator(k) ? ex.name_val : + k == K"Placeholder" ? ex.name_val : + k == K"SSAValue" ? "%" : + k == K"BindingId" ? "#" : + k == K"label" ? "label" : + k == K"core" ? "core.$(ex.name_val)" : + k == K"top" ? "top.$(ex.name_val)" : + k == K"Symbol" ? ":$(ex.name_val)" : + k == K"globalref" ? "$(ex.mod).$(ex.name_val)" : + k == K"slot" ? "slot" : + k == K"latestworld" ? "latestworld" : + k == K"static_parameter" ? "static_parameter" : + k == K"symbolic_label" ? "label:$(ex.name_val)" : + k == K"symbolic_goto" ? "goto:$(ex.name_val)" : + k == K"SourceLocation" ? 
"SourceLocation:$(JuliaSyntax.filename(ex)):$(join(source_location(ex), ':'))" : + repr(get(ex, :value, nothing)) + id = get(ex, :var_id, nothing) + if isnothing(id) + id = get(ex, :id, nothing) + end + if !isnothing(id) + idstr = subscript_str(id) + str = "$(str)$idstr" + end + if k == K"slot" || k == K"BindingId" + p = provenance(ex)[1] + while p isa SyntaxTree + if kind(p) == K"Identifier" + str = "$(str)/$(p.name_val)" + break + end + p = provenance(p)[1] + end + end + return str +end + +function _show_syntax_tree(io, ex, indent, show_kinds) + val = get(ex, :value, nothing) + nodestr = !is_leaf(ex) ? "[$(untokenize(head(ex)))]" : _value_string(ex) + + treestr = rpad(string(indent, nodestr), 40) + if show_kinds && is_leaf(ex) + treestr = treestr*" :: "*string(kind(ex)) + end + + std_attrs = Set([:name_val,:value,:kind,:syntax_flags,:source,:var_id]) + attrstr = join([attrsummary(n, getproperty(ex, n)) + for n in attrnames(ex) if n ∉ std_attrs], ",") + treestr = string(rpad(treestr, 60), " │ $attrstr") + + println(io, treestr) + if !is_leaf(ex) + new_indent = indent*" " + for n in children(ex) + _show_syntax_tree(io, n, new_indent, show_kinds) + end + end +end + +function Base.show(io::IO, ::MIME"text/plain", ex::SyntaxTree, show_kinds=true) + anames = join(string.(attrnames(syntax_graph(ex))), ",") + println(io, "SyntaxTree with attributes $anames") + _show_syntax_tree(io, ex, "", show_kinds) +end + +function _show_syntax_tree_sexpr(io, ex) + if is_leaf(ex) + if is_error(ex) + print(io, "(", untokenize(head(ex)), ")") + else + print(io, _value_string(ex)) + end + else + print(io, "(", untokenize(head(ex))) + first = true + for n in children(ex) + print(io, ' ') + _show_syntax_tree_sexpr(io, n) + first = false + end + print(io, ')') + end +end + +function Base.show(io::IO, ::MIME"text/x.sexpression", node::SyntaxTree) + _show_syntax_tree_sexpr(io, node) +end + +function Base.show(io::IO, node::SyntaxTree) + _show_syntax_tree_sexpr(io, node) +end + +function 
reparent(ctx, ex::SyntaxTree) + # Ensure `ex` has the same parent graph, in a somewhat loose sense. + # Could relax by copying if necessary? + # In that case, would we copy all the attributes? That would have slightly + # different semantics. + graph = syntax_graph(ctx) + @assert graph.edge_ranges === ex._graph.edge_ranges + SyntaxTree(graph, ex._id) +end + +function ensure_attributes(ex::SyntaxTree; kws...) + reparent(ensure_attributes(syntax_graph(ex); kws...), ex) +end + +syntax_graph(ex::SyntaxTree) = ex._graph + +function JuliaSyntax.build_tree(::Type{SyntaxTree}, stream::JuliaSyntax.ParseStream; kws...) + SyntaxTree(JuliaSyntax.build_tree(SyntaxNode, stream; kws...)) +end + +JuliaSyntax.sourcefile(ex::SyntaxTree) = sourcefile(sourceref(ex)) +JuliaSyntax.byte_range(ex::SyntaxTree) = byte_range(sourceref(ex)) + +function JuliaSyntax._expr_leaf_val(ex::SyntaxTree, _...) + name = get(ex, :name_val, nothing) + if !isnothing(name) + n = Symbol(name) + if kind(ex) === K"Symbol" + return QuoteNode(n) + elseif hasattr(ex, :scope_layer) + Expr(:scope_layer, n, ex.scope_layer) + else + n + end + else + val = get(ex, :value, nothing) + if kind(ex) == K"Value" && val isa Expr || val isa LineNumberNode + # Expr AST embedded in a SyntaxTree should be quoted rather than + # becoming part of the output AST. + QuoteNode(val) + else + val + end + end +end + +Base.Expr(ex::SyntaxTree) = JuliaSyntax.to_expr(ex) + +#-------------------------------------------------- +function _find_SyntaxTree_macro(ex, line) + @assert !is_leaf(ex) + for c in children(ex) + rng = byte_range(c) + firstline = JuliaSyntax.source_line(sourcefile(c), first(rng)) + lastline = JuliaSyntax.source_line(sourcefile(c), last(rng)) + if line < firstline || lastline < line + continue + end + # We're in the line range. Either + if firstline == line && kind(c) == K"macrocall" && begin + name = c[1] + if kind(name) == K"macro_name" + name = name[1] + end + if kind(name) == K"." 
+ name = name[2] + if kind(name) == K"macro_name" + name = name[1] + end + end + @assert kind(name) == K"Identifier" + name.name_val == "SyntaxTree" + end + # We find the node we're looking for. NB: Currently assuming a max + # of one @SyntaxTree invocation per line. Though we could relax + # this with more heuristic matching of the Expr-AST... + @assert numchildren(c) == 2 + return c[2] + elseif !is_leaf(c) + # Recurse + ex1 = _find_SyntaxTree_macro(c, line) + if !isnothing(ex1) + return ex1 + end + end + end + return nothing # Will get here if multiple children are on the same line. +end + +# Translate JuliaLowering hygiene to esc() for use in @SyntaxTree +function _scope_layer_1_to_esc!(ex) + if ex isa Expr + if ex.head == :scope_layer + @assert ex.args[2] === 1 + return esc(_scope_layer_1_to_esc!(ex.args[1])) + else + map!(_scope_layer_1_to_esc!, ex.args, ex.args) + return ex + end + else + return ex + end +end + +""" +Macro to construct quoted SyntaxTree literals (instead of quoted Expr literals) +in normal Julia source code. + +Example: + +```julia +tree1 = @SyntaxTree :(some_unique_identifier) +tree2 = @SyntaxTree quote + x = 1 + \$tree1 = x +end +``` +""" +macro SyntaxTree(ex_old) + # The implementation here is hilarious and arguably very janky: we + # 1. Briefly check but throw away the Expr-AST + if !(Meta.isexpr(ex_old, :quote) || ex_old isa QuoteNode) + throw(ArgumentError("@SyntaxTree expects a `quote` block or `:`-quoted expression")) + end + # 2. Re-parse the current source file as SyntaxTree instead + fname = isnothing(__source__.file) ? 
error("No current file") : String(__source__.file) + if occursin(r"REPL\[\d+\]", fname) + # Assume we should look at last history entry in REPL + try + # Wow digging in like this is an awful hack but `@SyntaxTree` is + # already a hack so let's go for it I guess 😆 + text = Base.active_repl.mistate.interface.modes[1].hist.history[end] + if !occursin("@SyntaxTree", text) + error("Text not found in last REPL history line") + end + catch + error("Text not found in REPL history") + end + else + text = read(fname, String) + end + full_ex = parseall(SyntaxTree, text) + # 3. Using the current file and line number, dig into the re-parsed tree and + # discover the piece of AST which should be returned. + ex = _find_SyntaxTree_macro(full_ex, __source__.line) + isnothing(ex) && error("_find_SyntaxTree_macro failed") + # 4. Do the first step of JuliaLowering's syntax lowering to get + # syntax interpolations to work + _, ex1 = expand_forms_1(__module__, ex, false, Base.tls_world_age()) + @assert kind(ex1) == K"call" && ex1[1].value == interpolate_ast + Expr(:call, :interpolate_ast, SyntaxTree, ex1[3][1], + map(e->_scope_layer_1_to_esc!(Expr(e)), ex1[4:end])...) +end + +#------------------------------------------------------------------------------- +# Lightweight vector of nodes ids with associated pointer to graph stored separately. 
"""
    SyntaxList(graph)
    SyntaxList(graph, ids::AbstractVector{NodeId})

Lightweight vector of syntax tree nodes: stores a single reference to the
shared syntax graph plus a flat vector of node ids, and presents itself as an
`AbstractVector{SyntaxTree}`.
"""
mutable struct SyntaxList{GraphType, NodeIdVecType} <: AbstractVector{SyntaxTree}
    graph::GraphType
    ids::NodeIdVecType
end

function SyntaxList(graph::SyntaxGraph, ids::AbstractVector{NodeId})
    SyntaxList{typeof(graph), typeof(ids)}(graph, ids)
end

SyntaxList(graph::SyntaxGraph) = SyntaxList(graph, Vector{NodeId}())
SyntaxList(ctx) = SyntaxList(syntax_graph(ctx))

syntax_graph(lst::SyntaxList) = lst.graph

# AbstractArray interface
Base.size(v::SyntaxList) = size(v.ids)

Base.IndexStyle(::Type{<:SyntaxList}) = IndexLinear()

Base.getindex(v::SyntaxList, i::Int) = SyntaxTree(v.graph, v.ids[i])

# NB: unlike `getindex` on most `AbstractVector`s, range indexing returns a
# list backed by a *view* of `v.ids`, so the result aliases `v`.
function Base.getindex(v::SyntaxList, r::UnitRange)
    SyntaxList(v.graph, view(v.ids, r))
end

function Base.setindex!(v::SyntaxList, ex::SyntaxTree, i::Int)
    check_compatible_graph(v, ex)
    v.ids[i] = ex._id
end

function Base.setindex!(v::SyntaxList, id::NodeId, i::Int)
    v.ids[i] = id
end

function Base.push!(v::SyntaxList, ex::SyntaxTree)
    check_compatible_graph(v, ex)
    push!(v.ids, ex._id)
    # Return the collection itself, per the `push!` convention (previously
    # this leaked the internal `v.ids` vector as the return value).
    v
end

function Base.pushfirst!(v::SyntaxList, ex::SyntaxTree)
    check_compatible_graph(v, ex)
    pushfirst!(v.ids, ex._id)
    v
end

# `similar` zero-fills the ids; id 0 acts as the "unassigned" sentinel
# (see `isassigned` below).
function Base.similar(v::SyntaxList, size::Tuple=Base.size(v.ids))
    SyntaxList(v.graph, zeros(NodeId, size))
end

function Base.isassigned(v::SyntaxList, i::Integer)
    # Bounds-safe, per the `isassigned` contract: out-of-bounds indices
    # return `false` rather than throwing. (The previous `v.ids[i] > 0`
    # threw a BoundsError for out-of-range `i`.)
    checkbounds(Bool, v.ids, i) && v.ids[i] > 0
end

function Base.append!(v::SyntaxList, exs)
    for e in exs
        push!(v, e)
    end
    v
end

function Base.append!(v::SyntaxList, exs::SyntaxList)
    # Fast path: both lists share one graph, so raw ids can be appended.
    check_compatible_graph(v, exs)
    append!(v.ids, exs.ids)
    v
end

function Base.push!(v::SyntaxList, id::NodeId)
    push!(v.ids, id)
    v
end

function Base.pop!(v::SyntaxList)
    SyntaxTree(v.graph, pop!(v.ids))
end

function Base.resize!(v::SyntaxList, n)
    resize!(v.ids, n)
    v
end

function Base.empty!(v::SyntaxList)
    empty!(v.ids)
    v
end

function Base.deleteat!(v::SyntaxList, inds)
    deleteat!(v.ids, inds)
    v
end

function Base.copy(v::SyntaxList)
    SyntaxList(v.graph, copy(v.ids))
end
# Filter `exs` into a new `SyntaxList` sharing the same underlying graph.
function Base.filter(f, exs::SyntaxList)
    out = SyntaxList(syntax_graph(exs))
    for ex in exs
        if f(ex)
            push!(out, ex)
        end
    end
    out
end

# Would like the following to be an overload of Base.map() ... but need
# somewhat arcane trickery to ensure that this only tries to collect into a
# SyntaxList when `f` yields a SyntaxTree.
#
# function mapsyntax(f, exs::SyntaxList)
#     out = SyntaxList(syntax_graph(exs))
#     for ex in exs
#         push!(out, f(ex))
#     end
#     out
# end
diff --git a/JuliaLowering/src/syntax_macros.jl b/JuliaLowering/src/syntax_macros.jl
new file mode 100644
index 0000000000000..e7e5a1c850d7a
--- /dev/null
+++ b/JuliaLowering/src/syntax_macros.jl
@@ -0,0 +1,371 @@
# The following are versions of macros from Base which act as "standard syntax
# extensions":
#
# * They emit syntactic forms with special `Kind`s and semantics known to
#   lowering
# * There is no other Julia surface syntax for these `Kind`s.

# In order to implement these here without getting into bootstrapping problems,
# we just write them as plain old macro-named functions and add the required
# __context__ argument ourselves.
#
# TODO: @inline, @noinline, @inbounds, @simd, @ccall, @assume_effects
#
# TODO: Eventually move these to proper `macro` definitions and use
# `JuliaLowering.include()` or something. Then we'll be in the fun little world
# of bootstrapping but it shouldn't be too painful :)

# Recursively attach the `nospecialize=true` metadata to the name part of a
# function argument expression (identifier/placeholder/tuple, possibly wrapped
# in `...`, `::` or a default-value `=`).
function _apply_nospecialize(ctx, ex)
    k = kind(ex)
    if k == K"Identifier" || k == K"Placeholder" || k == K"tuple"
        setmeta(ex; nospecialize=true)
    elseif k == K"..." || k == K"::" || k == K"="
        if k == K"::" && numchildren(ex) == 1
            # Unnamed `::T` argument: give it a placeholder name so the
            # metadata has somewhere to attach.
            ex = @ast ctx ex [K"::" "_"::K"Placeholder" ex[1]]
        end
        # Only the first child carries the argument name.
        mapchildren(c->_apply_nospecialize(ctx, c), ctx, ex, 1:1)
    else
        throw(LoweringError(ex, "Invalid function argument"))
    end
end

function Base.var"@nospecialize"(__context__::MacroContext, ex, exs...)
    # TODO support multi-arg version properly
    _apply_nospecialize(__context__, ex)
end

function Base.var"@atomic"(__context__::MacroContext, ex)
    @chk kind(ex) == K"Identifier" || kind(ex) == K"::" (ex, "Expected identifier or declaration")
    @ast __context__ __context__.macrocall [K"atomic" ex]
end

function Base.var"@label"(__context__::MacroContext, ex)
    @chk kind(ex) == K"Identifier"
    @ast __context__ ex ex=>K"symbolic_label"
end

function Base.var"@goto"(__context__::MacroContext, ex)
    @chk kind(ex) == K"Identifier"
    @ast __context__ ex ex=>K"symbolic_goto"
end

function Base.var"@locals"(__context__::MacroContext)
    @ast __context__ __context__.macrocall [K"extension" "locals"::K"Symbol"]
end

function Base.var"@isdefined"(__context__::MacroContext, ex)
    @ast __context__ __context__.macrocall [K"isdefined" ex]
end

# Bare `@generated` marker form (used inside `if @generated ...`).
function Base.var"@generated"(__context__::MacroContext)
    @ast __context__ __context__.macrocall [K"generated"]
end
# `@generated function ... end`: wrap the body in an `if generated` branch;
# the non-generated branch carries the `generated_only` meta and returns.
function Base.var"@generated"(__context__::MacroContext, ex)
    if kind(ex) != K"function"
        throw(LoweringError(ex, "Expected a function argument to `@generated`"))
    end
    @ast __context__ __context__.macrocall [K"function"
        ex[1]
        [K"if" [K"generated"]
            ex[2]
            [K"block"
                [K"meta" "generated_only"::K"Symbol"]
                [K"return"]
            ]
        ]
    ]
end

function Base.var"@cfunction"(__context__::MacroContext, callable, return_type, arg_types)
    if kind(arg_types) != K"tuple"
        throw(MacroExpansionError(arg_types, "@cfunction argument types must be a literal tuple"))
    end
    arg_types_svec = @ast __context__ arg_types [K"call"
        "svec"::K"core"
        children(arg_types)...
    ]
    if kind(callable) == K"$"
        # `$callable` form: a runtime function pointer, closed over by a
        # `Base.CFunction` object.
        fptr = callable[1]
        typ = Base.CFunction
    else
        # Kinda weird semantics here - without `$`, the callable is a top level
        # expression which will be evaluated by `jl_resolve_globals_in_ir`,
        # implicitly within the module where the `@cfunction` is expanded into.
        fptr = @ast __context__ callable [K"static_eval"(
                meta=name_hint("cfunction function name"))
            callable
        ]
        typ = Ptr{Cvoid}
    end
    @ast __context__ __context__.macrocall [K"cfunction"
        typ::K"Value"
        fptr
        [K"static_eval"(meta=name_hint("cfunction return type"))
            return_type
        ]
        [K"static_eval"(meta=name_hint("cfunction argument type"))
            arg_types_svec
        ]
        "ccall"::K"Symbol"
    ]
end

# Parse the `@ccall` invocation syntax `f(a::A, b::B; varargs...)::Ret` plus
# trailing `key=value` options. Returns
# `(func, rettype, types, args, gc_safe, num_required_args)` for
# `ccall_macro_lower`.
function ccall_macro_parse(ctx, ex, opts)
    gc_safe=false
    for opt in opts
        if kind(opt) != K"=" || numchildren(opt) != 2 ||
                kind(opt[1]) != K"Identifier"
            throw(MacroExpansionError(opt, "Bad option to ccall"))
        else
            optname = opt[1].name_val
            if optname == "gc_safe"
                if kind(opt[2]) == K"Bool"
                    gc_safe = opt[2].value::Bool
                else
                    throw(MacroExpansionError(opt[2], "gc_safe must be true or false"))
                end
            else
                throw(MacroExpansionError(opt[1], "Unknown option name for ccall"))
            end
        end
    end

    if kind(ex) != K"::"
        throw(MacroExpansionError(ex, "Expected a return type annotation `::SomeType`", position=:end))
    end

    rettype = ex[2]
    call = ex[1]
    if kind(call) != K"call"
        throw(MacroExpansionError(call, "Expected function call syntax `f()`"))
    end

    func = call[1]
    # Arguments in the trailing `parameters` block (i.e. after `;`) are
    # treated as the C varargs.
    varargs = numchildren(call) > 1 && kind(call[end]) == K"parameters" ?
        children(call[end]) : nothing

    # collect args and types
    args = SyntaxList(ctx)
    types = SyntaxList(ctx)
    function pusharg!(arg)
        if kind(arg) != K"::"
            throw(MacroExpansionError(arg, "argument needs a type annotation"))
        end
        push!(args, arg[1])
        push!(types, arg[2])
    end

    for e in call[2:(isnothing(varargs) ? end : end-1)]
        kind(e) != K"parameters" || throw(MacroExpansionError(call[end], "Multiple parameter blocks not allowed"))
        pusharg!(e)
    end

    if !isnothing(varargs)
        # `args` holds only the required arguments at this point.
        num_required_args = length(args)
        if num_required_args == 0
            throw(MacroExpansionError(call[end], "C ABI prohibits varargs without one required argument"))
        end
        for e in varargs
            pusharg!(e)
        end
    else
        num_required_args = 0 # Non-vararg call
    end

    return func, rettype, types, args, gc_safe, num_required_args
end

# Lower a parsed `@ccall` into a block of `cconvert` statements followed by a
# `foreigncall` form.
function ccall_macro_lower(ctx, ex, convention, func, rettype, types, args, gc_safe, num_required_args)
    statements = SyntaxTree[]
    kf = kind(func)
    if kf == K"Identifier"
        lowered_func = @ast ctx func func=>K"Symbol"
    elseif kf == K"."
        # `lib.f` form: (function name, library) tuple.
        lowered_func = @ast ctx func [K"tuple"
            func[2]=>K"Symbol"
            [K"static_eval"(meta=name_hint("@ccall library name"))
                func[1]
            ]
        ]
    elseif kf == K"$"
        # Interpolated function pointer: insert a runtime type check.
        check = @SyntaxTree quote
            func = $(func[1])
            if !isa(func, Ptr{Cvoid})
                name = :($(func[1]))
                throw(ArgumentError("interpolated function `$name` was not a `Ptr{Cvoid}`, but $(typeof(func))"))
            end
        end
        push!(statements, check)
        lowered_func = check[1][1]
    else
        throw(MacroExpansionError(func,
            "Function name must be a symbol like `foo`, a library and function name like `libc.printf` or an interpolated function pointer like `\$ptr`"))
    end

    # NOTE(review): `roots` are placed after the converted C arguments in the
    # emitted `foreigncall` - presumably GC roots keeping the `cconvert`
    # results alive for the call; confirm against the foreigncall ABI.
    roots = SyntaxTree[]
    cargs = SyntaxTree[]
    for (i, (type, arg)) in enumerate(zip(types, args))
        argi = @ast ctx arg "arg$i"::K"Identifier"
        # TODO: Does it help to emit ssavar() here for the `argi`?
        push!(statements, @SyntaxTree :(local $argi = Base.cconvert($type, $arg)))
        push!(roots, argi)
        push!(cargs, @SyntaxTree :(Base.unsafe_convert($type, $argi)))
    end
    effect_flags = UInt16(0)
    push!(statements, @ast ctx ex [K"foreigncall"
        lowered_func
        [K"static_eval"(meta=name_hint("@ccall return type"))
            rettype
        ]
        [K"static_eval"(meta=name_hint("@ccall argument type"))
            [K"call"
                "svec"::K"core"
                types...
            ]
        ]
        num_required_args::K"Integer"
        QuoteNode((convention, effect_flags, gc_safe))::K"Value"
        cargs...
        roots...
    ])

    @ast ctx ex [K"block"
        statements...
    ]
end

function Base.var"@ccall"(ctx::MacroContext, ex, opts...)
    ccall_macro_lower(ctx, ex, :ccall, ccall_macro_parse(ctx, ex, opts)...)
end

# `GC.@preserve a b expr`: evaluate `expr` between `gc_preserve_begin`/`end`
# so the listed identifiers stay rooted for its duration.
function Base.GC.var"@preserve"(__context__::MacroContext, exs...)
    idents = exs[1:end-1]
    for e in idents
        if kind(e) != K"Identifier"
            throw(MacroExpansionError(e, "Preserved variable must be a symbol"))
        end
    end
    @ast __context__ __context__.macrocall [K"block"
        [K"="
            "s"::K"Identifier"
            [K"gc_preserve_begin"
                idents...
            ]
        ]
        [K"="
            "r"::K"Identifier"
            exs[end]
        ]
        [K"gc_preserve_end" "s"::K"Identifier"]
        "r"::K"Identifier"
    ]
end

function Base.Experimental.var"@opaque"(__context__::MacroContext, ex)
    @chk kind(ex) == K"->"
    @ast __context__ __context__.macrocall [K"opaque_closure"
        "nothing"::K"core"
        "nothing"::K"core"
        "nothing"::K"core"
        true::K"Bool"
        ex
    ]
end

# Shared body for the 1- and 2-argument forms of `@eval` below.
function _at_eval_code(ctx, srcref, mod, ex)
    @ast ctx srcref [K"block"
        [K"local"
            [K"="
                "eval_result"::K"Identifier"
                [K"call"
                    # TODO: Call "eval"::K"core" here
                    JuliaLowering.eval::K"Value"
                    mod
                    [K"quote" ex]
                ]
            ]
        ]
        (::K"latestworld_if_toplevel")
        "eval_result"::K"Identifier"
    ]
end

function Base.var"@eval"(__context__::MacroContext, ex)
    mod = @ast __context__ __context__.macrocall __context__.scope_layer.mod::K"Value"
    _at_eval_code(__context__, __context__.macrocall, mod, ex)
end

function Base.var"@eval"(__context__::MacroContext, mod, ex)
    _at_eval_code(__context__, __context__.macrocall, mod, ex)
end

#--------------------------------------------------------------------------------
# The following `@islocal` and `@inert` are macros for special syntax known to
# lowering which don't exist in Base but arguably should.
#
# For now we have our own versions

# Lower `@islocal x` (where `x` must be a plain identifier) to lowering's
# `islocal` extension form.
function var"@islocal"(__context__::MacroContext, ex)
    @chk kind(ex) == K"Identifier"
    @ast __context__ __context__.macrocall [K"extension"
        "islocal"::K"Symbol"
        ex
    ]
end

"""
A non-interpolating quoted expression.

For example,

```julia
@inert quote
    \$x
end
```

does not take `x` from the surrounding scope - instead it leaves the
interpolation `\$x` intact as part of the expression tree.

TODO: What is the correct way for `@inert` to work? ie which of the following
should work?

```julia
@inert quote
    body
end

@inert begin
    body
end

@inert x

@inert \$x
```

The especially tricky cases involve nested interpolation ...
```julia
quote
    @inert \$x
end

@inert quote
    quote
        \$x
    end
end

@inert quote
    quote
        \$\$x
    end
end
```

etc. Needs careful thought - we should probably just copy what lisp does with
quote+quasiquote 😅
"""
function var"@inert"(__context__::MacroContext, ex)
    @chk kind(ex) == K"quote"
    @ast __context__ __context__.macrocall [K"inert" ex]
end
diff --git a/JuliaLowering/src/utils.jl b/JuliaLowering/src/utils.jl
new file mode 100644
index 0000000000000..a3807ae24a09e
--- /dev/null
+++ b/JuliaLowering/src/utils.jl
@@ -0,0 +1,185 @@
# Error handling

# Markers for unimplemented lowering functionality; the SyntaxTree form
# attaches the offending syntax to the error.
TODO(msg::AbstractString) = throw(ErrorException("Lowering TODO: $msg"))
TODO(ex::SyntaxTree, msg="") = throw(LoweringError(ex, "Lowering TODO: $msg"))

# Errors found during lowering will result in LoweringError being thrown to
# indicate the syntax causing the error.
+struct LoweringError <: Exception + ex::SyntaxTree + msg::String +end + +function Base.showerror(io::IO, exc::LoweringError; show_detail=true) + print(io, "LoweringError:\n") + src = sourceref(exc.ex) + highlight(io, src; note=exc.msg) + + if show_detail + print(io, "\n\nDetailed provenance:\n") + showprov(io, exc.ex, tree=true) + end +end + +#------------------------------------------------------------------------------- +function _show_provtree(io::IO, ex::SyntaxTree, indent) + print(io, ex, "\n") + prov = provenance(ex) + for (i, e) in enumerate(prov) + islast = i == length(prov) + printstyled(io, "$indent$(islast ? "└─ " : "├─ ")", color=:light_black) + inner_indent = indent * (islast ? " " : "│ ") + _show_provtree(io, e, inner_indent) + end +end + +function _show_provtree(io::IO, prov, indent) + fn = filename(prov) + line, _ = source_location(prov) + printstyled(io, "@ $fn:$line\n", color=:light_black) +end + +function showprov(io::IO, exs::AbstractVector; + note=nothing, include_location::Bool=true, highlight_kwargs...) + for (i,ex) in enumerate(Iterators.reverse(exs)) + sr = sourceref(ex) + if i > 1 + print(io, "\n\n") + end + k = kind(ex) + ex_note = !isnothing(note) ? note : + i > 1 && k == K"macrocall" ? "in macro expansion" : + i > 1 && k == K"$" ? "interpolated here" : + "in source" + highlight(io, sr; note=ex_note, highlight_kwargs...) + + if include_location + line, _ = source_location(sr) + locstr = "$(filename(sr)):$line" + JuliaSyntax._printstyled(io, "\n# @ $locstr", fgcolor=:light_black) + end + end +end + +function showprov(io::IO, ex::SyntaxTree; tree::Bool=false, showprov_kwargs...) + if tree + _show_provtree(io, ex, "") + else + showprov(io, flattened_provenance(ex); showprov_kwargs...) + end +end + +function showprov(x; kws...) + showprov(stdout, x; kws...) 
+end + +function subscript_str(i) + replace(string(i), + "0"=>"₀", "1"=>"₁", "2"=>"₂", "3"=>"₃", "4"=>"₄", + "5"=>"₅", "6"=>"₆", "7"=>"₇", "8"=>"₈", "9"=>"₉") +end + +function _deref_ssa(stmts, ex) + while kind(ex) == K"SSAValue" + ex = stmts[ex.var_id] + end + ex +end + +function _find_method_lambda(ex, name) + @assert kind(ex) == K"code_info" + # Heuristic search through outer thunk for the method in question. + method_found = false + stmts = children(ex[1]) + for e in stmts + if kind(e) == K"method" && numchildren(e) >= 2 + sig = _deref_ssa(stmts, e[2]) + @assert kind(sig) == K"call" + arg_types = _deref_ssa(stmts, sig[2]) + @assert kind(arg_types) == K"call" + self_type = _deref_ssa(stmts, arg_types[2]) + if kind(self_type) == K"globalref" && occursin(name, self_type.name_val) + return e[3] + end + end + end +end + +function print_ir(io::IO, ex, method_filter=nothing) + @assert kind(ex) == K"code_info" + if !isnothing(method_filter) + filtered = _find_method_lambda(ex, method_filter) + if isnothing(filtered) + @warn "Method not found with method filter $method_filter" + else + ex = filtered + end + end + _print_ir(io, ex, "") +end + +function _print_ir(io::IO, ex, indent) + added_indent = " " + @assert (kind(ex) == K"lambda" || kind(ex) == K"code_info") && kind(ex[1]) == K"block" + if !ex.is_toplevel_thunk && kind(ex) == K"code_info" + slots = ex.slots + print(io, indent, "slots: [") + for (i,slot) in enumerate(slots) + print(io, "slot$(subscript_str(i))/$(slot.name)") + flags = String[] + slot.is_nospecialize && push!(flags, "nospecialize") + !slot.is_read && push!(flags, "!read") + slot.is_single_assign && push!(flags, "single_assign") + slot.is_maybe_undef && push!(flags, "maybe_undef") + slot.is_called && push!(flags, "called") + if !isempty(flags) + print(io, "($(join(flags, ",")))") + end + if i < length(slots) + print(io, " ") + end + end + println(io, "]") + end + stmts = children(ex[1]) + for (i, e) in enumerate(stmts) + lno = rpad(i, 3) + if kind(e) 
== K"method" && numchildren(e) == 3 + print(io, indent, lno, " --- method ", string(e[1]), " ", string(e[2])) + if kind(e[3]) == K"lambda" || kind(e[3]) == K"code_info" + println(io) + _print_ir(io, e[3], indent*added_indent) + else + println(io, " ", string(e[3])) + end + elseif kind(e) == K"opaque_closure_method" + @assert numchildren(e) == 5 + print(io, indent, lno, " --- opaque_closure_method ") + for i=1:4 + print(io, " ", e[i]) + end + println(io) + _print_ir(io, e[5], indent*added_indent) + elseif kind(e) == K"code_info" + println(io, indent, lno, " --- ", e.is_toplevel_thunk ? "thunk" : "code_info") + _print_ir(io, e, indent*added_indent) + else + code = string(e) + println(io, indent, lno, " ", code) + end + end +end + +# Wrap a function body in Base.Compiler.@zone for profiling +if isdefined(Base.Compiler, Symbol("@zone")) + macro fzone(str, f) + @assert f isa Expr && f.head === :function && length(f.args) === 2 && str isa String + esc(Expr(:function, f.args[1], + # Use source of our caller, not of this macro. + Expr(:macrocall, :(Base.Compiler.var"@zone"), __source__, str, f.args[2]))) + end +else + macro fzone(str, f) + esc(f) + end +end diff --git a/JuliaLowering/test/arrays.jl b/JuliaLowering/test/arrays.jl new file mode 100644 index 0000000000000..bc3e43af0b089 --- /dev/null +++ b/JuliaLowering/test/arrays.jl @@ -0,0 +1,148 @@ +using Test, JuliaLowering + +@testset "Array syntax" begin + +test_mod = Module() + +# Test that two array element types are equal and that they are also equal +# elementwise +function ≅(a, b) + eltype(a) == eltype(b) && a == b +end + +# vect +@test JuliaLowering.include_string(test_mod, """ +[1,2,3] +""") ≅ [1,2,3] + +# hcat +@test JuliaLowering.include_string(test_mod, """ +[1 2 3] +""") ≅ [1 2 3] + +# typed_hcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 2.0 3.0] +""") ≅ [1 2 3] + +# splat with vect/hcat/typed_hcat +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + [0, xs...] 
+end +""") ≅ [0,1,2,3] +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + [0 xs...] +end +""") ≅ [0 1 2 3] +@test JuliaLowering.include_string(test_mod, """ +let xs = [1,2,3] + Int[0 xs...] +end +""") ≅ Int[0 1 2 3] + +# vcat +@test JuliaLowering.include_string(test_mod, """ +[1;2;3] +""") ≅ [1; 2; 3] + +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1,2) + [xs...; xs...] +end +""") ≅ [1,2,1,2] + +# hvcat +@test JuliaLowering.include_string(test_mod, """ +[1 2 3; 4 5 6] +""") ≅ [1 2 3; + 4 5 6] + +# hvcat_rows +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1,2) + [xs... 3; 4 xs...] +end +""") ≅ [1 2 3; + 4 1 2] + +# typed_vcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0; 2.0; 3.0] +""") ≅ [1; 2; 3] + +# typed_hvcat +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 2.0 3.0; 4.0 5.0 6.0] +""") ≅ [1 2 3; + 4 5 6] + +# typed_hvcat_rows +@test JuliaLowering.include_string(test_mod, """ +let + xs = (1.0,2.0) + Int[xs... 3; 4 xs...] 
+end +""") ≅ [1 2 3; + 4 1 2] + +# ncat with a single dimension +@test JuliaLowering.include_string(test_mod, """ +[1 ;;; 2 ;;; 3] +""") ≅ [1 ;;; 2 ;;; 3] + +@test JuliaLowering.include_string(test_mod, """ +Int[1.0 ;;; 2.0 ;;; 3.0] +""") ≅ [1 ;;; 2 ;;; 3] + +# Lowering of ref to setindex +@test JuliaLowering.include_string(test_mod, """ +let + as = [0,0,0,0] + as[begin] = 1 + as[2] = 2 + as[end] = 4 + as +end +""") == [1, 2, 0, 4] + +@test JuliaLowering.include_string(test_mod, """ +let + as = zeros(Int, 2,3) + as[begin, end] = 1 + as[end, begin] = 2 + js = (2,) + as[js..., end] = 3 + as +end +""") == [0 0 1; + 2 0 3] + +# getindex +@test JuliaLowering.include_string(test_mod, """ +let + x = [1 2; + 3 4] + (x[end,begin], x[begin,end]) +end +""") == (3, 2) + +# getindex with splats +@test JuliaLowering.include_string(test_mod, """ +let + x = [1 2; + 3 4 + ;;; + 5 6; + 7 8] + inds = (2,1) + ind1 = (1,) + (x[inds..., begin], x[inds..., end], x[1, inds...], + x[ind1..., ind1..., end]) +end +""") == (3, 7, 2, 5) + +end # @testset "Array syntax" begin diff --git a/JuliaLowering/test/arrays_ir.jl b/JuliaLowering/test/arrays_ir.jl new file mode 100644 index 0000000000000..4595603e4b79d --- /dev/null +++ b/JuliaLowering/test/arrays_ir.jl @@ -0,0 +1,498 @@ +######################################## +# vect syntax +[10, 20, 30] +#--------------------- +1 (call top.vect 10 20 30) +2 (return %₁) + +######################################## +# vect with splat +[x, xs...] +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate top.vect %₂ %₃) +5 (return %₄) + +######################################## +# vect with splats +[x, xs..., y, ys...] 
+#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 TestMod.y +5 (call core.tuple %₄) +6 TestMod.ys +7 (call core._apply_iterate top.iterate top.vect %₂ %₃ %₅ %₆) +8 (return %₇) + +######################################## +# Error: vect syntax with parameters +[10, 20; 30] +#--------------------- +LoweringError: +[10, 20; 30] +# └──┘ ── unexpected semicolon in array expression + +######################################## +# Error: vect syntax with embedded assignments +[a=20, 30] +#--------------------- +LoweringError: +[a=20, 30] +#└──┘ ── misplaced assignment statement in `[ ... ]` + +######################################## +# hcat syntax +[10 20 30] +#--------------------- +1 (call top.hcat 10 20 30) +2 (return %₁) + +######################################## +# hcat with splat +[x xs...] +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate top.hcat %₂ %₃) +5 (return %₄) + +######################################## +# typed hcat syntax +T[10 20 30] +#--------------------- +1 TestMod.T +2 (call top.typed_hcat %₁ 10 20 30) +3 (return %₂) + +######################################## +# typed hcat syntax with splat +T[x xs...] +#--------------------- +1 TestMod.T +2 TestMod.x +3 (call core.tuple %₁ %₂) +4 TestMod.xs +5 (call core._apply_iterate top.iterate top.typed_hcat %₃ %₄) +6 (return %₅) + +######################################## +# Error: hcat syntax with embedded assignments +[10 20 a=40] +#--------------------- +LoweringError: +[10 20 a=40] +# └───┘ ── misplaced assignment statement in `[ ... 
]` + +######################################## +# vcat syntax +[10; 20; 30] +#--------------------- +1 (call top.vcat 10 20 30) +2 (return %₁) + +######################################## +# vcat with splats +[a...; 20; 30] +#--------------------- +1 TestMod.a +2 (call core.tuple 20 30) +3 (call core._apply_iterate top.iterate top.vcat %₁ %₂) +4 (return %₃) + +######################################## +# hvcat syntax +[10; 20 30; 40 e f] +#--------------------- +1 (call core.tuple 1 2 3) +2 TestMod.e +3 TestMod.f +4 (call top.hvcat %₁ 10 20 30 40 %₂ %₃) +5 (return %₄) + +######################################## +# hvcat with splats nested within rows +[10; 20 a...] +#--------------------- +1 (call core.tuple 10) +2 (call core.tuple 20) +3 TestMod.a +4 (call core._apply_iterate top.iterate core.tuple %₂ %₃) +5 (call top.hvcat_rows %₁ %₄) +6 (return %₅) + +######################################## +# Error: vcat syntax with assignments +[a=20; 30] +#--------------------- +LoweringError: +[a=20; 30] +#└──┘ ── misplaced assignment statement in `[ ... ]` + +######################################## +# typed_vcat syntax +T[10; 20; 30] +#--------------------- +1 TestMod.T +2 (call top.typed_vcat %₁ 10 20 30) +3 (return %₂) + +######################################## +# typed_hvcat syntax +T[10; 20 30; 40 50 60] +#--------------------- +1 TestMod.T +2 (call core.tuple 1 2 3) +3 (call top.typed_hvcat %₁ %₂ 10 20 30 40 50 60) +4 (return %₃) + +######################################## +# typed_hvcat with splats nested within rows +T[10; 20 a...] 
+#--------------------- +1 TestMod.T +2 (call core.tuple 10) +3 (call core.tuple 20) +4 TestMod.a +5 (call core._apply_iterate top.iterate core.tuple %₃ %₄) +6 (call top.typed_hvcat_rows %₁ %₂ %₅) +7 (return %₆) + +######################################## +# ncat with a single dimension +[10 ;;; 20 ;;; 30] +#--------------------- +1 (call top.hvncat 3 10 20 30) +2 (return %₁) + +######################################## +# typed_ncat with a single dimension +T[10 ;;; 20 ;;; 30] +#--------------------- +1 TestMod.T +2 (call top.typed_hvncat %₁ 3 10 20 30) +3 (return %₂) + +######################################## +# ncat with balanced column major element layout +[10 ; 20 ; 30 ;;; 40 ; 50 ; 60] +#--------------------- +1 (call core.tuple 3 1 2) +2 (call top.hvncat %₁ false 10 20 30 40 50 60) +3 (return %₂) + +######################################## +# typed multidimensional ncat +T[10 ; 20 ; 30 ;;; 40 ; 50 ; 60] +#--------------------- +1 TestMod.T +2 (call core.tuple 3 1 2) +3 (call top.typed_hvncat %₁ %₂ false 10 20 30 40 50 60) +4 (return %₃) + +######################################## +# ncat with balanced row major element layout +[10 20 30 ; 40 50 60 ;;;] +#--------------------- +1 (call core.tuple 2 3 1) +2 (call top.hvncat %₁ true 10 20 30 40 50 60) +3 (return %₂) + +######################################## +# ncat of 3D array with balanced layout +[10 ; 20 ;; 30 ; 40 ;;; 50 ; 60 ;; 70 ; 80] +#--------------------- +1 (call core.tuple 2 2 2) +2 (call top.hvncat %₁ false 10 20 30 40 50 60 70 80) +3 (return %₂) + +######################################## +# ncat with unbalanced column major layout +[10 ; 20 ;; 30 ;;; 40 ;;;;] +#--------------------- +1 (call core.tuple 2 1 1) +2 (call core.tuple 3 1) +3 (call core.tuple 4) +4 (call core.tuple 4) +5 (call core.tuple %₁ %₂ %₃ %₄) +6 (call top.hvncat %₅ false 10 20 30 40) +7 (return %₆) + +######################################## +# ncat with unbalanced row major layout +[10 20 ; 30 40 ; 50 60 ;;; 70 ;;; 80 ;;;;] 
+#--------------------- +1 (call core.tuple 2 2 2 1 1) +2 (call core.tuple 6 1 1) +3 (call core.tuple 8) +4 (call core.tuple 8) +5 (call core.tuple %₁ %₂ %₃ %₄) +6 (call top.hvncat %₅ true 10 20 30 40 50 60 70 80) +7 (return %₆) + +######################################## +# Splatting with 1D ncat +[xs ;;; ys... ;;; zs] +#--------------------- +1 TestMod.xs +2 (call core.tuple 3 %₁) +3 TestMod.ys +4 TestMod.zs +5 (call core.tuple %₄) +6 (call core._apply_iterate top.iterate top.hvncat %₂ %₃ %₅) +7 (return %₆) + +######################################## +# Error: splatting with multi-dimensional ncat +[xs ; ys ;;; zs...] +#--------------------- +LoweringError: +[xs ; ys ;;; zs...] +# └───┘ ── Splatting ... in an `ncat` with multiple dimensions is not supported + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"nrow"(syntax_flags=set_numeric_flags(1)) + [K"nrow"(syntax_flags=set_numeric_flags(1)) + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - Badly nested rows in `ncat` + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"nrow"(syntax_flags=set_numeric_flags(2)) + [K"row" + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - 2D `nrow` cannot be mixed with `row` in `ncat` + +######################################## +# Error: bad nrow nesting +@ast_ [K"ncat"(syntax_flags=set_numeric_flags(3)) + [K"row" + [K"row" + 1::K"Integer" + ] + ] +] +#--------------------- +LoweringError: +#= line 1 =# - Badly nested rows in `ncat` + +######################################## +# Simple getindex +a[i] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.getindex %₁ %₂) +4 (return %₃) + +######################################## +# simple 1D getindex with begin +a[begin] +#--------------------- +1 TestMod.a +2 (call top.firstindex %₁) +3 (call top.getindex %₁ %₂) 
+4 (return %₃) + +######################################## +# simple 1D getindex with end +a[end] +#--------------------- +1 TestMod.a +2 (call top.lastindex %₁) +3 (call top.getindex %₁ %₂) +4 (return %₃) + +######################################## +# multidimensional getindex with begin +a[i, begin] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.firstindex %₁ 2) +4 (call top.getindex %₁ %₂ %₃) +5 (return %₄) + +######################################## +# multidimensional getindex with end +a[i, end] +#--------------------- +1 TestMod.a +2 TestMod.i +3 (call top.lastindex %₁ 2) +4 (call top.getindex %₁ %₂ %₃) +5 (return %₄) + +######################################## +# multidimensional getindex with begin/end and splats +a[is..., end, js..., begin] +#--------------------- +1 TestMod.a +2 TestMod.is +3 (call top.length %₂) +4 (call top.+ 1 %₃) +5 (call top.lastindex %₁ %₄) +6 TestMod.js +7 (call top.length %₂) +8 (call top.length %₆) +9 (call top.+ 2 %₇ %₈) +10 (call top.firstindex %₁ %₉) +11 (call core.tuple %₁) +12 (call core.tuple %₅) +13 (call core.tuple %₁₀) +14 (call core._apply_iterate top.iterate top.getindex %₁₁ %₂ %₁₂ %₆ %₁₃) +15 (return %₁₄) + +######################################## +# getindex with nontrivial array expression and begin/end +f()[end] +#--------------------- +1 TestMod.f +2 (call %₁) +3 (call top.lastindex %₂) +4 (call top.getindex %₂ %₃) +5 (return %₄) + +######################################## +# nested refs with getindex and begin/end +b[a[begin, end], begin, end] +#--------------------- +1 TestMod.b +2 TestMod.a +3 (call top.firstindex %₂ 1) +4 (call top.lastindex %₂ 2) +5 (call top.getindex %₂ %₃ %₄) +6 (call top.firstindex %₁ 2) +7 (call top.lastindex %₁ 3) +8 (call top.getindex %₁ %₅ %₆ %₇) +9 (return %₈) + +######################################## +# Error: parameters in array ref +a[i, j; w=1] +#--------------------- +LoweringError: +a[i, j; w=1] +# └───┘ ── unexpected semicolon in array expression + 
+######################################## +# simple setindex! +a[i] = x +#--------------------- +1 TestMod.x +2 TestMod.a +3 TestMod.i +4 (call top.setindex! %₂ %₁ %₃) +5 (return %₁) + +######################################## +# simple setindex! with begin +a[begin] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 (call top.firstindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) + +######################################## +# simple setindex! with end +a[end] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 (call top.lastindex %₁) +4 (call top.setindex! %₁ %₂ %₃) +5 (return %₂) + +######################################## +# multidimensional setindex! with begin +a[i, begin] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 TestMod.i +4 (call top.firstindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) + +######################################## +# multidimensional setindex! with end +a[i, end] = x +#--------------------- +1 TestMod.a +2 TestMod.x +3 TestMod.i +4 (call top.lastindex %₁ 2) +5 (call top.setindex! %₁ %₂ %₃ %₄) +6 (return %₂) + +######################################## +# multidimensional setindex! with begin/end and splats +a[is..., end, js..., begin] = x +#--------------------- +1 TestMod.a +2 TestMod.is +3 (call top.length %₂) +4 (call top.+ 1 %₃) +5 (call top.lastindex %₁ %₄) +6 TestMod.js +7 (call top.length %₂) +8 (call top.length %₆) +9 (call top.+ 2 %₇ %₈) +10 (call top.firstindex %₁ %₉) +11 TestMod.x +12 (call core.tuple %₁ %₁₁) +13 (call core.tuple %₅) +14 (call core.tuple %₁₀) +15 (call core._apply_iterate top.iterate top.setindex! %₁₂ %₂ %₁₃ %₆ %₁₄) +16 (return %₁₁) + +######################################## +# setindex! with nontrivial array expression and begin/end +f()[end] = x +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.x +4 (call top.lastindex %₂) +5 (call top.setindex! 
%₂ %₃ %₄) +6 (return %₃) + +######################################## +# nested refs +b[a[begin]] = x +#--------------------- +1 TestMod.b +2 TestMod.x +3 TestMod.a +4 (call top.firstindex %₃) +5 (call top.getindex %₃ %₄) +6 (call top.setindex! %₁ %₂ %₅) +7 (return %₂) + +######################################## +# empty ref and setindex! +a[] = rhs +#--------------------- +1 TestMod.rhs +2 TestMod.a +3 (call top.setindex! %₂ %₁) +4 (return %₁) diff --git a/JuliaLowering/test/assignments.jl b/JuliaLowering/test/assignments.jl new file mode 100644 index 0000000000000..d15706c2f8d70 --- /dev/null +++ b/JuliaLowering/test/assignments.jl @@ -0,0 +1,98 @@ +@testset "assignments" begin + +test_mod = Module() + +Base.include_string(test_mod, +""" +mutable struct X + a + b +end +""") + +# TODO: Desugaring of assignment done, but needs `where` lowering +JuliaLowering.include_string(test_mod, """ +MyVector{T} = Array{1,T} +""") +@test test_mod.MyVector{Int} == Array{1,Int} + +# Chained assignment +@test JuliaLowering.include_string(test_mod, """ +let + a = b = 42 +end +""") == 42 + +# Assignment in value but not tail position +@test JuliaLowering.include_string(test_mod, """ +let + x = begin + y = 42 + end + x +end +""") == 42 + +@test JuliaLowering.include_string(test_mod, """ +let + x = [] + a = b = (push!(x, 1); 42) + (a,b,x) +end +""") == (42,42,[1]) + +# setproperty! 
+@test JuliaLowering.include_string(test_mod, """ +let + x = X(1,2) + x.a = 10 + (x.a, x.b) +end +""") == (10,2) + +# Declarations +@test JuliaLowering.include_string(test_mod, """ +let + x::Int = 1 + x = 10.0 + x +end +""") === 10 + +# Updating assignments +@test JuliaLowering.include_string(test_mod, """ +let x = "hi" + x *= " ho" + x +end +""") == "hi ho" + +@test JuliaLowering.include_string(test_mod, """ +let x = [1,3] + x .-= [0,1] + x +end +""") == [1,2] + +@test JuliaLowering.include_string(test_mod, """ +let x = [1 2; 3 4] + x[begin, 1:end] .-= 1 + x +end +""") == [0 1 ; 3 4] + +# Test that side effects of computing indices in left hand side only occur +# once. +@test JuliaLowering.include_string(test_mod, """ +let + x = [1, 2] + n_calls = 0 + the_index() = (n_calls = n_calls + 1; 1) + x[the_index()] += 1 + x[the_index()]::Int += 1 + x[the_index():end] .+= 1 + n_calls +end +""") == 3 + +end diff --git a/JuliaLowering/test/assignments_ir.jl b/JuliaLowering/test/assignments_ir.jl new file mode 100644 index 0000000000000..2b002fbcef61e --- /dev/null +++ b/JuliaLowering/test/assignments_ir.jl @@ -0,0 +1,361 @@ +######################################## +# chain of assignments +let + a = b = c = 1 +end +#--------------------- +1 1 +2 (= slot₁/a %₁) +3 (= slot₂/b %₁) +4 (= slot₃/c %₁) +5 (return %₁) + +######################################## +# chain of assignments with nontrivial rhs +let + a = b = c = f() +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 (= slot₁/a %₂) +4 (= slot₂/b %₂) +5 (= slot₃/c %₂) +6 (return %₂) + +######################################## +# Assignment in value but not tail position +let + x = begin + y = 42 + end + x +end +#--------------------- +1 42 +2 (= slot₂/y %₁) +3 (= slot₁/x %₁) +4 slot₁/x +5 (return %₄) + +######################################## +# short form function def, not chain of assignments +begin + local a + a = b() = c = d +end +#--------------------- +1 (method TestMod.b) +2 latestworld +3 TestMod.b +4 (call 
core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::3:9 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/c(!read)] + 1 TestMod.d + 2 (= slot₂/c %₁) + 3 (return %₁) +10 latestworld +11 TestMod.b +12 (= slot₁/a %₁₁) +13 (return %₁₁) + +######################################## +# a.b = ... => setproperty! assignment +let + a.b = c +end +#--------------------- +1 TestMod.a +2 TestMod.c +3 (call top.setproperty! %₁ :b %₂) +4 TestMod.c +5 (return %₄) + +######################################## +# a.b.c = f() => setproperty! assignment, complex case +let + a.b.c = f() +end +#--------------------- +1 TestMod.a +2 (call top.getproperty %₁ :b) +3 TestMod.f +4 (call %₃) +5 (call top.setproperty! %₂ :c %₄) +6 (return %₄) + +######################################## +# declarations of typed locals +let + x::T = f() + x +end +#--------------------- +1 (newvar slot₁/x) +2 TestMod.f +3 (call %₂) +4 TestMod.T +5 (= slot₂/tmp %₃) +6 slot₂/tmp +7 (call core.isa %₆ %₄) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₃) +10 slot₂/tmp +11 (call top.convert %₄ %₁₀) +12 (= slot₂/tmp (call core.typeassert %₁₁ %₄)) +13 slot₂/tmp +14 (= slot₁/x %₁₃) +15 slot₁/x +16 (return %₁₅) + +######################################## +# "complex lhs" of `::T` => type-assert, not decl +let + a.b::T = f() + x +end +#--------------------- +1 TestMod.a +2 (call top.getproperty %₁ :b) +3 TestMod.T +4 (call core.typeassert %₂ %₃) +5 TestMod.f +6 (call %₅) +7 TestMod.a +8 (call top.setproperty! 
%₇ :b %₆) +9 TestMod.x +10 (return %₉) + +######################################## +# UnionAll expansion at global scope results in const decl +X{T} = Y{T,T} +#--------------------- +1 (call core.TypeVar :T) +2 (= slot₁/T %₁) +3 slot₁/T +4 TestMod.Y +5 slot₁/T +6 slot₁/T +7 (call core.apply_type %₄ %₅ %₆) +8 (call core.UnionAll %₃ %₇) +9 (call core.declare_const TestMod :X %₈) +10 latestworld +11 (return %₈) + +######################################## +# UnionAll expansion in local scope +let + X{T} = Y{T,T} +end +#--------------------- +1 (call core.TypeVar :T) +2 (= slot₂/T %₁) +3 slot₂/T +4 TestMod.Y +5 slot₂/T +6 slot₂/T +7 (call core.apply_type %₄ %₅ %₆) +8 (call core.UnionAll %₃ %₇) +9 (= slot₁/X %₈) +10 (return %₈) + +######################################## +# Error: Invalid lhs in `=` +a.(b) = rhs +#--------------------- +LoweringError: +a.(b) = rhs +└───┘ ── invalid dot call syntax on left hand side of assignment + +######################################## +# Error: Invalid lhs in `=` +T[x y] = rhs +#--------------------- +LoweringError: +T[x y] = rhs +└────┘ ── invalid spacing in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +T[x; y] = rhs +#--------------------- +LoweringError: +T[x; y] = rhs +└─────┘ ── unexpected `;` in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +T[x ;;; y] = rhs +#--------------------- +LoweringError: +T[x ;;; y] = rhs +└────────┘ ── unexpected `;` in left side of indexed assignment + +######################################## +# Error: Invalid lhs in `=` +[x, y] = rhs +#--------------------- +LoweringError: +[x, y] = rhs +└────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +[x y] = rhs +#--------------------- +LoweringError: +[x y] = rhs +└───┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# 
Error: Invalid lhs in `=` +[x; y] = rhs +#--------------------- +LoweringError: +[x; y] = rhs +└────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +[x ;;; y] = rhs +#--------------------- +LoweringError: +[x ;;; y] = rhs +└───────┘ ── use `(a, b) = ...` to assign multiple values + +######################################## +# Error: Invalid lhs in `=` +1 = rhs +#--------------------- +LoweringError: +1 = rhs +╙ ── invalid assignment location + +######################################## +# Basic updating assignment +begin + local x + x += y +end +#--------------------- +1 TestMod.+ +2 slot₁/x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (= slot₁/x %₄) +6 (return %₄) + +######################################## +# Broadcasted updating assignment +begin + local x + x .+= y +end +#--------------------- +1 (newvar slot₁/x) +2 slot₁/x +3 TestMod.+ +4 TestMod.y +5 (call top.broadcasted %₃ %₂ %₄) +6 (call top.materialize! %₂ %₅) +7 (return %₆) + +######################################## +# Broadcasted updating assignment with general left hand side permitted +f() .+= y +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.+ +4 TestMod.y +5 (call top.broadcasted %₃ %₂ %₄) +6 (call top.materialize! %₂ %₅) +7 (return %₆) + +######################################## +# Updating assignment with basic ref as left hand side +x[i] += y +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.i +4 (call top.getindex %₂ %₃) +5 TestMod.y +6 (call %₁ %₄ %₅) +7 TestMod.x +8 TestMod.i +9 (call top.setindex! %₇ %₆ %₈) +10 (return %₆) + +######################################## +# Updating assignment with complex ref as left hand side +g()[f(), end] += y +#--------------------- +1 TestMod.g +2 (call %₁) +3 TestMod.f +4 (call %₃) +5 (call top.lastindex %₂ 2) +6 TestMod.+ +7 (call top.getindex %₂ %₄ %₅) +8 TestMod.y +9 (call %₆ %₇ %₈) +10 (call top.setindex! 
%₂ %₉ %₄ %₅) +11 (return %₉) + +######################################## +# Updating assignment with type assert on left hand side +begin + local x + x::T += y +end +#--------------------- +1 TestMod.+ +2 slot₁/x +3 TestMod.T +4 (call core.typeassert %₂ %₃) +5 TestMod.y +6 (call %₁ %₄ %₅) +7 (= slot₁/x %₆) +8 (return %₆) + +######################################## +# Updating assignment with ref and type assert on left hand side +begin + local x + x[f()]::T += y +end +#--------------------- +1 (newvar slot₁/x) +2 TestMod.f +3 (call %₂) +4 TestMod.+ +5 slot₁/x +6 (call top.getindex %₅ %₃) +7 TestMod.T +8 (call core.typeassert %₆ %₇) +9 TestMod.y +10 (call %₄ %₈ %₉) +11 slot₁/x +12 (call top.setindex! %₁₁ %₁₀ %₃) +13 (return %₁₀) + +######################################## +# Error: Updating assignment with invalid left hand side +f() += y +#--------------------- +LoweringError: +f() += y +└─┘ ── invalid assignment location + +######################################## +# Error: Updating assignment with invalid tuple destructuring on left hand side +(if false end, b) += 2 +#--------------------- +LoweringError: +(if false end, b) += 2 +└───────────────┘ ── invalid multiple assignment location diff --git a/JuliaLowering/test/branching.jl b/JuliaLowering/test/branching.jl new file mode 100644 index 0000000000000..2b7eac29f348c --- /dev/null +++ b/JuliaLowering/test/branching.jl @@ -0,0 +1,317 @@ +# Branching + +@testset "branching" begin + +test_mod = Module() + +#------------------------------------------------------------------------------- +@testset "Tail position" begin + +@test JuliaLowering.include_string(test_mod, """ +let a = true + if a + 1 + end +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + if a + 1 + end +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let a = true + if a + 1 + else + 2 + end +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + if a + 1 + else + 
2 + end +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + if a + 1 + elseif b + 2 + else + 3 + end +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + if a + 1 + elseif b + 2 + else + 3 + end +end +""") === 3 + +end + +#------------------------------------------------------------------------------- +@testset "Value required but not tail position" begin + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = if a + 1 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = if a + 1 + end + x +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = if a + 1 + else + 2 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = if a + 1 + else + 2 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + x = if a + 1 + elseif b + 2 + else + 3 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + x = if a + 1 + elseif b + 2 + else + 3 + end + x +end +""") === 3 + +end + +#------------------------------------------------------------------------------- +@testset "Side effects (not value or tail position)" begin + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = nothing + if a + x = 1 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = nothing + if a + x = 1 + end + x +end +""") === nothing + +@test JuliaLowering.include_string(test_mod, """ +let a = true + x = nothing + if a + x = 1 + else + x = 2 + end + x +end +""") === 1 + +@test JuliaLowering.include_string(test_mod, """ +let a = false + x = nothing + if a + x = 1 + else + x = 2 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = true + x = nothing + if a + x = 1 + elseif b + x = 
2 + else + x = 3 + end + x +end +""") === 2 + +@test JuliaLowering.include_string(test_mod, """ +let a = false, b = false + x = nothing + if a + x = 1 + elseif b + x = 2 + else + x = 3 + end + x +end +""") === 3 + +end +#------------------------------------------------------------------------------- +# Block condition +@test JuliaLowering.include_string(test_mod, """ +let a = true + if begin; x = 2; a; end + x + end +end +""") === 2 + +#------------------------------------------------------------------------------- +@testset "`&&` and `||` chains" begin + +@test JuliaLowering.include_string(test_mod, """ +true && "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +true && true && "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +false && "hi" +""") == false + +@test JuliaLowering.include_string(test_mod, """ +true && false && "hi" +""") == false + +@test JuliaLowering.include_string(test_mod, """ +begin + z = true && "hi" + z +end +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = false && "hi" + z +end +""") == false + + +@test JuliaLowering.include_string(test_mod, """ +true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +true || true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +false || "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +false || true || "hi" +""") == true + +@test JuliaLowering.include_string(test_mod, """ +false || false || "hi" +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = false || "hi" + z +end +""") == "hi" + +@test JuliaLowering.include_string(test_mod, """ +begin + z = true || "hi" + z +end +""") == true + +end + +@testset "symbolic goto/label" begin + +JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 1 + @label foo + push!(a, i) + i = i + 1 + if i <= 2 + @goto foo + end + a +end +""") == [1,2] + +end + +end diff --git 
a/JuliaLowering/test/branching_ir.jl b/JuliaLowering/test/branching_ir.jl new file mode 100644 index 0000000000000..f7a63f40291e6 --- /dev/null +++ b/JuliaLowering/test/branching_ir.jl @@ -0,0 +1,239 @@ +######################################## +# Basic branching tail && value +begin + local a, b + if a + b + end +end +#--------------------- +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 slot₁/a +4 (gotoifnot %₃ label₇) +5 slot₂/b +6 (return %₅) +7 (return core.nothing) + +######################################## +# Branching, !tail && !value +begin + local a, b, c + if a + b + end + c +end +#--------------------- +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 slot₁/a +5 (gotoifnot %₄ label₇) +6 slot₂/b +7 slot₃/c +8 (return %₇) + +######################################## +# Branching with else +begin + local a, b, c + if a + b + else + c + end +end +#--------------------- +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 slot₁/a +5 (gotoifnot %₄ label₈) +6 slot₂/b +7 (return %₆) +8 slot₃/c +9 (return %₈) + +######################################## +# Branching with else, !tail && !value +begin + local a, b, c, d + if a + b + else + c + end + d +end +#--------------------- +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 (newvar slot₄/d) +5 slot₁/a +6 (gotoifnot %₅ label₉) +7 slot₂/b +8 (goto label₁₀) +9 slot₃/c +10 slot₄/d +11 (return %₁₀) + +######################################## +# Blocks compile directly to branches +begin + local a, b, c, d + if (a; b && c) + d + end +end +#--------------------- +1 (newvar slot₁/a) +2 (newvar slot₂/b) +3 (newvar slot₃/c) +4 (newvar slot₄/d) +5 slot₁/a +6 slot₂/b +7 (gotoifnot %₆ label₁₂) +8 slot₃/c +9 (gotoifnot %₈ label₁₂) +10 slot₄/d +11 (return %₁₀) +12 (return core.nothing) + +######################################## +# symbolic goto forward jump +begin + a + @goto foo + b + @label foo +end +#--------------------- +1 TestMod.a +2 (goto label₄) +3 TestMod.b +4 (return core.nothing) + 
+######################################## +# symbolic goto backward jump +begin + a + @label foo + b + @goto foo +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 (goto label₂) + +######################################## +# Jumping out of try and catch blocks using @goto +begin + try + a + @goto lab + b + catch + c + @goto lab + d + end + @label lab +end +#--------------------- +1 (enter label₈) +2 TestMod.a +3 (leave %₁) +4 (goto label₁₃) +5 TestMod.b +6 (leave %₁) +7 (goto label₁₃) +8 TestMod.c +9 (pop_exception %₁) +10 (goto label₁₃) +11 TestMod.d +12 (pop_exception %₁) +13 (return core.nothing) + +######################################## +# Jumping out of nested try/catch and catch/try +begin + try + try + a + catch + b + @goto lab + c + end + catch + try + d + @goto lab + e + catch + end + end + @label lab +end +#--------------------- +1 (enter label₁₄) +2 (enter label₆) +3 TestMod.a +4 (leave %₂) +5 (goto label₁₂) +6 TestMod.b +7 (pop_exception %₂) +8 (leave %₁) +9 (goto label₂₄) +10 TestMod.c +11 (pop_exception %₂) +12 (leave %₁) +13 (goto label₂₄) +14 (enter label₂₂) +15 TestMod.d +16 (pop_exception %₁) +17 (leave %₁₄) +18 (goto label₂₄) +19 TestMod.e +20 (leave %₁₄) +21 (goto label₂₃) +22 (pop_exception %₁₄) +23 (pop_exception %₁) +24 (return core.nothing) + +######################################## +# Error: no symbolic label +begin + @goto foo +end +#--------------------- +LoweringError: +begin + @goto foo +# └─┘ ── label `foo` referenced but not defined +end + +######################################## +# Error: duplicate symbolic label +begin + @label foo + @label foo +end +#--------------------- +LoweringError: +begin + @label foo + @label foo +# └─┘ ── Label `foo` defined multiple times +end + +######################################## +# Error: using value of symbolic label +x = @label foo +#--------------------- +LoweringError: +x = @label foo +# └─┘ ── misplaced label in value position diff --git a/JuliaLowering/test/closures.jl 
b/JuliaLowering/test/closures.jl new file mode 100644 index 0000000000000..3999b1a2c486a --- /dev/null +++ b/JuliaLowering/test/closures.jl @@ -0,0 +1,244 @@ +@testset "Closures" begin + +test_mod = Module() + +# Capture assigned before closure +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + f(y) = x+y + f(2), f(3) +end +""") == (3,4) + +# Capture assigned after closure +@test JuliaLowering.include_string(test_mod, """ +let + f(y) = x+y + x = 1 + f(2) +end +""") == 3 + +# Capture assigned inside closure +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + function f(y) + x = y + end + f(100) + x +end +""") == 100 + +Base.eval(test_mod, :(call_it(f, args...) = f(args...))) + +# Closure where a local `x` is captured but not boxed +@test JuliaLowering.include_string(test_mod, """ +begin + function f_unboxed_test(x) + z = 0 + function g() + y = x # x will not be boxed + (y + 1, z) + end + z = 2 # will be boxed + (x, g()) + end + f_unboxed_test(10) +end +""") == (10,(11,2)) + +# Use of isdefined +@test JuliaLowering.include_string(test_mod, """ +begin + function f_isdefined(x) + local w + function g() + z = 3 + (@isdefined(x), # unboxed, always defined capture + @isdefined(y), # boxed capture + @isdefined(z), # normal local var + @isdefined(w)) # boxed undefined var + end + y = 2 + (@isdefined(y), @isdefined(w), g()) + end + f_isdefined(1) +end +""") == (true, false, (true, true, true, false)) + +# Mutually recursive closures (closure capturing a closure) +@test JuliaLowering.include_string(test_mod, """ +let + function recursive_a(n) + here = (:a, n) + n <= 0 ? 
here : (here, recursive_b(n-1)) + end + function recursive_b(n) + ((:b, n), recursive_a(n-1)) + end + recursive_a(2) +end +""") == ((:a, 2), ((:b, 1), (:a, 0))) + +# Global method capturing local variables +JuliaLowering.include_string(test_mod, """ +begin + local x = 1 + function f_global_method_capturing_local() + x = x + 1 + end +end +""") +@test test_mod.f_global_method_capturing_local() == 2 +@test test_mod.f_global_method_capturing_local() == 3 + +# Closure with multiple methods depending on local variables +f_closure_local_var_types = JuliaLowering.include_string(test_mod, """ +let T=Int, S=Float64 + function f_closure_local_var_types(::T) + 1 + end + function f_closure_local_var_types(::S) + 1.0 + end +end +""") +@test f_closure_local_var_types(2) == 1 +@test f_closure_local_var_types(2.0) == 1.0 +@test_throws MethodError f_closure_local_var_types("hi") + +# Multiply nested closures. In this case g_nest needs to capture `x` in order +# to construct an instance of `h_nest()` inside it. 
+@test JuliaLowering.include_string(test_mod, """ +begin + function f_nest(x) + function g_nest(y) + function h_nest(z) + (x,y,z) + end + end + end + + f_nest(1)(2)(3) +end +""") === (1,2,3) + +# Closure with return type must capture the return type +@test JuliaLowering.include_string(test_mod, """ +let T = Int + function f_captured_return_type()::T + 2.0 + end + f_captured_return_type() +end +""") === 2 + +# Capturing a typed local +@test JuliaLowering.include_string(test_mod, """ +let T = Int + x::T = 1.0 + function f_captured_typed_local() + x = 2.0 + end + f_captured_typed_local() + x +end +""") === 2 + +# Capturing a typed local where the type is a nontrivial expression +@test begin + res = JuliaLowering.include_string(test_mod, """ + let T = Int, V=Vector + x::V{T} = [1,2] + function f_captured_typed_local_composite() + x = [100.0, 200.0] + end + f_captured_typed_local_composite() + x + end + """) + res == [100, 200] && eltype(res) == Int +end + +# Evil case where we mutate `T` which is the type of `x`, such that x is +# eventually set to a Float64. +# +# Completely dynamic types for variables should be disallowed somehow?? For +# example, by emitting the expression computing the type of `x` alongside the +# newvar node. 
However, for now we verify that this potentially evil behavior +# is compatible with the existing implementation :) +@test JuliaLowering.include_string(test_mod, """ +let T = Int + x::T = 1.0 + function f_captured_mutating_typed_local() + x = 2 + end + T = Float64 + f_captured_mutating_typed_local() + x +end +""") === 2.0 + +# Anon function syntax +@test JuliaLowering.include_string(test_mod, """ +begin + local y = 2 + call_it(x->x+y, 3) +end +""") == 5 + +# Anon function syntax with `where` +@test JuliaLowering.include_string(test_mod, """ +begin + local y = 2 + call_it((x::T where {T<:Integer})->x+y, 3) +end +""") == 5 + +# Do block syntax +@test JuliaLowering.include_string(test_mod, """ +begin + local y = 2 + call_it(3) do x + x + y + end +end +""") == 5 + +# Attempt to reference capture which is not assigned +@test_throws UndefVarError(:x, :local) JuliaLowering.include_string(test_mod, """ +let + function f() + x + end + f() + x = 1 +end +""") + +# Opaque closure +@test JuliaLowering.include_string(test_mod, """ +let y = 1 + oc = Base.Experimental.@opaque x->2x + y + oc(3) +end +""") == 7 + +# Opaque closure with `...` +@test JuliaLowering.include_string(test_mod, """ +let + oc = Base.Experimental.@opaque (xs...)->xs + oc(3,4,5) +end +""") == (3,4,5) + +# opaque_closure_method internals +method_ex = lower_str(test_mod, "Base.Experimental.@opaque x -> 2x").args[1].code[3] +@test method_ex.head === :opaque_closure_method +@test method_ex.args[1] === nothing +@test method_ex.args[4] isa LineNumberNode + +end diff --git a/JuliaLowering/test/closures_ir.jl b/JuliaLowering/test/closures_ir.jl new file mode 100644 index 0000000000000..0916e3133c97b --- /dev/null +++ b/JuliaLowering/test/closures_ir.jl @@ -0,0 +1,765 @@ +######################################## +# Simple closure +# (FIXME: #self# should have `read` flag set) +let + x = 1 + function f(y) + x + y + end +end +#--------------------- +1 (= slot₂/x (call core.Box)) +2 1 +3 slot₂/x +4 (call core.setfield! 
%₃ :contents %₂) +5 (call core.svec :x) +6 (call core.svec true) +7 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₅ %₆) +8 latestworld +9 TestMod.#f##0 +10 slot₂/x +11 (new %₉ %₁₀) +12 (= slot₁/f %₁₁) +13 TestMod.#f##0 +14 (call core.svec %₁₃ core.Any) +15 (call core.svec) +16 SourceLocation::3:14 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/y slot₃/x(!read)] + 1 TestMod.+ + 2 (call core.getfield slot₁/#self# :x) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₃/x) + 7 slot₃/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ slot₂/y) + 10 (return %₉) +19 latestworld +20 slot₁/f +21 (return %₂₀) + +######################################## +# Closure declaration with no methods +begin + local no_method_f + function no_method_f + end +end +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#no_method_f##0 %₁ %₂) +4 latestworld +5 TestMod.#no_method_f##0 +6 (new %₅) +7 (= slot₁/no_method_f %₆) +8 slot₁/no_method_f +9 (return %₈) + +######################################## +# Closure which sets the value of a captured variable +let + x = 1 + function f(y) + x = 2 + end +end +#--------------------- +1 (= slot₂/x (call core.Box)) +2 1 +3 slot₂/x +4 (call core.setfield! %₃ :contents %₂) +5 (call core.svec :x) +6 (call core.svec true) +7 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₅ %₆) +8 latestworld +9 TestMod.#f##1 +10 slot₂/x +11 (new %₉ %₁₀) +12 (= slot₁/f %₁₁) +13 TestMod.#f##1 +14 (call core.svec %₁₃ core.Any) +15 (call core.svec) +16 SourceLocation::3:14 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/y(!read)] + 1 2 + 2 (call core.getfield slot₁/#self# :x) + 3 (call core.setfield! 
%₂ :contents %₁) + 4 (return %₁) +19 latestworld +20 slot₁/f +21 (return %₂₀) + +######################################## +# Function where arguments are captured into a closure and assigned +function f(x) + function g() + x = 10 + end + g() + x +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (call core.svec :x) +4 (call core.svec true) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##0 %₃ %₄) +6 latestworld +7 TestMod.#f#g##0 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read)] + 1 10 + 2 (call core.getfield slot₁/#self# :x) + 3 (call core.setfield! %₂ :contents %₁) + 4 (return %₁) +13 latestworld +14 TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g(called) slot₄/x(!read)] + 1 (= slot₂/x (call core.Box slot₂/x)) + 2 TestMod.#f#g##0 + 3 (new %₂ slot₂/x) + 4 (= slot₃/g %₃) + 5 slot₃/g + 6 (call %₅) + 7 slot₂/x + 8 (call core.isdefined %₇ :contents) + 9 (gotoifnot %₈ label₁₁) + 10 (goto label₁₃) + 11 (newvar slot₄/x) + 12 slot₄/x + 13 (call core.getfield %₇ :contents) + 14 (return %₁₃) +21 latestworld +22 TestMod.f +23 (return %₂₂) + +######################################## +# Closure where a local `x` is captured but not boxed +function f(x) + function g() + y = x + end + z = x +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (call core.svec :x) +4 (call core.svec false) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##1 %₃ %₄) +6 latestworld +7 TestMod.#f#g##1 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/y(!read)] + 1 (call core.getfield slot₁/#self# :x) + 2 (= slot₂/y %₁) + 3 (return %₁) +13 latestworld +14 
TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/z(!read)] + 1 TestMod.#f#g##1 + 2 (call core.typeof slot₂/x) + 3 (call core.apply_type %₁ %₂) + 4 (new %₃ slot₂/x) + 5 (= slot₃/g %₄) + 6 slot₂/x + 7 (= slot₄/z %₆) + 8 (return %₆) +21 latestworld +22 TestMod.f +23 (return %₂₂) + +######################################## +# Closure where a static parameter of an outer function is captured +function f(::T) where T + function g() + use(T) + end +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (call core.svec :T) +4 (call core.svec true) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##2 %₃ %₄) +6 latestworld +7 TestMod.#f#g##2 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/T(!read)] + 1 TestMod.use + 2 (call core.getfield slot₁/#self# :T) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/T) + 7 slot₂/T + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈) + 10 (return %₉) +13 latestworld +14 (= slot₁/T (call core.TypeVar :T)) +15 TestMod.f +16 (call core.Typeof %₁₅) +17 slot₁/T +18 (call core.svec %₁₆ %₁₇) +19 slot₁/T +20 (call core.svec %₁₉) +21 SourceLocation::1:10 +22 (call core.svec %₁₈ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/g] + 1 TestMod.#f#g##2 + 2 static_parameter₁ + 3 (new %₁ %₂) + 4 (= slot₃/g %₃) + 5 slot₃/g + 6 (return %₅) +24 latestworld +25 TestMod.f +26 (return %₂₅) + +######################################## +# Closure captures with `isdefined` +function f(x) + function g() + z = 3 + (@isdefined(x), # unboxed, always defined capture + @isdefined(y), # boxed capture + @isdefined(z)) # normal local var + end + y = 2 + 
(@isdefined(y), # boxed local + @isdefined(x)) # always defined local (function arg) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (call core.svec :x :y) +4 (call core.svec false true) +5 (call JuliaLowering.eval_closure_type TestMod :#f#g##3 %₃ %₄) +6 latestworld +7 TestMod.#f#g##3 +8 (call core.svec %₇) +9 (call core.svec) +10 SourceLocation::2:14 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/z] + 1 (= slot₂/z 3) + 2 (call core.getfield slot₁/#self# :y) + 3 (call core.isdefined %₂ :contents) + 4 (isdefined slot₂/z) + 5 (call core.tuple true %₃ %₄) + 6 (return %₅) +13 latestworld +14 TestMod.f +15 (call core.Typeof %₁₄) +16 (call core.svec %₁₅ core.Any) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/x slot₃/g slot₄/y] + 1 (= slot₄/y (call core.Box)) + 2 TestMod.#f#g##3 + 3 (call core.typeof slot₂/x) + 4 (call core.apply_type %₂ %₃) + 5 slot₄/y + 6 (new %₄ slot₂/x %₅) + 7 (= slot₃/g %₆) + 8 2 + 9 slot₄/y + 10 (call core.setfield! %₉ :contents %₈) + 11 slot₄/y + 12 (call core.isdefined %₁₁ :contents) + 13 (call core.tuple %₁₂ true) + 14 (return %₁₃) +21 latestworld +22 TestMod.f +23 (return %₂₂) + +######################################## +# Nested captures - here `g` captures `x` because it is needed to initialize +# the closure `h` which captures both `x` and `y`. 
+# [method_filter: #f_nest#g_nest##0] +function f_nest(x) + function g_nest(y) + function h_nest(z) + (x,y,z) + end + end +end +#--------------------- +slots: [slot₁/#self#(!read) slot₂/y(!read) slot₃/h_nest] +1 TestMod.#f_nest#g_nest#h_nest##0 +2 (call core.getfield slot₁/#self# :x) +3 (call core.typeof %₂) +4 (call core.typeof slot₂/y) +5 (call core.apply_type %₁ %₃ %₄) +6 (call core.getfield slot₁/#self# :x) +7 (new %₅ %₆ slot₂/y) +8 (= slot₃/h_nest %₇) +9 slot₃/h_nest +10 (return %₉) + +######################################## +# Global method capturing local variables +begin + local x = 1 + function f() + x = x + 1 + end +end +#--------------------- +1 (= slot₁/x (call core.Box)) +2 1 +3 slot₁/x +4 (call core.setfield! %₃ :contents %₂) +5 (method TestMod.f) +6 latestworld +7 TestMod.f +8 (call core.Typeof %₇) +9 (call core.svec %₈) +10 (call core.svec) +11 SourceLocation::3:14 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.+ + 2 (captured_local 1) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/x) + 7 slot₂/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ 1) + 10 (captured_local 1) + 11 (call core.setfield! %₁₀ :contents %₉) + 12 (return %₉) +14 slot₁/x +15 (call core.svec %₁₄) +16 (call JuliaLowering.replace_captured_locals! 
%₁₃ %₁₅) +17 --- method core.nothing %₁₂ %₁₆ +18 latestworld +19 TestMod.f +20 (return %₁₉) + +######################################## +# Anonymous function syntax with -> +x -> x*x +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) +4 latestworld +5 TestMod.#->##0 +6 (new %₅) +7 TestMod.#->##0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 SourceLocation::1:1 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/x] + 1 TestMod.* + 2 (call %₁ slot₂/x slot₂/x) + 3 (return %₂) +13 latestworld +14 (return %₆) + +######################################## +# Anonymous function syntax with `function` +function (x) + x*x +end +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :##anon###0 %₁ %₂) +4 latestworld +5 TestMod.##anon###0 +6 (new %₅) +7 TestMod.##anon###0 +8 (call core.svec %₇ core.Any) +9 (call core.svec) +10 SourceLocation::1:10 +11 (call core.svec %₈ %₉ %₁₀) +12 --- method core.nothing %₁₁ + slots: [slot₁/#self#(!read) slot₂/x] + 1 TestMod.* + 2 (call %₁ slot₂/x slot₂/x) + 3 (return %₂) +13 latestworld +14 (return %₆) + +######################################## +# `do` blocks +f(x; a=1) do y + y + 2 +end +#--------------------- +1 TestMod.f +2 (call core.tuple :a) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1) +5 (call %₃ %₄) +6 (call core.svec) +7 (call core.svec) +8 (call JuliaLowering.eval_closure_type TestMod :#do##0 %₆ %₇) +9 latestworld +10 TestMod.#do##0 +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:13 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/y] + 1 TestMod.+ + 2 (call %₁ slot₂/y 2) + 3 (return %₂) +16 latestworld +17 TestMod.#do##0 +18 (new %₁₇) +19 TestMod.x +20 (call core.kwcall %₅ %₁ %₁₈ %₁₉) +21 (return %₂₀) + 
+######################################## +# Error: Static parameter clashing with closure name +function f(::g) where {g} + function g() + end +end +#--------------------- +LoweringError: +function f(::g) where {g} + function g() +# ╙ ── local variable name `g` conflicts with a static parameter + end +end + +######################################## +# Opaque closure +let y = 1 + Base.Experimental.@opaque (x, z::T)->2x + y - z +end +#--------------------- +1 1 +2 (= slot₁/y (call core.Box)) +3 slot₁/y +4 (call core.setfield! %₃ :contents %₁) +5 TestMod.T +6 (call core.apply_type core.Tuple core.Any %₅) +7 (call core.apply_type core.Union) +8 --- opaque_closure_method core.nothing 2 false SourceLocation::2:31 + slots: [slot₁/#self#(!read) slot₂/x slot₃/z slot₄/y(!read)] + 1 TestMod.- + 2 TestMod.+ + 3 TestMod.* + 4 (call %₃ 2 slot₂/x) + 5 (call core.getfield slot₁/#self# 1) + 6 (call core.isdefined %₅ :contents) + 7 (gotoifnot %₆ label₉) + 8 (goto label₁₁) + 9 (newvar slot₄/y) + 10 slot₄/y + 11 (call core.getfield %₅ :contents) + 12 (call %₂ %₄ %₁₁) + 13 (call %₁ %₁₂ slot₃/z) + 14 (return %₁₃) +9 slot₁/y +10 (new_opaque_closure %₆ %₇ core.Any true %₈ %₉) +11 (return %₁₀) + +######################################## +# Opaque closure with `...` +let + Base.Experimental.@opaque (x, ys...)->ys +end +#--------------------- +1 (call core.apply_type core.Vararg core.Any) +2 (call core.apply_type core.Tuple core.Any %₁) +3 (call core.apply_type core.Union) +4 --- opaque_closure_method core.nothing 2 true SourceLocation::2:31 + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] + 1 slot₃/ys + 2 (return %₁) +5 (new_opaque_closure %₂ %₃ core.Any true %₄) +6 (return %₅) + +######################################## +# Error: Opaque closure with default args +Base.Experimental.@opaque (x=1)->2x +#--------------------- +LoweringError: +Base.Experimental.@opaque (x=1)->2x +# ╙ ── Default positional arguments cannot be used in an opaque closure + 
+######################################## +# Mutually recursive closures +let + function recursive_a() + recursive_b() + end + function recursive_b() + recursive_a() + end +end +#--------------------- +1 (= slot₁/recursive_a (call core.Box)) +2 (= slot₂/recursive_b (call core.Box)) +3 (call core.svec :recursive_b) +4 (call core.svec true) +5 (call JuliaLowering.eval_closure_type TestMod :#recursive_a##0 %₃ %₄) +6 latestworld +7 TestMod.#recursive_a##0 +8 slot₂/recursive_b +9 (new %₇ %₈) +10 slot₁/recursive_a +11 (call core.setfield! %₁₀ :contents %₉) +12 TestMod.#recursive_a##0 +13 (call core.svec %₁₂) +14 (call core.svec) +15 SourceLocation::2:14 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(!read) slot₂/recursive_b(!read)] + 1 (call core.getfield slot₁/#self# :recursive_b) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/recursive_b) + 6 slot₂/recursive_b + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +18 latestworld +19 (call core.svec :recursive_a) +20 (call core.svec true) +21 (call JuliaLowering.eval_closure_type TestMod :#recursive_b##0 %₁₉ %₂₀) +22 latestworld +23 TestMod.#recursive_b##0 +24 slot₁/recursive_a +25 (new %₂₃ %₂₄) +26 slot₂/recursive_b +27 (call core.setfield! 
%₂₆ :contents %₂₅) +28 TestMod.#recursive_b##0 +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::5:14 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read) slot₂/recursive_a(!read)] + 1 (call core.getfield slot₁/#self# :recursive_a) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/recursive_a) + 6 slot₂/recursive_a + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +34 latestworld +35 slot₂/recursive_b +36 (call core.isdefined %₃₅ :contents) +37 (gotoifnot %₃₆ label₃₉) +38 (goto label₄₁) +39 (newvar slot₄/recursive_b) +40 slot₄/recursive_b +41 (call core.getfield %₃₅ :contents) +42 (return %₄₁) + +######################################## +# Closure with keywords +let y = y_init + function f_kw_closure(; x::X=x_default) + x + y + end +end +#--------------------- +1 TestMod.y_init +2 (= slot₁/y (call core.Box)) +3 (= slot₂/#f_kw_closure#0 (call core.Box)) +4 slot₁/y +5 (call core.setfield! %₄ :contents %₁) +6 (call core.svec :#f_kw_closure#0) +7 (call core.svec true) +8 (call JuliaLowering.eval_closure_type TestMod :#f_kw_closure##0 %₆ %₇) +9 latestworld +10 TestMod.#f_kw_closure##0 +11 slot₂/#f_kw_closure#0 +12 (new %₁₀ %₁₁) +13 (= slot₃/f_kw_closure %₁₂) +14 (call core.svec :y) +15 (call core.svec true) +16 (call JuliaLowering.eval_closure_type TestMod :##f_kw_closure#0##0 %₁₄ %₁₅) +17 latestworld +18 TestMod.##f_kw_closure#0##0 +19 slot₁/y +20 (new %₁₈ %₁₉) +21 slot₂/#f_kw_closure#0 +22 (call core.setfield! 
%₂₁ :contents %₂₀) +23 TestMod.##f_kw_closure#0##0 +24 TestMod.X +25 TestMod.#f_kw_closure##0 +26 (call core.svec %₂₃ %₂₄ %₂₅) +27 (call core.svec) +28 SourceLocation::2:14 +29 (call core.svec %₂₆ %₂₇ %₂₈) +30 --- method core.nothing %₂₉ + slots: [slot₁/#self#(!read) slot₂/x slot₃/#self#(!read) slot₄/y(!read)] + 1 (meta :nkw 1) + 2 TestMod.+ + 3 (call core.getfield slot₁/#self# :y) + 4 (call core.isdefined %₃ :contents) + 5 (gotoifnot %₄ label₇) + 6 (goto label₉) + 7 (newvar slot₄/y) + 8 slot₄/y + 9 (call core.getfield %₃ :contents) + 10 (call %₂ slot₂/x %₉) + 11 (return %₁₀) +31 latestworld +32 (call core.typeof core.kwcall) +33 TestMod.#f_kw_closure##0 +34 (call core.svec %₃₂ core.NamedTuple %₃₃) +35 (call core.svec) +36 SourceLocation::2:14 +37 (call core.svec %₃₄ %₃₅ %₃₆) +38 --- code_info + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/x(!read) slot₆/#f_kw_closure#0(!read)] + 1 (newvar slot₅/x) + 2 (call core.isdefined slot₂/kws :x) + 3 (gotoifnot %₂ label₁₄) + 4 (call core.getfield slot₂/kws :x) + 5 TestMod.X + 6 (call core.isa %₄ %₅) + 7 (gotoifnot %₆ label₉) + 8 (goto label₁₂) + 9 TestMod.X + 10 (new core.TypeError :keyword argument :x %₉ %₄) + 11 (call core.throw %₁₀) + 12 (= slot₄/kwtmp %₄) + 13 (goto label₁₆) + 14 TestMod.x_default + 15 (= slot₄/kwtmp %₁₄) + 16 slot₄/kwtmp + 17 (call top.keys slot₂/kws) + 18 (call core.tuple :x) + 19 (call top.diff_names %₁₇ %₁₈) + 20 (call top.isempty %₁₉) + 21 (gotoifnot %₂₀ label₂₃) + 22 (goto label₂₄) + 23 (call top.kwerr slot₂/kws slot₃/#self#) + 24 (captured_local 1) + 25 (call core.isdefined %₂₄ :contents) + 26 (gotoifnot %₂₅ label₂₈) + 27 (goto label₃₀) + 28 (newvar slot₆/#f_kw_closure#0) + 29 slot₆/#f_kw_closure#0 + 30 (call core.getfield %₂₄ :contents) + 31 (call %₃₀ %₁₆ slot₃/#self#) + 32 (return %₃₁) +39 slot₂/#f_kw_closure#0 +40 (call core.svec %₃₉) +41 (call JuliaLowering.replace_captured_locals! 
%₃₈ %₄₀) +42 --- method core.nothing %₃₇ %₄₁ +43 latestworld +44 TestMod.#f_kw_closure##0 +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::2:14 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- method core.nothing %₄₈ + slots: [slot₁/#self# slot₂/#f_kw_closure#0(!read)] + 1 (call core.getfield slot₁/#self# :#f_kw_closure#0) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/#f_kw_closure#0) + 6 slot₂/#f_kw_closure#0 + 7 (call core.getfield %₁ :contents) + 8 TestMod.x_default + 9 (call %₇ %₈ slot₁/#self#) + 10 (return %₉) +50 latestworld +51 slot₃/f_kw_closure +52 (return %₅₁) + +######################################## +# Closure capturing a typed local must also capture the type expression +# [method_filter: #f_captured_typed_local##0] +let T=Blah + x::T = 1.0 + function f_captured_typed_local() + x = 2.0 + end + f_captured_typed_local() + x +end +#--------------------- +slots: [slot₁/#self#(!read) slot₂/T(!read) slot₃/tmp(!read)] +1 2.0 +2 (call core.getfield slot₁/#self# :x) +3 (call core.getfield slot₁/#self# :T) +4 (call core.isdefined %₃ :contents) +5 (gotoifnot %₄ label₇) +6 (goto label₉) +7 (newvar slot₂/T) +8 slot₂/T +9 (call core.getfield %₃ :contents) +10 (= slot₃/tmp %₁) +11 slot₃/tmp +12 (call core.isa %₁₁ %₉) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₈) +15 slot₃/tmp +16 (call top.convert %₉ %₁₅) +17 (= slot₃/tmp (call core.typeassert %₁₆ %₉)) +18 slot₃/tmp +19 (call core.setfield! %₂ :contents %₁₈) +20 (return %₁) + +######################################## +# Error: Closure outside any top level context +# (Should only happen in a user-visible way when lowering code emitted +# from a `@generated` function code generator.) +@ast_ [K"lambda"(is_toplevel_thunk=false, toplevel_pure=false) + [K"block"] + [K"block"] + [K"->" [K"tuple"] [K"block"]] +] +#--------------------- +LoweringError: +#= line 1 =# - Top level code was found outside any top level context. 
`@generated` functions may not contain closures, including `do` syntax and generators/comprehension diff --git a/JuliaLowering/test/compat.jl b/JuliaLowering/test/compat.jl new file mode 100644 index 0000000000000..a7fce558e9f40 --- /dev/null +++ b/JuliaLowering/test/compat.jl @@ -0,0 +1,629 @@ +using Test +const JS = JuliaSyntax +const JL = JuliaLowering + +@testset "expr->syntaxtree" begin + @testset "semantics only" begin + # Test that `s` evaluates to the same thing both under normal parsing + # and with the expr->tree->expr transformation + + programs = [ + "let x = 2; x += 5; x -= 1; [1] .*= 1; end", + "let var\"x\" = 123; x; end", + "try; 1; catch e; e; else; 2; finally; 3; end", + "for x in 1:2, y in 3:4; x + y; end", + "[x+y for x in 1:2, y in 3:4]", + "Int[x+y for x in 1:2, y in 3:4 if true]", + "for x in 1; x+=1\n if true\n continue \n elseif false \n break\n end\n end", + "Base.Meta.@lower 1", + "function foo(x, y=1; z, what::Int=5); x + y + z + what; end; foo(1,2;z=3)", + "(()->1)()", + "((x)->2)(3)", + "((x,y)->4)(5,6)", + "filter([1,2,3]) do x; x > 1; end", + """ + struct X + f1::Int # hi + "foo" + f2::Int + f3::Int + X(y) = new(y,y,y) + end + """, + "global x,y", + "global (x,y)", + "999999999999999999999999999999999999999", + "0x00000000000000001", + "(0x00000000000000001)", + "let x = 1; 2x; end", + "let x = 1; (2)(3)x; end", + "if false\n1\nelseif true\n 3\nend", + "\"str\"", + "\"\$(\"str\")\"", + "'a'", + "'α'", + "'\\xce\\xb1'", + "let x = 1; \"\"\"\n a\n \$x\n b\n c\"\"\"; end", + "try throw(0) catch e; 1 end", + "try 0 finally 1 end", + "try throw(0) catch e; 1 finally 2 end", + "try throw(0) catch e; 1 else 2 end", + "try throw(0) catch e; 1 else 2 finally 3 end", + "try throw(0) finally 1 catch e; 2 end", + ":.+", + ":.=", + ":(.=)", + ":+=", + ":(+=)", + ":.+=", + ":(.+=)", + ] + + test_mod_1 = Module() + test_mod_2 = Module() + + for p in programs + @testset "`$p`" begin + local good_expr, good_out, test_st, test_expr, test_out + try + 
good_expr = JS.parseall(Expr, p; ignore_errors=true) + good_out = Core.eval(test_mod_1, good_expr) + catch e + @error "Couldn't eval the reference expression---fix your test" + rethrow(e) + end + + test_st = JuliaLowering.expr_to_syntaxtree(good_expr) + test_expr = Expr(test_st) + test_out = Core.eval(test_mod_2, test_expr) + + @test good_out == test_out + end + end + end + + # Remove any information that can't be recovered from an Expr + function normalize_st!(st) + k = JS.kind(st) + args = JS.children(st) + + if JS.is_infix_op_call(st) && (k === K"call" || k === K"dotcall") + # Infix calls are not preserved in Expr; we need to re-order the children + pre_st_args = JL.NodeId[st[2]._id, st[1]._id] + for c in st[3:end] + push!(pre_st_args, c._id) + end + pre_st_flags = (JS.flags(st) & ~JS.INFIX_FLAG) | JS.PREFIX_CALL_FLAG + JL.setchildren!(st._graph, st._id, pre_st_args) + JL.setflags!(st._graph, st._id, pre_st_flags) + elseif JS.is_postfix_op_call(st) && (k === K"call" || k === K"dotcall") + pre_st_args = JL.NodeId[st[end]._id] + for c in st[1:end-1] + push!(pre_st_args, c._id) + end + pre_st_flags = (JS.flags(st) & ~JS.POSTFIX_OP_FLAG) | JS.PREFIX_CALL_FLAG + JL.setchildren!(st._graph, st._id, pre_st_args) + JL.setflags!(st._graph, st._id, pre_st_flags) + elseif k in JS.KSet"tuple block macrocall" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.PARENS_FLAG) + elseif k === K"toplevel" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TOPLEVEL_SEMICOLONS_FLAG) + end + + if k in JS.KSet"tuple call dotcall macrocall vect curly braces <: >:" + JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.TRAILING_COMMA_FLAG) + end + + k === K"quote" && JL.setflags!(st._graph, st._id, JS.flags(st) & ~JS.COLON_QUOTE) + k === K"wrapper" && JL.sethead!(st._graph, st._id, K"block") + + # All ops are prefix ops in an expr. 
+ # Ignore trivia (shows up on some K"error"s) + JL.setflags!(st._graph, st._id, JS.flags(st) & + ~JS.PREFIX_OP_FLAG & ~JS.INFIX_FLAG & ~JS.TRIVIA_FLAG & ~JS.NON_TERMINAL_FLAG) + + for c in JS.children(st) + normalize_st!(c) + end + return st + end + + function st_roughly_equal(; st_good, st_test) + normalize_st!(st_good) + + if kind(st_good) === kind(st_test) === K"error" + # We could consider some sort of equivalence later, but we would + # need to specify within JS what the error node contains. + return true + end + + out = kind(st_good) === kind(st_test) && + JS.flags(st_good) === JS.flags(st_test) && + JS.numchildren(st_good) === JS.numchildren(st_test) && + JS.is_leaf(st_good) === JS.is_leaf(st_test) && + get(st_good, :value, nothing) === get(st_test, :value, nothing) && + get(st_good, :name_val, nothing) === get(st_test, :name_val, nothing) && + all(map((cg, ct)->st_roughly_equal(;st_good=cg, st_test=ct), + JS.children(st_good), JS.children(st_test))) + + !out && @warn("!st_roughly_equal (normalized_reference, st_test):", + JS.sourcetext(st_good), st_good, st_test) + return out + end + + @testset "SyntaxTree equivalence (tests taken from JuliaSyntax expr.jl)" begin + # test that string->tree->expr->tree ~= string->tree + # ^^ + programs = [ + "begin a\nb\n\nc\nend", + "(a;b;c)", + "begin end", + "(;;)", + "a;b", + "module A\n\nbody\nend", + "function f()\na\n\nb\nend", + "f() = 1", + "macro f()\na\nend", + "function f end", + "macro f end", + "function (f() where {T}) end", + "function (f()::S) end", + "a -> b", + "(a,) -> b", + "(a where {T}) -> b", + "a -> (\nb;c)", + "a -> begin\nb\nc\nend", + "(a;b=1) -> c", + "(a...;b...) 
-> c", + "(;) -> c", + "a::T -> b", + "let i=is, j=js\nbody\nend", + "for x=xs\n\nend", + "for x=xs\ny\nend", + "while cond\n\nend", + "while cond\ny\nend", + "f() = xs", + "f() =\n(a;b)", + "f() =\nbegin\na\nb\nend", + "let f(x) =\ng(x)=1\nend", + "f() .= xs", + "for i=is body end", + "for i=is, j=js\nbody\nend", + "f(x) do y\n body end", + "@f(x) do y body end", + "f(x; a=1) do y body end", + "g(f(x) do y\n body end)", + "f(a=1)", + "f(; b=2)", + "f(a=1; b=2)", + "f(a; b; c)", + "+(a=1,)", + "(a=1)()", + "(x=1) != 2", + "+(a=1)", + "(a=1)'", + "f.(a=1; b=2)", + "(a=1,)", + "(a=1,; b=2)", + "(a=1,; b=2; c=3)", + "x[i=j]", + "(i=j)[x]", + "x[a, b; i=j]", + "(i=j){x}", + "x{a, b; i=j}", + "[a=1,; b=2]", + "{a=1,; b=2}", + "f(a .= 1)", + "f(((a = 1)))", + "(((a = 1)),)", + "(;((a = 1)),)", + "a.b", + "a.@b x", + "f.(x,y)", + "f.(x=1)", + "f.(a=1; b=2)", + "(a=1).()", + "x .+ y", + "(x=1) .+ y", + "a .< b .< c", + "a .< (.<) .< c", + "quote .+ end", + ".+(x)", + ".+x", + "f(.+)", + "(a, .+)", + "x += y", + "x .+= y", + "x \u2212= y", + "let x=1\n end", + "let x=1 ; end", + "let x ; end", + "let x::1 ; end", + "let x=1,y=2 end", + "let x+=1 ; end", + "let ; end", + "let ; body end", + "let\na\nb\nend", + "A where {T}", + "A where {S, T}", + "A where {X, Y; Z}", + "@m\n", + "\n@m", + "@m(x; a)", + "@m(a=1; b=2)", + "@S[a,b]", + "@S[a b]", + "@S[a; b]", + "@S[a ;; b]", + "[x,y ; z]", + "[a ;;; b ;;;; c]", + "[a b ; c d]", + "[a\nb]", + "[a b]", + "[a b ; c d]", + "T[a ;;; b ;;;; c]", + "T[a b ; c d]", + "T[a\nb]", + "T[a b]", + "T[a b ; c d]", + "(x for a in as for b in bs)", + "(x for a in as, b in bs)", + "(x for a in as, b in bs if z)", + "(x for a in as, b in bs for c in cs, d in ds)", + "(x for a in as for b in bs if z)", + "(x for a in as if z for b in bs)", + "[x for a = as for b = bs if cond1 for c = cs if cond2]" , + "[x for a = as if begin cond2 end]" , + "(x for a in as if z)", + "return x", + "struct A end", + "mutable struct A end", + "struct A <: B \n a::X 
\n end", + "struct A \n a \n b \n end", + "struct A const a end", + "export a", + "export +, ==", + "export \n a", + "global x", + "local x", + "global x,y", + "const x,y = 1,2", + "const x = 1", + "global x ~ 1", + "global x += 1", + "(;)", + "(; a=1)", + "(; a=1; b=2)", + "(a; b; c,d)", + "module A end", + "baremodule A end", + "import A", + "A.x", + "A.\$x", + "try x catch e; y end", + "try x finally y end", + "try x catch e; y finally z end", + "try x catch e; y else z end", + "try x catch e; y else z finally w end", + ] + + for p in programs + @testset "`$(repr(p))`" begin + st_good = JS.parsestmt(JL.SyntaxTree, p; ignore_errors=true) + st_test = JL.expr_to_syntaxtree(Expr(st_good)) + @test st_roughly_equal(;st_good, st_test) + end + end + + # toplevel has a special parsing mode where docstrings and a couple of + # other things are enabled + toplevel_programs = [ + "\"docstr\"\nthing_to_be_documented", + ] + for p in toplevel_programs + @testset "`$(repr(p))`" begin + st_good = JS.parseall(JL.SyntaxTree, p; ignore_errors=true) + st_test = JL.expr_to_syntaxtree(Expr(st_good)) + @test st_roughly_equal(;st_good, st_test) + end + end + end + + @testset "provenance via scavenging for LineNumberNodes" begin + # Provenenance of a node should be the last seen LineNumberNode in the + # depth-first traversal of the Expr, or the initial line given if none + # have been seen yet. If none have been seen and no initial line was + # given, .source should still be defined on all nodes (of unspecified + # value, but hopefully a helpful value for the user.) 
+ ex = Expr(:block, + LineNumberNode(123), + Expr(:block, + Expr(:block, LineNumberNode(456)), + Expr(:block)), + Expr(:block, + Expr(:block), + Expr(:block))) + + # No initial line provided + st = JuliaLowering.expr_to_syntaxtree(ex) + for i in length(st._graph.edge_ranges) + @test !isnothing(get(SyntaxTree(st._graph, i), :source, nothing)) + end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end + + # Same tree, but provide an initial line + st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(789)) + @test let lnn = st.source; lnn isa LineNumberNode && lnn.line === 789; end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 123; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][1].source; lnn isa LineNumberNode && lnn.line === 456; end + @test let lnn = st[2][2].source; lnn isa LineNumberNode && lnn.line === 456; end + + ex = parsestmt(Expr, """ + begin + try + maybe + lots + of + lines + catch exc + y + end + end""") + st = JuliaLowering.expr_to_syntaxtree(ex, LineNumberNode(1)) + + # sanity: ensure we're testing the tree we expect + @test st ≈ @ast_ [K"block" + [K"try" + [K"block" + "maybe"::K"Identifier" + "lots"::K"Identifier" + "of"::K"Identifier" + "lines"::K"Identifier" + ] + [K"catch" + "exc"::K"Identifier" + [K"block" + "y"::K"Identifier" + ] + ] + ] + ] + + @test let lnn = 
st.source; lnn isa LineNumberNode && lnn.line === 1; end + @test let lnn = st[1].source; lnn isa LineNumberNode && lnn.line === 2; end + @test let lnn = st[1][1].source; lnn isa LineNumberNode && lnn.line === 2; end + @test let lnn = st[1][1][1].source; lnn isa LineNumberNode && lnn.line === 3; end + @test let lnn = st[1][1][2].source; lnn isa LineNumberNode && lnn.line === 4; end + @test let lnn = st[1][1][3].source; lnn isa LineNumberNode && lnn.line === 5; end + @test let lnn = st[1][1][4].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2][1].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2][2].source; lnn isa LineNumberNode && lnn.line === 6; end + @test let lnn = st[1][2][2][1].source; lnn isa LineNumberNode && lnn.line === 8; end + + st_shortfunc = JuliaLowering.expr_to_syntaxtree( + Expr(:block, + LineNumberNode(11), + Expr(:(=), + Expr(:call, :f), + :body)) + ) + @test st_shortfunc ≈ @ast_ [K"block" + [K"function" + [K"call" "f"::K"Identifier"] + "body"::K"Identifier" + ] + ] + @test let lnn = st_shortfunc[1][1].source; lnn isa LineNumberNode && lnn.line === 11; end + + st_shortfunc_2 = JuliaLowering.expr_to_syntaxtree( + Expr(:block, + LineNumberNode(11), + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(22), + :body))) + ) + @test st_shortfunc_2 ≈ @ast_ [K"block" + [K"function" + [K"call" "f"::K"Identifier"] + "body"::K"Identifier" + ] + ] + @test let lnn = st_shortfunc_2[1][1].source; lnn isa LineNumberNode && lnn.line === 22; end + end + + @testset "`Expr(:escape)` handling" begin + # `x.y` with quoted y escaped (this esc does nothing, but is permitted by + # the existing expander) + @test JuliaLowering.expr_to_syntaxtree(Expr(:(.), :x, esc(QuoteNode(:y)))) ≈ + @ast_ [K"." 
+ "x"::K"Identifier" + [K"escape" + "y"::K"Identifier" + ] + ] + + # `f(x; y)` with parameters escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :f, esc(Expr(:parameters, :y)), :x)) ≈ + @ast_ [K"call" + "f"::K"Identifier" + "x"::K"Identifier" + [K"escape" + [K"parameters" + "y"::K"Identifier" + ] + ] + ] + + # `.+(x)` with operator escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, esc(Symbol(".+")), :x)) ≈ + @ast_ [K"dotcall" + [K"escape" "+"::K"Identifier"] + "x"::K"Identifier" + ] + + # `let x \n end` with binding escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:let, esc(:x), Expr(:block))) ≈ + @ast_ [K"let" + [K"block" [K"escape" "x"::K"Identifier"]] + [K"block"] + ] + + # `x .+ y` with .+ escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:comparison, :x, esc(Symbol(".+")), :y)) ≈ + @ast_ [K"comparison" + "x"::K"Identifier" + [K"." + [K"escape" "+"::K"Identifier"] + ] + "y"::K"Identifier" + ] + + # `@mac x` with macro name escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(Symbol("@mac")), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"escape" [K"macro_name" "mac"::K"Identifier"]] + "x"::K"Identifier" + ] + + # `@mac x` with macro name escaped + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, esc(Expr(:(.), :A, QuoteNode(Symbol("@mac")))), nothing, :x) + ) ≈ @ast_ [K"macrocall" + [K"escape" + [K"." 
+ "A"::K"Identifier" + [K"macro_name" "mac"::K"Identifier"] + ] + ] + "x"::K"Identifier" + ] + + # `x where y` + @test JuliaLowering.expr_to_syntaxtree(Expr(:where, :x, esc(:y))) ≈ + @ast_ [K"where" + "x"::K"Identifier" + [K"braces" + [K"escape" "y"::K"Identifier"] + ] + ] + + # Some weirdly placed esc's in try-catch + # `try body1 catch exc \n end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, :exc, esc(false))) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"catch" + "exc"::K"Identifier" + "nothing"::K"core" + ] + ] + # `try body1 catch \n body2 \n end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"catch" + ""::K"Placeholder" + "body2"::K"Identifier" + ] + ] + # `try body1 finally body2 end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"finally" + "body2"::K"Identifier" + ] + ] + + # `try body1 finally body2 end` + @test JuliaLowering.expr_to_syntaxtree(Expr(:try, :body1, esc(false), esc(false), esc(false), :body2)) ≈ + @ast_ [K"try" + "body1"::K"Identifier" + [K"else" + "body2"::K"Identifier" + ] + ] + + # [x ;;; y] with dim escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:ncat, esc(3), :x, :y)) ≈ + @ast_ [K"ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3)) + "x"::K"Identifier" + "y"::K"Identifier" + ] + + # T[x ;;; y] with dim escaped + @test JuliaLowering.expr_to_syntaxtree(Expr(:typed_ncat, :T, esc(3), :x, :y)) ≈ + @ast_ [K"typed_ncat"(syntax_flags=JuliaSyntax.set_numeric_flags(3)) + "T"::K"Identifier" + "x"::K"Identifier" + "y"::K"Identifier" + ] + + # One example of hygienic-scope (handled with the same mechanism as escape) + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, Expr(:var"hygienic-scope", Symbol("@mac"), :other, :args), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"hygienic_scope" + [K"macro_name" "mac"::K"Identifier"] + 
"other"::K"Identifier" # (<- normally a Module) + "args"::K"Identifier" # (<- normally a LineNumberNode) + ] + "x"::K"Identifier" + ] + + # One example of double escaping + @test JuliaLowering.expr_to_syntaxtree(Expr(:macrocall, esc(esc(Symbol("@mac"))), nothing, :x)) ≈ + @ast_ [K"macrocall" + [K"escape" [K"escape" [K"macro_name" "mac"::K"Identifier"]]] + "x"::K"Identifier" + ] + + # One example of nested escape and hygienic-scope + @test JuliaLowering.expr_to_syntaxtree( + Expr(:macrocall, + Expr(:var"hygienic-scope", esc(Symbol("@mac")), :other, :args), + nothing, + :x)) ≈ + @ast_ [K"macrocall" + [K"hygienic_scope" + [K"escape" + [K"macro_name" "mac"::K"Identifier"] + ] + "other"::K"Identifier" # (<- normally a Module) + "args"::K"Identifier" # (<- normally a LineNumberNode) + ] + "x"::K"Identifier" + ] + + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, LineNumberNode(1))) ≈ + @ast_ [K"block"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, esc(LineNumberNode(1)))) ≈ + @ast_ [K"block"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:block, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"block" LineNumberNode(1)::K"Value"] + + # toplevel (and all other non-block forms) keep LineNumberNodes in value position + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, esc(LineNumberNode(1)))) ≈ + @ast_ [K"toplevel" [K"escape" "nothing"::K"core"]] + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, LineNumberNode(1))) ≈ + @ast_ [K"toplevel" "nothing"::K"core"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:toplevel, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"toplevel" LineNumberNode(1)::K"Value"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, LineNumberNode(1))) ≈ + @ast_ [K"call" "identity"::K"Identifier" "nothing"::K"core"] + @test JuliaLowering.expr_to_syntaxtree(Expr(:call, :identity, QuoteNode(LineNumberNode(1)))) ≈ + @ast_ [K"call" "identity"::K"Identifier" LineNumberNode(1)::K"Value"] + + end +end diff --git 
a/JuliaLowering/test/decls.jl b/JuliaLowering/test/decls.jl new file mode 100644 index 0000000000000..ab25aaa0b7015 --- /dev/null +++ b/JuliaLowering/test/decls.jl @@ -0,0 +1,121 @@ +@testset "Declarations" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +begin + local x::Int = 1.0 + x +end +""") === 1 + +# In value position, yield the right hand side, not `x` +@test JuliaLowering.include_string(test_mod, """ +begin + local x::Int = 1.0 +end +""") === 1.0 + +# Global decl in value position without assignment returns nothing +@test JuliaLowering.include_string(test_mod, "global x_no_assign") === nothing + +# Unadorned declarations +@test JuliaLowering.include_string(test_mod, """ +let + a = 0.0 + x::Int = a + x +end +""") === 0 + +@test JuliaLowering.include_string(test_mod, """ +let + local x::Int = 1 + x1 = x + x = 20.0 + x2 = x + (x1,x2) +end +""") === (1, 20) + +# Global const mixes +@test JuliaLowering.include_string(test_mod, "global x_g = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_g) +@test !Base.isconst(test_mod, :x_g) +@test test_mod.x_g === 1 + +@test JuliaLowering.include_string(test_mod, "const x_c = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_c) +@test Base.isconst(test_mod, :x_c) +@test test_mod.x_c === 1 + +@test JuliaLowering.include_string(test_mod, "global const x_gc = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_gc) +@test Base.isconst(test_mod, :x_gc) +@test test_mod.x_gc === 1 + +@test JuliaLowering.include_string(test_mod, "const global x_cg = 1") === 1 +@test Base.isdefinedglobal(test_mod, :x_cg) +@test Base.isconst(test_mod, :x_cg) +@test test_mod.x_cg === 1 +# Possibly worth testing excessive global/const keywords or invalid combinations +# (local + global/const) once we decide whether that's a parse error or a +# lowering error + +# Global decls with types +@test JuliaLowering.include_string(test_mod, """ +global a_typed_global::Int = 10.0 +""") === 10.0 +@test 
Core.get_binding_type(test_mod, :a_typed_global) === Int +@test test_mod.a_typed_global === 10 + +# Also allowed in nontrivial scopes in a top level thunk +@test JuliaLowering.include_string(test_mod, """ +let + global a_typed_global_2::Int = 10.0 +end +""") === 10.0 +@test Core.get_binding_type(test_mod, :a_typed_global_2) === Int +@test test_mod.a_typed_global_2 === 10 + +@test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 9") === 9 +@test Base.isdefinedglobal(test_mod, :x_c_T) +@test Base.isconst(test_mod, :x_c_T) + +@testset "typed const redeclaration" begin + # redeclaration of the same value used to be allowed + @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 9") + @test_throws ErrorException JuliaLowering.include_string(test_mod, "x_c_T = 10") + # redeclaration with const should be OK + @test JuliaLowering.include_string(test_mod, "const x_c_T::Int = 0") === 0 +end + +# Tuple/destructuring assignments +@test JuliaLowering.include_string(test_mod, "(a0, a1, a2) = [1,2,3]") == [1,2,3] + +@test JuliaLowering.include_string(test_mod, "const a,b,c = 1,2,3") === (1, 2, 3) + +test_mod_2 = Module() +@testset "toplevel-preserving syntax" begin + JuliaLowering.include_string(test_mod_2, "if true; global v1::Bool; else const v1 = 1; end") + @test !isdefined(test_mod_2, :v1) + @test Base.binding_kind(test_mod_2, :v1) == Base.PARTITION_KIND_GLOBAL + @test Core.get_binding_type(test_mod_2, :v1) == Bool + + JuliaLowering.include_string(test_mod_2, "if false; global v2::Bool; else const v2 = 2; end") + @test test_mod_2.v2 === 2 + @test Base.binding_kind(test_mod_2, :v2) == Base.PARTITION_KIND_CONST + + JuliaLowering.include_string(test_mod_2, "v3 = if true; global v4::Bool; 4 else const v4 = 5; 6; end") + @test test_mod_2.v3 == 4 + @test !isdefined(test_mod_2, :v4) + @test Base.binding_kind(test_mod_2, :v4) == Base.PARTITION_KIND_GLOBAL + @test Core.get_binding_type(test_mod_2, :v4) == Bool + + 
JuliaLowering.include_string(test_mod_2, "v5 = if false; global v6::Bool; 4 else const v6 = 5; 6; end") + @test test_mod_2.v5 === 6 + @test test_mod_2.v6 === 5 + @test Base.binding_kind(test_mod_2, :v6) == Base.PARTITION_KIND_CONST +end + +end diff --git a/JuliaLowering/test/decls_ir.jl b/JuliaLowering/test/decls_ir.jl new file mode 100644 index 0000000000000..1092b4d70d3f5 --- /dev/null +++ b/JuliaLowering/test/decls_ir.jl @@ -0,0 +1,299 @@ +######################################## +# Local declaration with type +begin + local x::T = 1 +end +#--------------------- +1 (newvar slot₁/x) +2 1 +3 TestMod.T +4 (= slot₂/tmp %₂) +5 slot₂/tmp +6 (call core.isa %₅ %₃) +7 (gotoifnot %₆ label₉) +8 (goto label₁₂) +9 slot₂/tmp +10 (call top.convert %₃ %₉) +11 (= slot₂/tmp (call core.typeassert %₁₀ %₃)) +12 slot₂/tmp +13 (= slot₁/x %₁₂) +14 (return %₂) + +######################################## +# Error: Local declarations outside a scope are disallowed +# See https://github.com/JuliaLang/julia/issues/57483 +local x +#--------------------- +LoweringError: +local x +└─────┘ ── local declarations have no effect outside a scope + +######################################## +# Local declaration allowed in tail position +begin + local x +end +#--------------------- +1 (newvar slot₁/x) +2 (return core.nothing) + +######################################## +# Local declaration allowed in value position +# TODO: This may be a bug in flisp lowering - should we reconsider this? 
+let + y = local x +end +#--------------------- +1 (newvar slot₁/x) +2 core.nothing +3 (= slot₂/y %₂) +4 (return %₂) + +######################################## +# Global declaration allowed in tail position +global x +#--------------------- +1 (call core.declare_global TestMod :x false) +2 latestworld +3 (return core.nothing) + +######################################## +# Global declaration allowed in tail position, nested +begin + global x +end +#--------------------- +1 (call core.declare_global TestMod :x false) +2 latestworld +3 (return core.nothing) + +######################################## +# Error: Global declaration not allowed in tail position in functions +function f() + global x +end +#--------------------- +LoweringError: +function f() + global x +# ╙ ── global declaration doesn't read the variable and can't return a value +end + +######################################## +# Error: Global declaration not allowed in value position +y = global x +#--------------------- +LoweringError: +y = global x +# ╙ ── global declaration doesn't read the variable and can't return a value + +######################################## +# const +const xx = 10 +#--------------------- +1 10 +2 (call core.declare_const TestMod :xx %₁) +3 latestworld +4 (return %₁) + +######################################## +# Typed const +const xx::T = 10 +#--------------------- +1 TestMod.T +2 (= slot₁/tmp 10) +3 slot₁/tmp +4 (call core.isa %₃ %₁) +5 (gotoifnot %₄ label₇) +6 (goto label₁₀) +7 slot₁/tmp +8 (call top.convert %₁ %₇) +9 (= slot₁/tmp (call core.typeassert %₈ %₁)) +10 slot₁/tmp +11 (call core.declare_const TestMod :xx %₁₀) +12 latestworld +13 (return %₁₀) + +######################################## +# Const tuple +const xxx,xxxx,xxxxx = 10,20,30 +#--------------------- +1 10 +2 (call core.declare_const TestMod :xxx %₁) +3 latestworld +4 20 +5 (call core.declare_const TestMod :xxxx %₄) +6 latestworld +7 30 +8 (call core.declare_const TestMod :xxxxx %₇) +9 latestworld +10 (call 
core.tuple 10 20 30) +11 (return %₁₀) + +######################################## +# Const in chain: only first is const +const c0 = v0 = v1 = 123 +#--------------------- +1 123 +2 (call core.declare_const TestMod :c0 %₁) +3 latestworld +4 (call core.declare_global TestMod :v0 true) +5 latestworld +6 (call core.get_binding_type TestMod :v0) +7 (= slot₁/tmp %₁) +8 slot₁/tmp +9 (call core.isa %₈ %₆) +10 (gotoifnot %₉ label₁₂) +11 (goto label₁₄) +12 slot₁/tmp +13 (= slot₁/tmp (call top.convert %₆ %₁₂)) +14 slot₁/tmp +15 (call core.setglobal! TestMod :v0 %₁₄) +16 (call core.declare_global TestMod :v1 true) +17 latestworld +18 (call core.get_binding_type TestMod :v1) +19 (= slot₂/tmp %₁) +20 slot₂/tmp +21 (call core.isa %₂₀ %₁₈) +22 (gotoifnot %₂₁ label₂₄) +23 (goto label₂₆) +24 slot₂/tmp +25 (= slot₂/tmp (call top.convert %₁₈ %₂₄)) +26 slot₂/tmp +27 (call core.setglobal! TestMod :v1 %₂₆) +28 (return %₁) + +######################################## +# Global assignment +xx = 10 +#--------------------- +1 (call core.declare_global TestMod :xx true) +2 latestworld +3 (call core.get_binding_type TestMod :xx) +4 (= slot₁/tmp 10) +5 slot₁/tmp +6 (call core.isa %₅ %₃) +7 (gotoifnot %₆ label₉) +8 (goto label₁₁) +9 slot₁/tmp +10 (= slot₁/tmp (call top.convert %₃ %₉)) +11 slot₁/tmp +12 (call core.setglobal! TestMod :xx %₁₁) +13 (return 10) + +######################################## +# Typed global assignment +global xx::T = 10 +#--------------------- +1 (call core.declare_global TestMod :xx false) +2 latestworld +3 TestMod.T +4 (call core.declare_global TestMod :xx true %₃) +5 latestworld +6 (call core.declare_global TestMod :xx true) +7 latestworld +8 (call core.get_binding_type TestMod :xx) +9 (= slot₁/tmp 10) +10 slot₁/tmp +11 (call core.isa %₁₀ %₈) +12 (gotoifnot %₁₁ label₁₄) +13 (goto label₁₆) +14 slot₁/tmp +15 (= slot₁/tmp (call top.convert %₈ %₁₄)) +16 slot₁/tmp +17 (call core.setglobal! 
TestMod :xx %₁₆) +18 (return 10) + +######################################## +# Error: x declared twice +begin + local x::T = 1 + local x::S = 1 +end +#--------------------- +LoweringError: +begin + local x::T = 1 + local x::S = 1 +# └───────┘ ── multiple type declarations found for `x` +end + +######################################## +# Error: Const not supported on locals +const local x = 1 +#--------------------- +LoweringError: +const local x = 1 +└───────────────┘ ── unsupported `const local` declaration + +######################################## +# Error: Const not supported on locals +let + const x = 1 +end +#--------------------- +LoweringError: +let + const x = 1 +# └────┘ ── unsupported `const` declaration on local variable +end + +######################################## +# Type decl on function argument +function f(x) + x::Int = 1 + x = 2.0 + x +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read) slot₄/tmp(!read)] + 1 1 + 2 TestMod.Int + 3 (= slot₃/tmp %₁) + 4 slot₃/tmp + 5 (call core.isa %₄ %₂) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₁) + 8 slot₃/tmp + 9 (call top.convert %₂ %₈) + 10 (= slot₃/tmp (call core.typeassert %₉ %₂)) + 11 slot₃/tmp + 12 (= slot₂/x %₁₁) + 13 2.0 + 14 TestMod.Int + 15 (= slot₄/tmp %₁₃) + 16 slot₄/tmp + 17 (call core.isa %₁₆ %₁₄) + 18 (gotoifnot %₁₇ label₂₀) + 19 (goto label₂₃) + 20 slot₄/tmp + 21 (call top.convert %₁₄ %₂₀) + 22 (= slot₄/tmp (call core.typeassert %₂₁ %₁₄)) + 23 slot₄/tmp + 24 (= slot₂/x %₂₃) + 25 slot₂/x + 26 (return %₂₅) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Error: global type decls only allowed at top level +function f() + global x::Int = 1 +end +#--------------------- +LoweringError: +function f() + global x::Int = 
1 +# └─────────┘ ── type declarations for global variables must be at top level, not inside a function +end diff --git a/JuliaLowering/test/demo.jl b/JuliaLowering/test/demo.jl new file mode 100644 index 0000000000000..0b2fe25a82cc2 --- /dev/null +++ b/JuliaLowering/test/demo.jl @@ -0,0 +1,910 @@ +# Just some hacking + +using JuliaSyntax +using JuliaLowering + +using JuliaLowering: SyntaxGraph, SyntaxTree, ensure_attributes!, ensure_attributes, newnode!, setchildren!, is_leaf, @ast, numchildren, children, child, setattr!, sourceref, makenode, sourcetext, showprov, lookup_binding + +using JuliaSyntaxFormatter + +# Extract variable kind for highlighting purposes +function var_kind(ctx, ex) + id = get(ex, :var_id, nothing) + if isnothing(id) + return nothing + end + binfo = lookup_binding(ctx, id) + return binfo.kind == :local ? + (binfo.is_captured ? :local_captured : :local) : + binfo.kind +end + +# Extract module of globals for highlighting +function var_mod(ctx, ex) + id = get(ex, :var_id, nothing) + if isnothing(id) + return nothing + end + return lookup_binding(ctx, id).mod +end + +function formatsrc(ex; kws...) 
+ Text(JuliaSyntaxFormatter.formatsrc(ex; kws...)) +end + +function debug_lower(mod::Module, ex::SyntaxTree; expr_compat_mode::Bool=false, verbose::Bool=false, do_eval::Bool=false) + ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, expr_compat_mode, Base.get_world_counter()) + + verbose && @info "Macro expanded" formatsrc(ex_macroexpand, color_by=:scope_layer) + + ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) + verbose && @info "Desugared" formatsrc(ex_desugar, color_by=:scope_layer) + + ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) + verbose && @info "Resolved scopes" formatsrc(ex_scoped, color_by=e->var_kind(ctx2,e)) + + ctx4, ex_converted = JuliaLowering.convert_closures(ctx3, ex_scoped) + verbose && @info "Closure converted" formatsrc(ex_converted, color_by=:var_id) + + ctx5, ex_compiled = JuliaLowering.linearize_ir(ctx4, ex_converted) + verbose && @info "Linear IR" formatsrc(ex_compiled, color_by=:var_id) Text(sprint(JuliaLowering.print_ir, ex_compiled)) + + ex_expr = JuliaLowering.to_lowered_expr(ex_compiled) + verbose && @info "CodeInfo" ex_expr + + if do_eval + eval_result = Base.eval(mod, ex_expr) + verbose && @info "Eval" eval_result + else + eval_result = nothing + end + + (ctx1, ex_macroexpand, ctx2, ex_desugar, ctx3, ex_scoped, ctx4, ex_converted, ctx5, ex_compiled, ex_expr, eval_result) +end + + +# Currently broken - need to push info back onto src +# function annotate_scopes(mod, ex) +# ex = ensure_attributes(ex, var_id=Int) +# ctx1, ex_macroexpand = JuliaLowering.expand_forms_1(mod, ex, false) +# ctx2, ex_desugar = JuliaLowering.expand_forms_2(ctx1, ex_macroexpand) +# ctx3, ex_scoped = JuliaLowering.resolve_scopes(ctx2, ex_desugar) +# ex +# end + +#------------------------------------------------------------------------------- +# Module containing macros used in the demo. 
+define_macros = false +if !define_macros + eval(:(module M end)) +else +eval(JuliaLowering.@SyntaxTree :(baremodule M + using Base + + using JuliaLowering: JuliaLowering, @ast, @chk, adopt_scope, MacroExpansionError, makenode + using JuliaSyntax + using JuliaLowering: @inert, @label, @goto, @islocal + using Base: @locals + + macro K_str(str) + JuliaSyntax.Kind(str) + end + + # Introspection + macro __MODULE__() + __context__.scope_layer.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macrocall) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macrocall)[1] + end + + # Macro with local variables + module A + another_global = "global in A" + + macro bar(ex) + quote + x = "`x` in @bar" + (x, another_global, $ex) + end + end + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + quote + x = "`x` from @foo" + (x, someglobal, A.@bar $ex) + #(x, someglobal, $ex, A.@bar($ex), A.@bar(x)) + end + end + + macro call_show(x) + quote + z = "z in @call_show" + @show z $x + end + end + + macro call_info(x) + quote + z = "z in @call_info" + @info "hi" z $x + end + end + + macro call_oldstyle_macro(y) + quote + x = "x in call_oldstyle_macro" + @oldstyle $y x + end + end + + macro newstyle(x, y, z) + quote + x = "x in @newstyle" + ($x, $y, $z, x) + end + end + + macro set_a_global(val) + quote + global a_global = $val + end + end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + $e1 = $ex + end + end + + macro baz(ex) + quote + let $ex = 10 + $ex + end + end + end + + macro make_module() + :(module X + blah = 10 + end) + end + + macro return_a_value() + 42 + end + + macro nested_return_a_value() + :( + @return_a_value + ) + end + + macro inner() + :(2) + end + + macro outer() + :((1, @inner)) + end + + macro K_str(str) + JuliaSyntax.Kind(str[1].value) + end + + # Recursive macro call + macro recursive(N) + Nval = if kind(N) == K"Integer" || kind(N) 
== K"Value" + N.value + end + if !(Nval isa Integer) + throw(MacroExpansionError(N, "argument must be an integer")) + end + if Nval < 1 + return N + end + quote + x = $N + (@recursive($(Nval-1)), x) + end + end + + xx = "xx in M" + + macro test_inert_quote() + println(xx) + @inert quote + ($xx, xx) + end + end + + macro mmm(ex) + :(let + local x + function f() + (x, $ex) + end + f() + end) + end + +end)) +end + +Base.eval(M, :( +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +)) + +# +#------------------------------------------------------------------------------- +# Demos of the prototype + +# src = """ +# let +# local x, (y = 2), (w::T = ww), q::S +# end +# """ + +# src = """ +# function foo(x::f(T), y::w(let ; S end)) +# "a \$("b \$("c")")" +# end +# """ + +src = """ +begin + function f(x) + nothing + end + + f(1) +end +""" + +# src = """ +# x + y +# """ + +# src = """ +# module A +# function f(x)::Int +# x + 1 +# end +# +# b = f(2) +# end +# """ + +# src = """ +# function f() +# end +# """ +# +# src = """ +# # import A.B: C.c as d, E.e as f +# # import JuliaLowering +# using JuliaLowering +# """ +# +# src = """ +# module A +# z = 1 + 1 +# end +# """ + +src = raw""" +begin + x = 10 + y = :(g(z)) + quote + f($(x+1), $y) + end +end +""" + +function wrapscope(ex, scope_type) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end + +function softscope_test(ex) + g = ensure_attributes(ex._graph, scope_type=Symbol) + wrapscope(wrapscope(JuliaLowering.reparent(g, ex), :neutral), :soft) +end + +# src = """ +# M.@test_inert_quote() +# """ + +# src = """ +# macro mmm(a; b=2) +# end +# macro A.b(ex) +# end +# """ + +# src = """ +# M.@set_global_in_parent "bent hygiene!" +# """ + +# src = """ +# begin +# M.@__LINE__ +# end +# """ + +# src = """@foo z""" + +src = """ +M.@recursive 3 +""" + +# src = """ +# M.@set_global_in_parent "bent hygiene!" 
+# """ + +# src = """ +# begin +# x = 10 +# y = 20 +# let x = y + x +# z = "some string \$x \$y" +# +# function f(y) +# a = M.@foo z +# "\$z \$y \$a \$x" +# end +# print(x) +# end +# print(x) +# end +# """ + +# src = """ +# begin +# x = -1 +# M.@baz x +# end +# """ + +# src = """ +# _ = -1 +# """ + +# src = """ +# M.@make_module +# """ + +# src = """ +# M.@nested_return_a_value +# """ + +# src = """ +# function f(y) +# x = 42 + y +# M.@foo error(x) +# end +# """ + +src = """ +let + y = 0 + x = 1 + let x = x + 1 + y = x + end + (x, y) +end +""" + +#src = """M.@outer""" + +src = """ +begin + local a, b, c + if a + b + else + c + end +end +""" + +src = """ +begin + local i = 0 + while i < 10 + i = i + 1 + if isodd(i) + continue + end + println(i) + end +end +""" + +src = """ +for i in [3,1,2] + println("i = ", i, ", j = ", j) +end +""" + +# src = """ +# @ccall f()::T +# """ +# +# src = """ +# begin +# a = 1 +# xs = [:(a),] +# x = :(:(\$(\$(xs...)))) +# end +# """ + +# src = """ +# try +# a +# catch exc +# b +# end +# """ + +src = """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + i = 100 + end + a +end +""" + +src = """ +begin + function f(x) + y = x + 1 + "hi", x, y + end + + f(1) +end +""" + +src = """ +let + x = try + error("hi") + 1 + catch exc + current_exceptions() + else + 3 + end + x +end +""" + +src = """ +function f(y) + x = + try + try + error("hi") + 1 + catch exc + if y + return 2 + end + 3 + else + 4 + end + catch + 5 + end + x +end +""" + +src = """ +function f(x)::Int + if x + 42.0 + end + 0xff +end +""" + +src = """ +let x = 10 + global a = [] + try + try + return 100 + finally + push!(a, 1) + end + finally + push!(a, 2) + end + x +end +""" + +src = """ +let + for outer i = 1:2 + body + end +end +""" + +src = """ +let + i = "hi" + j = 1 + M.@label foo + try + println("i = ", i) + i = i + 1 + if i <= 2 + M.@goto foo + end + catch exc + println("Caught exception ", exc) + j = j + 1 + if j <= 2 + println("Trying again ", exc) + M.@goto foo + 
end + end +end +""" + +src = """ +let + M.@goto foo + M.@label foo +end +""" + +src = """ +x = M.@label foo +""" + +src = """ +begin + local x::T = 1 + local x::S = 1 +end +""" + +src = """ +begin + local a, b + if a + b + end +end +""" + +src = """ +let + A{S} = B{S} +end +""" + +src = """ +let + a = b = c = sin(1) + (a,b,c) +end +""" + +src = """ +a.b = c +""" + +src = """ +a[i j] = c +""" + +src = """ +let + as = [1,2,3,4] + (x,ys...,z) = as + (x,ys,z) +end +""" + +src = """ +let + x = (1,2) + (y,x) = x + (x,y) +end +""" + +src = """ +let + a = b = c = sin(1) + (a,b,c) +end +""" + +src = """ +begin + as = [(1,2), (3,4)] + ((x,y), (z,w)) = as +end +""" + +src = """ +let +(x, y) = (y,x) +end +""" + +src = """ +let x = 1 + M.@islocal x +end +""" + +src = """ +let x = 1 + local y + M.@locals +end +""" + +src = """ +let + (a, bs...,) = (1,2,3) + bs +end +""" + +src = """ +(; a=1, a=2) +""" + +src = """ +begin + kws = (c=3, d=4) + xs = 1:3 + f(xs...; kws..., a=1, b=2) +end +""" + +src = """ +"some docs" +function f() + println("hi") +end +""" + +src = """ +function f(::T, ::U, ::S) where T where {U,S} + println(T) + println(U) + println(S) +end +""" + +src = """ +function (x::XXX)(y) + println("hi", " ", x, " ", y) +end +""" + +src = """ +struct X + x + y::String +end +""" + +src = """ +struct X{U,V} + x::U + y::V +end +""" + +src = """ +struct S9{T} + x + y + + "Docs for S9" + S9{Int}(xs) = new(xs...) +end +""" + +# Default positional args with missing arg names +src = """ +function f(::Int, y=1, z=2) + (y, z) +end +""" + +# Default positional args with placeholders +src = """ +function f(_::Int, x=1) + x +end +""" + +# Positional args and type parameters with transitive dependencies +# Bug in flisp lowering - see https://github.com/JuliaLang/julia/issues/49275 +src = """ +function f(x, y::S=[1], z) where {T, S<:AbstractVector{T}} + (x, y, z, T) +end +""" + +# Default positional args before trailing slurp are allowed +src = """ +function f(x=1, ys...) 
+ ys +end +""" + +# Default positional args after a slurp is an error +src = """ +function f(x=1, ys..., z=2) + ys +end +""" + +# Positional arg with slurp and default +src = """ +function f(x=1, ys...="hi") + ys +end +""" + +# Positional arg with slurp and splat +src = """ +function f(x=1, ys...=(1,2)...) + ys +end +""" + +src = """ +let + x = 10 + function f(y) + x + y + end +end +""" + +src = """ +begin + local f, set_x + local x = 10 + local y = 100 + function f() + z = 1 + y - x + z + end + function set_x() + x = 1 + end + println("f = ", f()) + set_x() + y = 10 + println("f = ", f()) +end +""" + +# TODO: fix this - it's interpreted in a bizarre way as a kw call. +# src = """ +# function f(x=y=1) +# x +# end +# """ + +function gen_stuff(ctx, N, x) + JuliaLowering.@ast ctx ctx.macrocall [K"tuple" + (i::K"Integer" for i in 1:N)... + ] +end + +src = raw""" +function gen(x::NTuple{N}) where {N} + nongen_stuff = :nongen + if @generated + quote + maybe_gen_stuff = ($N, $x) + end + else + maybe_gen_stuff = :nongen_2 + end + (nongen_stuff, maybe_gen_stuff) +end +""" + +src = raw""" +begin + function partially_gen(x::NTuple{N,T}) where {N,T} + shared = :shared_stuff + if @generated + quote + unshared = ($x, $N, $T) + end + else + # Uuuum. How do we test both sides of this branch?? 
+ unshared = :nongen # (typeof(x), N, T) + end + (shared, unshared) + end + + partially_gen((1,2,3,4,5)) +end +""" + +src = """ +let + z = "z in outer ctx" + @call_show z +end +""" + +src = """ +let + x = "x in outer ctx" + @call_oldstyle_macro x +end +""" + +src = """ +let + z = "z in outer ctx" + @call_info z +end +""" + +ex = parsestmt(SyntaxTree, src, filename="foo.jl") +#ex = ensure_attributes(ex, var_id=Int) +#ex = softscope_test(ex) +@info "Input code" formatsrc(ex) + +(ctx1, ex_macroexpand, + ctx2, ex_desugar, + ctx3, ex_scoped, + ctx4, ex_converted, + ctx5, ex_compiled, + ex_expr, eval_result) = debug_lower(M, ex; verbose=true, do_eval=true) + +# Automatic test reduction +# bad = reduce_any_failing_toplevel(JuliaLowering, joinpath(@__DIR__, "../src/desugaring.jl")) +# if !isnothing(bad) +# @error "Reduced expression as code" formatsrc(bad) +# write("bad.jl", JuliaSyntaxFormatter.formatsrc(bad)) +# end + +# Old lowering +# text = read(joinpath(@__DIR__, "../src/desugaring.jl"), String) +# ex = parseall(SyntaxTree, text, filename="desugaring.jl") +# for e in Meta.parseall(text).args +# Meta.lower(JuliaLowering, e) +# end diff --git a/JuliaLowering/test/destructuring.jl b/JuliaLowering/test/destructuring.jl new file mode 100644 index 0000000000000..6158d8bc28ebf --- /dev/null +++ b/JuliaLowering/test/destructuring.jl @@ -0,0 +1,225 @@ +@testset "Destructuring" begin + +test_mod = Module() + +@testset "Destructuring via iteration" begin + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3] + (x,y) = as + (x,y) +end +""") == (1,2) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3] + (x,ys...) 
= as + (x,ys) +end +""") == (1, [2,3]) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3,4] + (x,ys...,z) = as + (x,ys,z) +end +""") == (1, [2, 3], 4) + +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2,3,4] + (xs...,y) = as + (xs,y) +end +""") == ([1, 2, 3], 4) + +# Case where indexed_iterate is just iteration +@test JuliaLowering.include_string(test_mod, """ +let + (x,ys...,z) = "aβcδe" + (x,ys,z) +end +""") == ('a', "βcδ", 'e') + + +# Use in value position yields rhs +@test JuliaLowering.include_string(test_mod, """ +let + as = [1,2] + zs = begin + (x,y) = as + end + (x,y, as === zs) +end +""") == (1, 2, true) + +# lhs variable name in rhs +@test JuliaLowering.include_string(test_mod, """ +let + x = (1,2) + (x,y) = x + (x,y) +end +""") == (1, 2) + +@test JuliaLowering.include_string(test_mod, """ +let + x = (1,2) + (x...,y) = x + (x,y) +end +""") == ((1,), 2) + +@test JuliaLowering.include_string(test_mod, """ +let + zs = [(1,2), (3,(4,5))] + ((a,b), (c,(d,e))) = zs + (a,b,c,d,e) +end +""") == (1,2,3,4,5) + +@test JuliaLowering.include_string(test_mod, """ +let + zs = [[1,2,3], 4] + ((a,bs...), c) = zs + (a, bs, c) +end +""") == (1, [2,3], 4) + +end + + +@testset "Tuple elimination with tuples on both sides" begin + +# Simple case +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2 + (x,y) = (a,b) + (x,y) +end +""") == (1, 2) + +# lhs variable name in rhs +@test JuliaLowering.include_string(test_mod, """ +let x = 1, y = 2 + (x,y) = (y,x) + (x,y) +end +""") == (2, 1) + +# Slurps and splats + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, c = 3 + (x, ys..., z) = (a, b, c) + (x, ys, z) +end +""") == (1, (2,), 3) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, cs = (3,4) + (x, ys...) = (a, b, cs...) + (x, ys) +end +""") == (1, (2,3,4)) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, bs = (2,3), c = 4 + (x, ys...) 
= (a, bs..., c) + (x, ys) +end +""") == (1, (2,3,4)) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1, b = 2, cs = (3,4) + (x, ys..., z) = (a, b, cs...) + (x, ys, z) +end +""") == (1, (2,3), 4) + +@test JuliaLowering.include_string(test_mod, """ +let a = 1 + (x, ys...) = (a,) + (x, ys) +end +""") == (1, ()) + +# dotted rhs in last place +@test JuliaLowering.include_string(test_mod, """ +let + rh = (2, 3) + (x,y,z) = (1,rh...) + (x,y,z) +end +""") == (1, 2, 3) + +# in value position +@test JuliaLowering.include_string(test_mod, """ +let + rh = (2, 3) + (x,y) = (1,rh...) +end +""") == (1, 2, 3) + +# Side effects in the right hand tuple can affect the previous left hand side +# bindings, for example, `x`, below. In this case we need to ensure `f()` is +# called before `x` is assigned the value from the right hand side. +# (the flisp implementation fails this test.) +@test JuliaLowering.include_string(test_mod, """ +let + function f() + x=100 + 2 + end + (x,y) = (1,f()) + x,y +end +""") == (1,2) + +# `x` is not assigned and no side effect from `f()` happens when the right hand +# side throws an UndefVarError +@test JuliaLowering.include_string(test_mod, """ +let x=1, y=2, z=3, side_effect=false, a + exc = try + function f() + side_effect=true + end + (x,y,z) = (100, a, f()) + catch e + e + end + (x, y, z, side_effect, exc.var) +end +""") == (1, 2, 3, false, :a) + +# Require that rhs is evaluated before any assignments, thus `x` is not defined +# here because accessing `a` first throws an UndefVarError +@test JuliaLowering.include_string(test_mod, """ +let x, y, a + try + (x, y) = (1, a) + catch + end + @isdefined(x) +end +""") == false + +end + + +@testset "Property destructuring" begin + +@test JuliaLowering.include_string(test_mod, """ +let + ab = (a=1, b=2) + (; a, b) = ab + (a, b) +end +""") == (1, 2) + +end + +end diff --git a/JuliaLowering/test/destructuring_ir.jl b/JuliaLowering/test/destructuring_ir.jl new file mode 100644 index 
0000000000000..990096a87e916 --- /dev/null +++ b/JuliaLowering/test/destructuring_ir.jl @@ -0,0 +1,387 @@ +######################################## +# Simple destructuring +let + (x,y) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (= slot₃/y (call core.getfield %₇ 1)) +9 TestMod.as +10 (return %₉) + +######################################## +# Trivial slurping +let + (xs...,) = as +end +#--------------------- +1 TestMod.as +2 (= slot₁/xs (call top.rest %₁)) +3 TestMod.as +4 (return %₃) + +######################################## +# Slurping last arg +let + (x, ys...) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (= slot₃/ys (call top.rest %₅ %₆)) +8 TestMod.as +9 (return %₈) + +######################################## +# Slurping, first arg +let + (xs..., y, z) = as +end +#--------------------- +1 TestMod.as +2 (call top.split_rest %₁ 2) +3 (= slot₂/xs (call core.getfield %₂ 1)) +4 (call core.getfield %₂ 2) +5 (call top.indexed_iterate %₄ 1) +6 (= slot₃/y (call core.getfield %₅ 1)) +7 (= slot₁/iterstate (call core.getfield %₅ 2)) +8 slot₁/iterstate +9 (call top.indexed_iterate %₄ 2 %₈) +10 (= slot₄/z (call core.getfield %₉ 1)) +11 TestMod.as +12 (return %₁₁) + +######################################## +# Slurping, middle arg +let + (x, ys..., z) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.split_rest %₅ 1 %₆) +8 (= slot₃/ys (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 (call top.indexed_iterate %₉ 1) +11 (= slot₄/z (call 
core.getfield %₁₀ 1)) +12 TestMod.as +13 (return %₁₂) + +######################################## +# Error: Slurping multiple args +(xs..., ys...) = x +#--------------------- +LoweringError: +(xs..., ys...) = x +# └────┘ ── multiple `...` in destructuring assignment are ambiguous + +######################################## +# Recursive destructuring +let + ((x,y), (z,w)) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (call core.getfield %₂ 1) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (call core.getfield %₇ 1) +9 (call top.indexed_iterate %₃ 1) +10 (= slot₅/x (call core.getfield %₉ 1)) +11 (= slot₂/iterstate (call core.getfield %₉ 2)) +12 slot₂/iterstate +13 (call top.indexed_iterate %₃ 2 %₁₂) +14 (= slot₆/y (call core.getfield %₁₃ 1)) +15 (call top.indexed_iterate %₈ 1) +16 (= slot₇/z (call core.getfield %₁₅ 1)) +17 (= slot₃/iterstate (call core.getfield %₁₅ 2)) +18 slot₃/iterstate +19 (call top.indexed_iterate %₈ 2 %₁₈) +20 (= slot₄/w (call core.getfield %₁₉ 1)) +21 TestMod.as +22 (return %₂₁) + +######################################## +# Recursive destructuring with slurping +let + ((x,ys...), z) = as +end +#--------------------- +1 TestMod.as +2 (call top.indexed_iterate %₁ 1) +3 (call core.getfield %₂ 1) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 TestMod.as +6 slot₁/iterstate +7 (call top.indexed_iterate %₅ 2 %₆) +8 (= slot₅/z (call core.getfield %₇ 1)) +9 (call top.indexed_iterate %₃ 1) +10 (= slot₃/x (call core.getfield %₉ 1)) +11 (= slot₂/iterstate (call core.getfield %₉ 2)) +12 slot₂/iterstate +13 (= slot₄/ys (call top.rest %₃ %₁₂)) +14 TestMod.as +15 (return %₁₄) + +######################################## +# Destructuring with simple tuple elimination +let + (x, y) = (a, b) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 (= slot₁/x %₁) +4 (= slot₂/y %₂) +5 (call core.tuple %₁ %₂) +6 (return %₅) + 
+######################################## +# Destructuring with tuple elimination where variables are repeated +let + (x, y, z) = (y, a, x) +end +#--------------------- +1 slot₂/y +2 TestMod.a +3 slot₁/x +4 (= slot₁/x %₁) +5 (= slot₂/y %₂) +6 (= slot₃/z %₃) +7 (call core.tuple %₁ %₂ %₃) +8 (return %₇) + +######################################## +# Destructuring with simple tuple elimination and rhs with side effects +let + (x, y) = (f(), b) +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.b +4 (= slot₁/x %₂) +5 (= slot₂/y %₃) +6 (call core.tuple %₂ %₃) +7 (return %₆) + +######################################## +# Destructuring with simple tuple elimination and lhs with side effects +let + (x[10], y[20]) = (1,2) +end +#--------------------- +1 1 +2 TestMod.x +3 (call top.setindex! %₂ %₁ 10) +4 2 +5 TestMod.y +6 (call top.setindex! %₅ %₄ 20) +7 (call core.tuple 1 2) +8 (return %₇) + +######################################## +# Destructuring with tuple elimination and trailing rhs ... +let + (x, y) = (a, rhs...) 
+end +#--------------------- +1 TestMod.a +2 TestMod.rhs +3 (= slot₁/x %₁) +4 (call top.indexed_iterate %₂ 1) +5 (= slot₂/y (call core.getfield %₄ 1)) +6 (call core.tuple %₁) +7 (call core._apply_iterate top.iterate core.tuple %₆ %₂) +8 (return %₇) + +######################################## +# Destructuring with non-trailing rhs `...` does not use tuple elimination +# (though we could do it for the `x = a` part here) +let + (x, y, z) = (a, rhs..., b) +end +#--------------------- +1 TestMod.a +2 (call core.tuple %₁) +3 TestMod.rhs +4 TestMod.b +5 (call core.tuple %₄) +6 (call core._apply_iterate top.iterate core.tuple %₂ %₃ %₅) +7 (call top.indexed_iterate %₆ 1) +8 (= slot₂/x (call core.getfield %₇ 1)) +9 (= slot₁/iterstate (call core.getfield %₇ 2)) +10 slot₁/iterstate +11 (call top.indexed_iterate %₆ 2 %₁₀) +12 (= slot₃/y (call core.getfield %₁₁ 1)) +13 (= slot₁/iterstate (call core.getfield %₁₁ 2)) +14 slot₁/iterstate +15 (call top.indexed_iterate %₆ 3 %₁₄) +16 (= slot₄/z (call core.getfield %₁₅ 1)) +17 (return %₆) + +######################################## +# Destructuring with tuple elimination and final ... on lhs +let + (x, ys...) = (a,b,c) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂ %₃) +6 (= slot₂/ys %₅) +7 (call core.tuple %₁ %₂ %₃) +8 (return %₇) + +######################################## +# Destructuring with tuple elimination, slurping, and completely effect free right hand sides +let + (x, ys...) = (1,2,3) +end +#--------------------- +1 (= slot₁/x 1) +2 (call core.tuple 2 3) +3 (= slot₂/ys %₂) +4 (call core.tuple 1 2 3) +5 (return %₄) + +######################################## +# Destructuring with tuple elimination and non-final ... 
on lhs +let + (x, ys..., z) = (a,b,c) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂) +6 (= slot₂/ys %₅) +7 (= slot₃/z %₃) +8 (call core.tuple %₁ %₂ %₃) +9 (return %₈) + +######################################## +# Error: Destructuring with tuple elimination and too few RHS elements +(x,) = () +#--------------------- +LoweringError: +(x,) = () +└───────┘ ── More variables on left hand side than right hand in tuple assignment + +######################################## +# Error: Destructuring with tuple elimination, slurping, and too few RHS elements +(x,y,ys...) = (1,) +#--------------------- +LoweringError: +(x,y,ys...) = (1,) +└────────────────┘ ── More variables on left hand side than right hand in tuple assignment + +######################################## +# Destructuring with tuple elimination but not in value position never creates +# the tuple +let + (x, ys...) = (a,b,c) + nothing +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 TestMod.c +4 (= slot₁/x %₁) +5 (call core.tuple %₂ %₃) +6 (= slot₂/ys %₅) +7 TestMod.nothing +8 (return %₇) + +######################################## +# Property destructuring +let + (; x, y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return %₁) + +######################################## +# Property destructuring with colliding symbolic lhs/rhs +let + local x + (; x, y) = x +end +#--------------------- +1 slot₁/x +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return %₁) + +######################################## +# Property destructuring with nontrivial rhs +let + (; x, y) = f() +end +#--------------------- +1 TestMod.f +2 (call %₁) +3 (= slot₁/x (call top.getproperty %₂ :x)) +4 (= slot₂/y (call top.getproperty %₂ :y)) +5 (return %₂) + +######################################## +# Property destructuring with type decl 
+let + (; x::T) = rhs +end +#--------------------- +1 (newvar slot₁/x) +2 TestMod.rhs +3 (call top.getproperty %₂ :x) +4 TestMod.T +5 (= slot₂/tmp %₃) +6 slot₂/tmp +7 (call core.isa %₆ %₄) +8 (gotoifnot %₇ label₁₀) +9 (goto label₁₃) +10 slot₂/tmp +11 (call top.convert %₄ %₁₀) +12 (= slot₂/tmp (call core.typeassert %₁₁ %₄)) +13 slot₂/tmp +14 (= slot₁/x %₁₃) +15 (return %₂) + +######################################## +# Error: Property destructuring with frankentuple +(x ; a, b) = rhs +#--------------------- +LoweringError: +(x ; a, b) = rhs +└────────┘ ── Property destructuring must use a single `;` before the property names, eg `(; a, b) = rhs` + +######################################## +# Error: Property destructuring with values for properties +(; a=1, b) = rhs +#--------------------- +LoweringError: +(; a=1, b) = rhs +# └─┘ ── invalid assignment location diff --git a/JuliaLowering/test/desugaring.jl b/JuliaLowering/test/desugaring.jl new file mode 100644 index 0000000000000..66a1766b342cb --- /dev/null +++ b/JuliaLowering/test/desugaring.jl @@ -0,0 +1,57 @@ +@testset "Desugaring" begin + +test_mod = Module(:TestMod) + +# @test desugar(test_mod, """ +# let +# y = 0 +# x = 1 +# let x = x + 1 +# y = x +# end +# (x, y) +# end +# """) ≈ @ast_ [K"block" +# [K"block" +# [K"=" +# "y"::K"Identifier" +# 0::K"Integer" +# ] +# [K"=" +# "x"::K"Identifier" +# 1::K"Integer" +# ] +# [K"block" +# [K"=" +# 1::K"BindingId" +# [K"call" +# "+"::K"Identifier" +# "x"::K"Identifier" +# 1::K"Integer" +# ] +# ] +# [K"block" +# [K"local_def" +# "x"::K"Identifier" +# ] +# [K"=" +# "x"::K"Identifier" +# 1::K"BindingId" +# ] +# [K"block" +# [K"=" +# "y"::K"Identifier" +# "x"::K"Identifier" +# ] +# ] +# ] +# ] +# [K"call" +# "tuple"::K"core" +# "x"::K"Identifier" +# "y"::K"Identifier" +# ] +# ] +# ] + +end diff --git a/JuliaLowering/test/exceptions.jl b/JuliaLowering/test/exceptions.jl new file mode 100644 index 0000000000000..e270ae38944f7 --- /dev/null +++ 
b/JuliaLowering/test/exceptions.jl @@ -0,0 +1,338 @@ +@testset "try/catch" begin + +test_mod = Module() + +@test isempty(current_exceptions()) + +@testset "tail position" begin + + @test JuliaLowering.include_string(test_mod, """ + try + 1 + catch + 2 + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + 1 + catch + 2 + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + catch exc + exc + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + try + 1 + catch + 2 + else + 3 + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + try + error("hi") + 1 + catch + 2 + else + 3 + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + begin + function f() + try + return 1 + catch + end + return 2 + end + f() + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + begin + function g() + try + return 1 + catch + end + end + g() + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + while true + try + error("hi") + catch + x = 2 + break + end + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + while true + try + x = 2 + break + catch + end + end + x + end + """) == 2 +end + +@testset "value position" begin + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + 1 + catch + 2 + end + x + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + 1 + catch + 2 + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + catch exc + exc + end + x + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + 1 + catch + 2 + else + 3 + end + x + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + let + x = try + error("hi") + 1 + catch + 2 + else + 3 + end + x + end + 
""") == 2 + +end + +@testset "not value/tail position" begin + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = 1 + catch + x = 2 + end + x + end + """) == 1 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + error("hi") + x = 1 + catch + x = 2 + end + x + end + """) == 2 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = error("hi") + catch exc + x = exc + end + x + end + """) == ErrorException("hi") + + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + x = 1 + catch + x = 2 + else + x = 3 + end + x + end + """) == 3 + + @test JuliaLowering.include_string(test_mod, """ + let x = -1 + try + error("hi") + x = 1 + catch + x = 2 + else + x = 3 + end + x + end + """) == 2 + +end + +@testset "exception stack" begin + + @test JuliaLowering.include_string(test_mod, """ + try + try + error("hi") + catch + error("ho") + end + catch + a = [] + for x in current_exceptions() + push!(a, x.exception) + end + a + end + """) == [ErrorException("hi"), ErrorException("ho")] + +end + +@test isempty(current_exceptions()) + +end + +#------------------------------------------------------------------------------- +@testset "try/finally" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let x = -1 + try + x = 1 + finally + x = 2 + end + x +end +""") == 2 + +@test JuliaLowering.include_string(test_mod, """ +let x = -1 + try + try + error("hi") + x = 1 + finally + x = 2 + end + catch + end + x +end +""") == 2 + +JuliaLowering.include_string(test_mod, """ +begin + function nested_finally(a, x, b, c) + try + try + if x + return b + end + c + finally + push!(a, 1) + end + finally + push!(a, 2) + end + end +end +""") +@test (a = []; res = test_mod.nested_finally(a, true, 100, 200); (a, res)) == ([1,2], 100) +@test (a = []; res = test_mod.nested_finally(a, false, 100, 200); (a, res)) == ([1,2], 200) + +@test JuliaLowering.include_string(test_mod, """ +try + 1 +catch 
+ 2 +finally + 3 +end +""") == 1 + +@test JuliaLowering.include_string(test_mod, """ +try + error("hi") + 1 +catch + 2 +finally + 3 +end +""") == 2 + +end diff --git a/JuliaLowering/test/exceptions_ir.jl b/JuliaLowering/test/exceptions_ir.jl new file mode 100644 index 0000000000000..8cf423258f0c5 --- /dev/null +++ b/JuliaLowering/test/exceptions_ir.jl @@ -0,0 +1,357 @@ +######################################## +# Return from inside try/catch +try + f + return x +catch + g + return y +end +#--------------------- +1 (enter label₆) +2 TestMod.f +3 TestMod.x +4 (leave %₁) +5 (return %₃) +6 TestMod.g +7 TestMod.y +8 (pop_exception %₁) +9 (return %₇) + +######################################## +# Return from inside try/catch with simple return vals +try + f + return 10 +catch + g + return 20 +end +#--------------------- +1 (enter label₅) +2 TestMod.f +3 (leave %₁) +4 (return 10) +5 TestMod.g +6 (pop_exception %₁) +7 (return 20) + +######################################## +# Return from multiple try + try/catch +try + try + return 10 + catch + return 20 + end +catch +end +#--------------------- +1 (enter label₁₄) +2 (enter label₇) +3 (leave %₁ %₂) +4 (return 10) +5 (leave %₂) +6 (goto label₁₁) +7 (leave %₁) +8 (pop_exception %₂) +9 (return 20) +10 (pop_exception %₂) +11 slot₁/try_result +12 (leave %₁) +13 (return %₁₁) +14 (pop_exception %₁) +15 (return core.nothing) + +######################################## +# Return from multiple catch + try/catch +try +catch + try + return 10 + catch + return 20 + end +end +#--------------------- +1 (enter label₄) +2 (leave %₁) +3 (return core.nothing) +4 (enter label₈) +5 (leave %₄) +6 (pop_exception %₁) +7 (return 10) +8 (pop_exception %₁) +9 (return 20) + +######################################## +# try/catch/else, tail position +try + a +catch + b +else + c +end +#--------------------- +1 (enter label₆) +2 TestMod.a +3 (leave %₁) +4 TestMod.c +5 (return %₄) +6 TestMod.b +7 (pop_exception %₁) +8 (return %₆) + 
+######################################## +# try/catch/else, value position +let + z = try + a + catch + b + else + c + end +end +#--------------------- +1 (newvar slot₁/z) +2 (enter label₈) +3 TestMod.a +4 (leave %₂) +5 TestMod.c +6 (= slot₂/try_result %₅) +7 (goto label₁₁) +8 TestMod.b +9 (= slot₂/try_result %₈) +10 (pop_exception %₂) +11 slot₂/try_result +12 (= slot₁/z %₁₁) +13 (return %₁₁) + +######################################## +# try/catch/else, not value/tail +begin + try + a + catch + b + else + c + end + z +end +#--------------------- +1 (enter label₆) +2 TestMod.a +3 (leave %₁) +4 TestMod.c +5 (goto label₈) +6 TestMod.b +7 (pop_exception %₁) +8 TestMod.z +9 (return %₈) + +######################################## +# basic try/finally, tail position +try + a +finally + b +end +#--------------------- +1 (enter label₇) +2 (= slot₁/finally_tag -1) +3 (= slot₂/returnval_via_finally TestMod.a) +4 (= slot₁/finally_tag 1) +5 (leave %₁) +6 (goto label₈) +7 (= slot₁/finally_tag 2) +8 TestMod.b +9 (call core.=== slot₁/finally_tag 2) +10 (gotoifnot %₉ label₁₂) +11 (call top.rethrow) +12 slot₂/returnval_via_finally +13 (return %₁₂) + +######################################## +# basic try/finally, value position +let + z = try + a + finally + b + end +end +#--------------------- +1 (newvar slot₁/z) +2 (enter label₈) +3 (= slot₃/finally_tag -1) +4 TestMod.a +5 (= slot₂/try_result %₄) +6 (leave %₂) +7 (goto label₉) +8 (= slot₃/finally_tag 1) +9 TestMod.b +10 (call core.=== slot₃/finally_tag 1) +11 (gotoifnot %₁₀ label₁₃) +12 (call top.rethrow) +13 slot₂/try_result +14 (= slot₁/z %₁₃) +15 (return %₁₃) + +######################################## +# basic try/finally, not value/tail +begin + try + a + finally + b + end + z +end +#--------------------- +1 (enter label₆) +2 (= slot₁/finally_tag -1) +3 TestMod.a +4 (leave %₁) +5 (goto label₇) +6 (= slot₁/finally_tag 1) +7 TestMod.b +8 (call core.=== slot₁/finally_tag 1) +9 (gotoifnot %₈ label₁₁) +10 (call top.rethrow) +11 
TestMod.z +12 (return %₁₁) + +######################################## +# try/finally + break +while true + try + a + break + finally + b + end +end +#--------------------- +1 (gotoifnot true label₁₅) +2 (enter label₉) +3 (= slot₁/finally_tag -1) +4 TestMod.a +5 (leave %₂) +6 (goto label₁₅) +7 (leave %₂) +8 (goto label₁₀) +9 (= slot₁/finally_tag 1) +10 TestMod.b +11 (call core.=== slot₁/finally_tag 1) +12 (gotoifnot %₁₁ label₁₄) +13 (call top.rethrow) +14 (goto label₁) +15 (return core.nothing) + +######################################## +# try/catch/finally +try + a +catch + b +finally + c +end +#--------------------- +1 (enter label₁₅) +2 (= slot₁/finally_tag -1) +3 (enter label₈) +4 TestMod.a +5 (= slot₂/try_result %₄) +6 (leave %₃) +7 (goto label₁₁) +8 TestMod.b +9 (= slot₂/try_result %₈) +10 (pop_exception %₃) +11 (= slot₃/returnval_via_finally slot₂/try_result) +12 (= slot₁/finally_tag 1) +13 (leave %₁) +14 (goto label₁₆) +15 (= slot₁/finally_tag 2) +16 TestMod.c +17 (call core.=== slot₁/finally_tag 2) +18 (gotoifnot %₁₇ label₂₀) +19 (call top.rethrow) +20 slot₃/returnval_via_finally +21 (return %₂₀) + +######################################## +# Nested finally blocks +try + try + if x + return a + end + b + finally + c + end +finally + d +end +#--------------------- +1 (enter label₃₀) +2 (= slot₁/finally_tag -1) +3 (enter label₁₅) +4 (= slot₃/finally_tag -1) +5 TestMod.x +6 (gotoifnot %₅ label₁₁) +7 (= slot₄/returnval_via_finally TestMod.a) +8 (= slot₃/finally_tag 1) +9 (leave %₃) +10 (goto label₁₆) +11 TestMod.b +12 (= slot₂/try_result %₁₁) +13 (leave %₃) +14 (goto label₁₆) +15 (= slot₃/finally_tag 2) +16 TestMod.c +17 (call core.=== slot₃/finally_tag 2) +18 (gotoifnot %₁₇ label₂₀) +19 (call top.rethrow) +20 (call core.=== slot₃/finally_tag 1) +21 (gotoifnot %₂₀ label₂₆) +22 (= slot₅/returnval_via_finally slot₄/returnval_via_finally) +23 (= slot₁/finally_tag 1) +24 (leave %₁) +25 (goto label₃₁) +26 (= slot₆/returnval_via_finally slot₂/try_result) +27 (= 
slot₁/finally_tag 2) +28 (leave %₁) +29 (goto label₃₁) +30 (= slot₁/finally_tag 3) +31 TestMod.d +32 (call core.=== slot₁/finally_tag 3) +33 (gotoifnot %₃₂ label₃₅) +34 (call top.rethrow) +35 (call core.=== slot₁/finally_tag 2) +36 (gotoifnot %₃₅ label₃₉) +37 slot₆/returnval_via_finally +38 (return %₃₇) +39 slot₅/returnval_via_finally +40 (return %₃₉) + +######################################## +# Access to the exception object +try + a +catch exc + b +end +#--------------------- +1 (enter label₅) +2 TestMod.a +3 (leave %₁) +4 (return %₂) +5 (= slot₁/exc (call JuliaLowering.current_exception)) +6 TestMod.b +7 (pop_exception %₁) +8 (return %₆) diff --git a/JuliaLowering/test/function_calls_ir.jl b/JuliaLowering/test/function_calls_ir.jl new file mode 100644 index 0000000000000..f2772a65d6967 --- /dev/null +++ b/JuliaLowering/test/function_calls_ir.jl @@ -0,0 +1,653 @@ +######################################## +# Simple call +f(x, y) +#--------------------- +1 TestMod.f +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# Keyword calls +f(x; a=1, b=2) +#--------------------- +1 TestMod.f +2 (call core.tuple :a :b) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.x +7 (call core.kwcall %₅ %₁ %₆) +8 (return %₇) + +######################################## +# Keyword call with only splats for kws +f(; ks1..., ks2...) 
+#--------------------- +1 TestMod.f +2 (call core.NamedTuple) +3 TestMod.ks1 +4 (call top.merge %₂ %₃) +5 TestMod.ks2 +6 (call top.merge %₄ %₅) +7 (call top.isempty %₆) +8 (gotoifnot %₇ label₁₁) +9 (call %₁) +10 (return %₉) +11 (call core.kwcall %₆ %₁) +12 (return %₁₁) + +######################################## +# Error: Call with repeated keywords +f(x; a=1, a=2) +#--------------------- +LoweringError: +f(x; a=1, a=2) +# ╙ ── Repeated keyword argument name + +######################################## +# literal_pow lowering +x^42 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 42) +4 (call %₃) +5 (call top.literal_pow %₁ %₂ %₄) +6 (return %₅) + +######################################## +# almost but not quite literal_pow lowering :) +x^42.0 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call %₁ %₂ 42.0) +4 (return %₃) + +######################################## +# Error: infix call without enough arguments +@ast_ [K"call"(syntax_flags=JuliaSyntax.INFIX_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + +######################################## +# Error: postfix call without enough arguments +@ast_ [K"call"(syntax_flags=JuliaSyntax.POSTFIX_OP_FLAG) + "x"::K"Identifier" +] +#--------------------- +LoweringError: +#= line 1 =# - Postfix/infix operators must have at least two positional arguments + +######################################## +# Error: Call with no function name +@ast_ [K"call"] +#--------------------- +LoweringError: +#= line 1 =# - Call expressions must have a function name + +######################################## +# Simple broadcast +x .* y .+ f.(z) +#--------------------- +1 TestMod.+ +2 TestMod.* +3 TestMod.x +4 TestMod.y +5 (call top.broadcasted %₂ %₃ %₄) +6 TestMod.f +7 TestMod.z +8 (call top.broadcasted %₆ %₇) +9 (call top.broadcasted %₁ %₅ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + 
+######################################## +# Broadcast with unary function calls +.+x +#--------------------- +1 TestMod.+ +2 TestMod.x +3 (call top.broadcasted %₁ %₂) +4 (call top.materialize %₃) +5 (return %₄) + +######################################## +# Broadcast with short circuit operators +x .&& y .|| z +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.andand %₁ %₂) +4 TestMod.z +5 (call top.broadcasted top.oror %₃ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# Scalar comparison chain +x < y < z +#--------------------- +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.y +8 TestMod.z +9 (call %₆ %₇ %₈) +10 (return %₉) +11 (return false) + +######################################## +# Broadcasted comparison chain +x .< y .< z +#--------------------- +1 TestMod.< +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 TestMod.< +6 TestMod.y +7 TestMod.z +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Mixed scalar / broadcasted comparison chain +a < b < c .< d .< e +#--------------------- +1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call %₁ %₂ %₃) +5 (gotoifnot %₄ label₁₁) +6 TestMod.< +7 TestMod.b +8 TestMod.c +9 (= slot₁/if_val (call %₆ %₇ %₈)) +10 (goto label₁₂) +11 (= slot₁/if_val false) +12 slot₁/if_val +13 TestMod.< +14 TestMod.c +15 TestMod.d +16 (call top.broadcasted %₁₃ %₁₄ %₁₅) +17 (call top.broadcasted top.& %₁₂ %₁₆) +18 TestMod.< +19 TestMod.d +20 TestMod.e +21 (call top.broadcasted %₁₈ %₁₉ %₂₀) +22 (call top.broadcasted top.& %₁₇ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) + +######################################## +# Mixed scalar / broadcasted comparison chain +a .< b .< c < d < e +#--------------------- +1 TestMod.< +2 TestMod.a +3 TestMod.b +4 (call top.broadcasted %₁ %₂ %₃) +5 
TestMod.< +6 TestMod.b +7 TestMod.c +8 (call top.broadcasted %₅ %₆ %₇) +9 (call top.broadcasted top.& %₄ %₈) +10 TestMod.< +11 TestMod.c +12 TestMod.d +13 (call %₁₀ %₁₁ %₁₂) +14 (gotoifnot %₁₃ label₂₀) +15 TestMod.< +16 TestMod.d +17 TestMod.e +18 (= slot₁/if_val (call %₁₅ %₁₆ %₁₇)) +19 (goto label₂₁) +20 (= slot₁/if_val false) +21 slot₁/if_val +22 (call top.broadcasted top.& %₉ %₂₁) +23 (call top.materialize %₂₂) +24 (return %₂₃) + +######################################## +# Comparison chain fused with other broadcasting +x .+ (a .< b .< c) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.< +4 TestMod.a +5 TestMod.b +6 (call top.broadcasted %₃ %₄ %₅) +7 TestMod.< +8 TestMod.b +9 TestMod.c +10 (call top.broadcasted %₇ %₈ %₉) +11 (call top.broadcasted top.& %₆ %₁₀) +12 (call top.broadcasted %₁ %₂ %₁₁) +13 (call top.materialize %₁₂) +14 (return %₁₃) + +######################################## +# Broadcast with literal_pow +x.^3 +#--------------------- +1 TestMod.^ +2 TestMod.x +3 (call core.apply_type top.Val 3) +4 (call %₃) +5 (call top.broadcasted top.literal_pow %₁ %₂ %₄) +6 (call top.materialize %₅) +7 (return %₆) + +######################################## +# Broadcast with keywords +f.(x, y, z = 1; w = 2) +#--------------------- +1 top.broadcasted_kwsyntax +2 (call core.tuple :z :w) +3 (call core.apply_type core.NamedTuple %₂) +4 (call core.tuple 1 2) +5 (call %₃ %₄) +6 TestMod.f +7 TestMod.x +8 TestMod.y +9 (call core.kwcall %₅ %₁ %₆ %₇ %₈) +10 (call top.materialize %₉) +11 (return %₁₀) + +######################################## +# Broadcast with unary dot syntax +(.+)(x,y) +#--------------------- +1 TestMod.+ +2 TestMod.x +3 TestMod.y +4 (call top.broadcasted %₁ %₂ %₃) +5 (call top.materialize %₄) +6 (return %₅) + +######################################## +# Trivial in-place broadcast update +x .= y +#--------------------- +1 TestMod.x +2 TestMod.y +3 (call top.broadcasted top.identity %₂) +4 (call top.materialize! 
%₁ %₃) +5 (return %₄) + +######################################## +# Fused in-place broadcast update +x .= y .+ z +#--------------------- +1 TestMod.x +2 TestMod.+ +3 TestMod.y +4 TestMod.z +5 (call top.broadcasted %₂ %₃ %₄) +6 (call top.materialize! %₁ %₅) +7 (return %₆) + +######################################## +# In-place broadcast update with property assignment on left hand side +x.prop .= y +#--------------------- +1 TestMod.x +2 (call top.dotgetproperty %₁ :prop) +3 TestMod.y +4 (call top.broadcasted top.identity %₃) +5 (call top.materialize! %₂ %₄) +6 (return %₅) + +######################################## +# In-place broadcast update with ref on left hand side +x[i,end] .= y +#--------------------- +1 TestMod.x +2 TestMod.i +3 (call top.lastindex %₁ 2) +4 (call top.dotview %₁ %₂ %₃) +5 TestMod.y +6 (call top.broadcasted top.identity %₅) +7 (call top.materialize! %₄ %₆) +8 (return %₇) + +######################################## +# <: as a function call +x <: y +#--------------------- +1 TestMod.<: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# >: as a function call +x >: y +#--------------------- +1 TestMod.>: +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# --> as a function call +x --> y +#--------------------- +1 TestMod.--> +2 TestMod.x +3 TestMod.y +4 (call %₁ %₂ %₃) +5 (return %₄) + +######################################## +# basic ccall +ccall(:strlen, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 (call top.unsafe_convert %₁ %₂) +4 (foreigncall :strlen (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂) +5 (return %₄) + +######################################## +# ccall with library name as a global var +ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ 
"asdfg") +3 (call top.unsafe_convert %₁ %₂) +4 (foreigncall (static_eval (call core.tuple :strlen TestMod.libc)) (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₃ %₂) +5 (return %₄) + +######################################## +# ccall with a calling convention +ccall(:foo, stdcall, Csize_t, ()) +#--------------------- +1 (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec)) 0 :stdcall) +2 (return %₁) + +######################################## +# ccall with Any args become core.Any and don't need conversion or GC roots +ccall(:foo, stdcall, Csize_t, (Any,), x) +#--------------------- +1 core.Any +2 TestMod.x +3 (foreigncall :foo (static_eval TestMod.Csize_t) (static_eval (call core.svec core.Any)) 0 :stdcall %₂) +4 (return %₃) + +######################################## +# ccall with variable as function name (must eval to a pointer) +ccall(ptr, Csize_t, (Cstring,), "asdfg") +#--------------------- +1 TestMod.Cstring +2 (call top.cconvert %₁ "asdfg") +3 TestMod.ptr +4 (call top.unsafe_convert %₁ %₂) +5 (foreigncall %₃ (static_eval TestMod.Csize_t) (static_eval (call core.svec TestMod.Cstring)) 0 :ccall %₄ %₂) +6 (return %₅) + +######################################## +# ccall with varargs +ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5") +#--------------------- +1 TestMod.Cstring +2 TestMod.Cstring +3 (call top.cconvert %₁ "%s = %s\n") +4 (call top.cconvert %₂ "2 + 2") +5 (call top.cconvert %₂ "5") +6 (call top.unsafe_convert %₁ %₃) +7 (call top.unsafe_convert %₂ %₄) +8 (call top.unsafe_convert %₂ %₅) +9 (foreigncall :printf (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.Cstring TestMod.Cstring TestMod.Cstring)) 1 :ccall %₆ %₇ %₈ %₃ %₄ %₅) +10 (return %₉) + +######################################## +# Error: ccall with too few arguments +ccall(:foo, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, Csize_t) +└──────────────────┘ ── too few arguments to 
ccall + +######################################## +# Error: ccall with calling conv and too few arguments +ccall(:foo, thiscall, Csize_t) +#--------------------- +LoweringError: +ccall(:foo, thiscall, Csize_t) +└────────────────────────────┘ ── too few arguments to ccall with calling convention specified + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, Csize_t, Cstring) +#--------------------- +LoweringError: +ccall(:foo, Csize_t, Cstring) +# └─────┘ ── ccall argument types must be a tuple; try `(T,)` + +######################################## +# Error: ccall without tuple for argument types +ccall(:foo, (Csize_t,), "arg") +#--------------------- +LoweringError: +ccall(:foo, (Csize_t,), "arg") +# └────────┘ ── ccall argument types must be a tuple; try `(T,)` and check if you specified a correct return type + +######################################## +# Error: ccall with library name which is a local variable +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let libc = "libc" + ccall((:strlen, libc), Csize_t, (Cstring,), "asdfg") +# └──┘ ── function name and library expression cannot reference local variable +end + +######################################## +# Error: ccall with return type which is a local variable +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Csize_t = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall return type cannot reference local variable +end + +######################################## +# Error: ccall with argument type which is a local variable +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +end +#--------------------- +LoweringError: +let Cstring = 1 + ccall(:strlen, Csize_t, (Cstring,), "asdfg") +# └─────┘ ── ccall argument type cannot reference local variable +end + +######################################## +# 
Error: ccall with too few arguments +ccall(:strlen, Csize_t, (Cstring,)) +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,)) +└─────────────────────────────────┘ ── Too few arguments in ccall compared to argument types + +######################################## +# Error: ccall with too many arguments +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +#--------------------- +LoweringError: +ccall(:strlen, Csize_t, (Cstring,), "asdfg", "blah") +└──────────────────────────────────────────────────┘ ── More arguments than types in ccall + +######################################## +# Error: ccall varargs with too few args +ccall(:foo, Csize_t, (Cstring...,), "asdfg") +#--------------------- +LoweringError: +ccall(:foo, Csize_t, (Cstring...,), "asdfg") +# └────────┘ ── C ABI prohibits vararg without one required argument + +######################################## +# Error: ccall with multiple varargs +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +#--------------------- +LoweringError: +ccall(:foo, Csize_t, (Cstring..., Cstring...), "asdfg", "blah") +# └────────┘ ── only the trailing ccall argument type should have `...` + +######################################## +# cglobal special support for (sym, lib) tuple +cglobal((:sym, lib), Int) +#--------------------- +1 TestMod.Int +2 (call core.cglobal (static_eval (call core.tuple :sym TestMod.lib)) %₁) +3 (return %₂) + +######################################## +# cglobal - non-tuple expressions in first arg are lowered as normal +cglobal(f(), Int) +#--------------------- +1 TestMod.f +2 (call %₁) +3 TestMod.Int +4 (call core.cglobal %₂ %₃) +5 (return %₄) + +######################################## +# Error: cglobal with library name referencing local variable +let func="myfunc" + cglobal((func, "somelib"), Int) +end +#--------------------- +LoweringError: +let func="myfunc" + cglobal((func, "somelib"), Int) +# └──┘ ── function name and library expression cannot reference local 
variable +end + +######################################## +# Error: cglobal too many arguments +cglobal(:sym, Int, blah) +#--------------------- +LoweringError: +cglobal(:sym, Int, blah) +└──────────────────────┘ ── cglobal must have one or two arguments + +######################################## +# Error: assigning to `cglobal` +cglobal = 10 +#--------------------- +LoweringError: +cglobal = 10 +└─────┘ ── invalid assignment location + +######################################## +# Error: assigning to `ccall` +ccall = 10 +#--------------------- +LoweringError: +ccall = 10 +└───┘ ── invalid assignment location + +######################################## +# Error: assigning to `var"ccall"` +var"ccall" = 10 +#--------------------- +LoweringError: +var"ccall" = 10 +# └───┘ ── invalid assignment location + +######################################## +# Error: Invalid function name ccall +function ccall() +end +#--------------------- +LoweringError: +function ccall() +# └───┘ ── Invalid function name +end + +######################################## +# Error: Invalid function name ccall +function A.ccall() +end +#--------------------- +LoweringError: +function A.ccall() +# └─────┘ ── Invalid function name +end + +######################################## +# Nested splat: simple case +tuple((xs...)...) +#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 TestMod.xs +4 (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃) +5 (return %₄) + +######################################## +# Nested splat: with mixed arguments +tuple(a, (xs...)..., b) +#--------------------- +1 TestMod.tuple +2 TestMod.a +3 (call core.tuple %₂) +4 (call core.tuple top.iterate %₁ %₃) +5 TestMod.xs +6 TestMod.b +7 (call core.tuple %₆) +8 (call core.tuple %₇) +9 (call core._apply_iterate top.iterate core._apply_iterate %₄ %₅ %₈) +10 (return %₉) + +######################################## +# Nested splat: multiple nested splats +tuple((xs...)..., (ys...)...) 
+#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 TestMod.xs +4 TestMod.ys +5 (call core._apply_iterate top.iterate core._apply_iterate %₂ %₃ %₄) +6 (return %₅) + +######################################## +# Nested splat: triple nesting +tuple(((xs...)...)...) +#--------------------- +1 TestMod.tuple +2 (call core.tuple top.iterate %₁) +3 (call core.tuple top.iterate core._apply_iterate %₂) +4 TestMod.xs +5 (call core._apply_iterate top.iterate core._apply_iterate %₃ %₄) +6 (return %₅) + +######################################## +# Error: Standalone splat expression +(xs...) +#--------------------- +LoweringError: +(xs...) +#└───┘ ── `...` expression outside call diff --git a/JuliaLowering/test/functions.jl b/JuliaLowering/test/functions.jl new file mode 100644 index 0000000000000..8193d2c7b2efe --- /dev/null +++ b/JuliaLowering/test/functions.jl @@ -0,0 +1,553 @@ +@testset "Functions" begin + +test_mod = Module() + +# Function calls +# Splatting +@test JuliaLowering.include_string(test_mod, """ +let + x = 1 + y = 2 + zs = (3,4) + w = 5 + (tuple(zs...), + tuple(zs..., w), + tuple(y, zs...), + tuple(x, y, zs..., w)) +end +""") == ((3,4), + (3,4,5), + (2,3,4), + (1,2,3,4,5)) + +# Nested splatting +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2], [3, 4]] + tuple((xs...)...) +end +""") == (1, 2, 3, 4) + +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2]] + ys = [[3, 4]] + tuple((xs...)..., (ys...)...) +end +""") == (1, 2, 3, 4) + +# Multiple (>2) nested splat +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + tuple(((xs...)...)...) +end +""") == (1, 2) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + ys = [[[3, 4]]] + tuple(((xs...)...)..., ((ys...)...)...) 
+end +""") == (1, 2, 3, 4) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[[1, 2]]] + ys = [[[3, 4]]] + tuple(((xs...)...)..., ((ys...)...)) +end +""") == (1, 2, [3, 4]) + +# Trailing comma case should still work (different semantics) +@test JuliaLowering.include_string(test_mod, """ +let + xs = [[1, 2], [3, 4]] + tuple((xs...,)...) +end +""") == ([1, 2], [3, 4]) + +# Keyword calls +Base.eval(test_mod, :( +begin + function kwtest(; kws...) + values(kws) + end + + # Note this definition generates an arguably-spurious warning when run via + # `Pkg.test()` due to the use of `--warn-override=true` in the test + # harness. + function kwtest() + "non-kw version of kwtest" + end +end +)) + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (c=3,d=4) + kwtest(; kws..., a=1, d=0, e=5) +end +""") == (c=3, d=0, a=1, e=5) + +@test JuliaLowering.include_string(test_mod, """ +let + kws = (;) + kwtest(; kws..., kws...) +end +""") == "non-kw version of kwtest" + +# literal_pow +@test JuliaLowering.include_string(test_mod, """ +2^4 +""") == 16 + +#------------------------------------------------------------------------------- +# Arrow syntax +@test JuliaLowering.include_string(test_mod, """ +let + f = ((x::T, y::T) where T) -> x + y + f(1, 2) +end +""") === 3 + +@test JuliaLowering.include_string(test_mod, """ +let + f = ((x::T; y=2) where T) -> x + y + f(1) +end +""") === 3 + +# Passes desugaring, but T is detected as unused and throws an error. +# Is it clear whether this should be `f(x::T) where T` or `f(x::T where T)`? 
+@test_broken JuliaLowering.include_string(test_mod, """ +let + f = ((x::T) where T) -> x + f(1) +end +""") === 1 + +#------------------------------------------------------------------------------- +# Function definitions +@test JuliaLowering.include_string(test_mod, """ +begin + function f(x) + y = x + 1 + "hi", x, y + end + + f(1) +end +""") == ("hi", 1, 2) + +@test JuliaLowering.include_string(test_mod, """ +begin + function unused_arg(x, _, y) + x + y + end + unused_arg(1,2,3) +end +""") == 4 + +@test JuliaLowering.include_string(test_mod, """ +begin + function g(x)::Int + if x == 1 + return 42.0 + end + 0xff + end + (g(1), g(2)) +end +""") === (42, 255) + +# static parameters +@test JuliaLowering.include_string(test_mod, """ +begin + function h(x, y) + "fallback" + end + function h(::Vector{T}, ::S) where {T, S <: T} + T, S + end + (h(1, 2), h(Number[0xff], 1.0), h(Int[1], 1), h(Int[1], 1.0)) +end +""") === ("fallback", (Number, Float64), (Int, Int), "fallback") + +Base.eval(test_mod, +:(struct X1{T} end) +) + +# `where` params used in function obj type +@test JuliaLowering.include_string(test_mod, """ +begin + function (x::X1{T})() where T + T + end + X1{Int}()() +end +""") === Int + +Base.include_string(test_mod, +""" + struct X end + + # Erroneous `convert` to test type assert in function return values + Base.convert(::Type{X}, y) = y +""") + +@test_throws TypeError JuliaLowering.include_string(test_mod, """ +begin + function h()::X + return nothing + end + h() +end +""") + +@test JuliaLowering.include_string(test_mod, """ +x = 0 +function f_return_in_value_pos() + global x + x = return 42 +end + +(f_return_in_value_pos(), x) +""") === (42, 0) + +@testset "Default positional arguments" begin + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_simple(x=1, y=2, z=x) + (x,y,z) + end + + (f_def_simple(), f_def_simple(10), f_def_simple(10,20), f_def_simple(10,20,30)) + end + """) == ((1,2,1), (10,2,10), (10,20,10), (10,20,30)) + + @test 
JuliaLowering.include_string(test_mod, """ + begin + function f_def_placeholders(::T=1, _::S=1.0) where {T,S} + (T,S) + end + + (f_def_placeholders(), f_def_placeholders(1.0), f_def_placeholders(1.0, 1)) + end + """) == ((Int,Float64), (Float64,Float64), (Float64,Int)) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_typevars(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} + (x, y, z, T, S, U) + end + + (f_def_typevars(1), f_def_typevars(1,[1.0]), f_def_typevars(1,[1.0],-1.0)) + end + """) == ((1, [1], 2, Int, Vector{Int}, Int), + (1, [1.0], 2, Float64, Vector{Float64}, Int), + (1, [1.0], -1.0, Float64, Vector{Float64}, Float64)) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_typevar_vararg_undef(x::T, y::Vararg{S}) where {T,S} + (x, y, @isdefined S) + end + + (f_def_typevar_vararg_undef(1), f_def_typevar_vararg_undef(1,2), f_def_typevar_vararg_undef(1,2,3)) + end + """) === ((1, (), false), (1, (2,), true), (1, (2, 3), true)) + + @test JuliaLowering.include_string(test_mod, """ + begin + f_def_typevar_with_lowerbound(x::T) where {T>:Int} = + (x, @isdefined(T)) + (f_def_typevar_with_lowerbound(1), f_def_typevar_with_lowerbound(1.0)) + end + """) == ((1, true), (1.0, false)) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_slurp(x=1, ys...) + (x, ys) + end + + (f_def_slurp(), f_def_slurp(2), f_def_slurp(2,3)) + end + """) == ((1, ()), + (2, ()), + (2, (3,))) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_ret_type(x=1.0)::Int + x + end + + (f_def_ret_type(), f_def_ret_type(10.0)) + end + """) === (1,10) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_slurp_splat(ys...=(1,2)...) + ys + end + + (f_def_slurp_splat(), f_def_slurp_splat(10,20)) + end + """) == ((1,2), + (10,20)) + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_def_destructure(x, (y,z)::Tuple{Int,Int}, (w,)...=(4,)...) 
+ (x,y,z,w) + end + + f_def_destructure(1, (2,3)) + end + """) == (1,2,3,4) + +end + +@testset "Slot flags" begin + + @test JuliaLowering.include_string(test_mod, """ + begin + function f_nospecialize(u, v, @nospecialize(x), y, @nospecialize(z)) + (u, v, x, y, z) + end + + f_nospecialize(1,2,3,4,5) + end + """) == (1,2,3,4,5) + # We dig into the internal of `Method` here to check which slots have been + # flagged as nospecialize. + @test only(methods(test_mod.f_nospecialize)).nospecialize == 0b10100 + + JuliaLowering.include_string(test_mod, """ + function f_slotflags(x, y, f, z) + f() + x + y + end + """) + @test only(methods(test_mod.f_slotflags)).called == 0b0100 + + # Branching combined with nospecialize meta in CodeInfo + @test JuliaLowering.include_string(test_mod, """ + begin + function f_branch_meta(@nospecialize(x), cond) + if cond + x + 1 + else + x + 2 + end + end + + (f_branch_meta(10, false), f_branch_meta(20, true)) + end + """) == (12, 21) +end + +@testset "Keyword functions" begin + JuliaLowering.include_string(test_mod, """ + function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) + (a, b, x, y) + end + """) + + @test test_mod.f_kw_simple() === (1, 1.0, 'a', true) + @test test_mod.f_kw_simple(x='b') === (1, 1.0, 'b', true) + @test test_mod.f_kw_simple(y=false) === (1, 1.0, 'a', false) + @test test_mod.f_kw_simple(x='b', y=false) === (1, 1.0, 'b', false) + + @test test_mod.f_kw_simple(20) === (20, 1.0, 'a', true) + @test test_mod.f_kw_simple(20; x='b') === (20, 1.0, 'b', true) + @test test_mod.f_kw_simple(20; y=false) === (20, 1.0, 'a', false) + @test test_mod.f_kw_simple(20; x='b', y=false) === (20, 1.0, 'b', false) + + @test test_mod.f_kw_simple(20, 2.0) === (20, 2.0, 'a', true) + @test test_mod.f_kw_simple(20, 2.0; x='b') === (20, 2.0, 'b', true) + @test test_mod.f_kw_simple(20, 2.0; y=false) === (20, 2.0, 'a', false) + @test test_mod.f_kw_simple(20, 2.0; x='b', y=false) === (20, 2.0, 'b', false) + + # Bad types for keyword args 
throw a type error + @test_throws(TypeError(Symbol("keyword argument"), :x, Char, 100), + test_mod.f_kw_simple(x=100)) + @test_throws(TypeError(Symbol("keyword argument"), :y, Bool, 100), + test_mod.f_kw_simple(y=100)) + + # Keywords which aren't present throw an error + try + test_mod.f_kw_simple(20; not_present=100) + @test false + catch exc + @test exc isa MethodError + @test exc.f == Core.kwcall + @test exc.args == ((; not_present=100), test_mod.f_kw_simple, 20, 1.0) + end + + # Slurping of positional args with keywords + JuliaLowering.include_string(test_mod, """ + function f_pos_slurp_with_kws(z, args...; x=1,y=2) + args + end + """) + @test test_mod.f_pos_slurp_with_kws(3, 2, 1; x = 100) === (2,1) + @test test_mod.f_pos_slurp_with_kws(3, 2, 1) === (2,1) + + # Slurping of keyword args + JuliaLowering.include_string(test_mod, """ + function f_kw_slurp_all(; kws...) + kws + end + """) + @test values(test_mod.f_kw_slurp_all(x = 1, y = 2)) === (x=1, y=2) + @test values(test_mod.f_kw_slurp_all()) === (;) + + # Slurping of keyword args + JuliaLowering.include_string(test_mod, """ + function f_kw_slurp_some(; x=1, y=2, kws...) + kws + end + """) + @test values(test_mod.f_kw_slurp_some(z=3, x = 1, y = 2, w=4)) === (z=3, w=4) + @test values(test_mod.f_kw_slurp_some(x = 1)) === (;) + @test values(test_mod.f_kw_slurp_some()) === (;) + + # Keyword defaults which depend on other keywords. 
+ JuliaLowering.include_string(test_mod, """ + begin + aaa = :outer + function f_kw_default_dependencies(; x=1, y=x, bbb=aaa, aaa=:aaa_kw, ccc=aaa) + (x, y, bbb, aaa, ccc) + end + end + """) + @test values(test_mod.f_kw_default_dependencies()) === (1, 1, :outer, :aaa_kw, :aaa_kw) + @test values(test_mod.f_kw_default_dependencies(x = 10)) === (10, 10, :outer, :aaa_kw, :aaa_kw) + @test values(test_mod.f_kw_default_dependencies(x = 10, aaa=:blah)) === (10, 10, :outer, :blah, :blah) + + # Keywords with static parameters + JuliaLowering.include_string(test_mod, """ + function f_kw_sparams(x::X, y::Y; a::A, b::B) where {X,Y,A,B} + (X,Y,A,B) + end + """) + @test values(test_mod.f_kw_sparams(1, 1.0; a="a", b='b')) === (Int, Float64, String, Char) + + # Keywords with static parameters, where some keyword types can be inferred + # based on the positional parameters and others cannot. + JuliaLowering.include_string(test_mod, """ + function f_kw_type_errors(x::X; a::F, b::X) where {X<:Integer,F<:AbstractFloat} + (X,F) + end + """) + @test values(test_mod.f_kw_type_errors(1; a=1.0, b=10)) === (Int, Float64) + # The following is a keyword TypeError because we can infer `X` based on + # the positional parameters and use that to check the type of `b`. + @test_throws TypeError values(test_mod.f_kw_type_errors(1; a=1.0, b="str")) + # The following is only a method error as we can't infer `F` prior to + # dispatching to the body function. 
+ @test_throws MethodError values(test_mod.f_kw_type_errors(1; a="str", b=10)) + + # Throwing of UndefKeywordError + JuliaLowering.include_string(test_mod, """ + function f_kw_no_default(; x) + x + end + """) + @test test_mod.f_kw_no_default(x = 10) == 10 + @test_throws UndefKeywordError(:x) test_mod.f_kw_no_default() == 10 + + # Closure with keywords + cl = JuliaLowering.include_string(test_mod, """ + let y = 1 + function f_kw_closure(; x=10) + x + y + end + end + """) + @test cl() == 11 + @test cl(x = 20) == 21 +end + +@testset "Generated functions" begin + @test JuliaLowering.include_string(test_mod, raw""" + begin + @generated function f_gen(x::NTuple{N,T}) where {N,T} + quote + ($x, $N, $T) + end + end + + f_gen((1,2,3,4,5)) + end + """) == (NTuple{5,Int}, 5, Int) + + @test JuliaLowering.include_string(test_mod, raw""" + begin + function f_partially_gen(x::NTuple{N,T}) where {N,T} + shared = :shared_stuff + if @generated + if N == 2 + error("intentionally broken codegen (will trigger nongen branch)") + end + quote + unshared = (:gen, ($x, $N, $T)) + end + else + unshared = (:nongen, (typeof(x), N, T)) + end + (shared, unshared) + end + + (f_partially_gen((1,2)), f_partially_gen((1,2,3,4,5))) + end + """) == ((:shared_stuff, (:nongen, (NTuple{2,Int}, 2, Int))), + (:shared_stuff, (:gen, (NTuple{5,Int}, 5, Int)))) + + # Test generated function edges to bindings + # (see also https://github.com/JuliaLang/julia/pull/57230) + JuliaLowering.include_string(test_mod, raw""" + const delete_me = 4 + @generated f_generated_return_delete_me() = return :(delete_me) + """) + @test test_mod.f_generated_return_delete_me() == 4 + Base.delete_binding(test_mod, :delete_me) + @test_throws UndefVarError test_mod.f_generated_return_delete_me() +end + +@testset "Broadcast" begin + @test JuliaLowering.include_string(test_mod, """ + let x = [1,2], y = [3,4], z = [5,6] + x .* y .+ z + end + """) == [8, 14] + + @test JuliaLowering.include_string(test_mod, """ + let nums = [1, 2, 3] + 
string.(nums, base=2; pad=2) + end + """) == ["01", "10", "11"] + + @test JuliaLowering.include_string(test_mod, """ + let lhs = [0,0], x = [1,2], y = [3,4], z = [5,6] + lhs .= x .* y .+ z + lhs + end + """) == [8, 14] + + @test JuliaLowering.include_string(test_mod, """ + [1,2] .+ ([3,4] .< [5,6] .< [7,1]) + """) == [2, 2] + + @test JuliaLowering.include_string(test_mod, """ + let + x = [0,0,0,0] + x[begin+1:end-1] .= [1,2] .+ [3,4] + x + end + """) == [0,4,6,0] +end + +end diff --git a/JuliaLowering/test/functions_ir.jl b/JuliaLowering/test/functions_ir.jl new file mode 100644 index 0000000000000..a537757b881ba --- /dev/null +++ b/JuliaLowering/test/functions_ir.jl @@ -0,0 +1,1592 @@ +######################################## +# Function declaration with no methods +function f +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (return %₃) + +######################################## +# Functions with placeholder arg +function f(x, _, y) + x + y +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/_(!read) slot₄/y] + 1 TestMod.+ + 2 (call %₁ slot₂/x slot₄/y) + 3 (return %₂) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Functions with argument types only, no name +function f(::T, x) + x +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] + 1 slot₃/x + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (return %₁₂) + +######################################## +# Functions argument types +function 
f(x, y::T) + body +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ core.Any %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] + 1 TestMod.body + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (return %₁₂) + +######################################## +# Functions with slurp of Any +function f(x, ys...) + body +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.apply_type core.Vararg core.Any) +6 (call core.svec %₄ core.Any %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] + 1 TestMod.body + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (return %₁₂) + +######################################## +# Functions with slurp of T +function f(x, ys::T...) 
+ body +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.apply_type core.Vararg %₅) +7 (call core.svec %₄ core.Any %₆) +8 (call core.svec) +9 SourceLocation::1:10 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys(!read)] + 1 TestMod.body + 2 (return %₁) +12 latestworld +13 TestMod.f +14 (return %₁₃) + +######################################## +# Error: Function with slurp not in last position arg +function f(xs..., y) + body +end +#--------------------- +LoweringError: +function f(xs..., y) +# └───┘ ── `...` may only be used for the last positional argument + body +end + +######################################## +# Basic static parameters +function f(::T, ::U, ::V) where T where {U,V} + (T,U,V) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (= slot₂/U (call core.TypeVar :U)) +4 (= slot₃/V (call core.TypeVar :V)) +5 (= slot₁/T (call core.TypeVar :T)) +6 TestMod.f +7 (call core.Typeof %₆) +8 slot₁/T +9 slot₂/U +10 slot₃/V +11 (call core.svec %₇ %₈ %₉ %₁₀) +12 slot₂/U +13 slot₃/V +14 slot₁/T +15 (call core.svec %₁₂ %₁₃ %₁₄) +16 SourceLocation::1:10 +17 (call core.svec %₁₁ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/_(!read) slot₄/_(!read)] + 1 static_parameter₃ + 2 static_parameter₁ + 3 static_parameter₂ + 4 (call core.tuple %₁ %₂ %₃) + 5 (return %₄) +19 latestworld +20 TestMod.f +21 (return %₂₀) + +######################################## +# Static parameter with bounds and used with apply_type in argument +function f(::S{T}) where X <: T <: Y + T +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.X +4 TestMod.Y +5 (= slot₁/T (call core.TypeVar :T %₃ %₄)) +6 TestMod.f +7 (call core.Typeof %₆) +8 TestMod.S +9 slot₁/T +10 (call core.apply_type %₈ %₉) +11 (call core.svec %₇ %₁₀) +12 slot₁/T +13 (call core.svec %₁₂) +14 
SourceLocation::1:10 +15 (call core.svec %₁₁ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 static_parameter₁ + 2 (return %₁) +17 latestworld +18 TestMod.f +19 (return %₁₈) + +######################################## +# Static parameter with lower bound +function f(::S{T}) where T >: X + T +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.X +4 (= slot₁/T (call core.TypeVar :T %₃ core.Any)) +5 TestMod.f +6 (call core.Typeof %₅) +7 TestMod.S +8 slot₁/T +9 (call core.apply_type %₇ %₈) +10 (call core.svec %₆ %₉) +11 slot₁/T +12 (call core.svec %₁₁) +13 SourceLocation::1:10 +14 (call core.svec %₁₀ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 static_parameter₁ + 2 (return %₁) +16 latestworld +17 TestMod.f +18 (return %₁₇) + +######################################## +# Static parameter which is used only in the bounds of another static parameter +# See https://github.com/JuliaLang/julia/issues/49275 +function f(x, y::S) where {T, S<:AbstractVector{T}} + (T,S) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 TestMod.AbstractVector +5 slot₂/T +6 (call core.apply_type %₄ %₅) +7 (= slot₁/S (call core.TypeVar :S %₆)) +8 TestMod.f +9 (call core.Typeof %₈) +10 slot₁/S +11 (call core.svec %₉ core.Any %₁₀) +12 slot₂/T +13 slot₁/S +14 (call core.svec %₁₂ %₁₃) +15 SourceLocation::1:10 +16 (call core.svec %₁₁ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] + 1 static_parameter₁ + 2 static_parameter₂ + 3 (call core.tuple %₁ %₂) + 4 (return %₃) +18 latestworld +19 TestMod.f +20 (return %₁₉) + +######################################## +# Error: Static parameter which is unused +function f(::T) where {T,S} + (T,S) +end +#--------------------- +LoweringError: +function f(::T) where {T,S} +# ╙ ── Method definition declares type variable but does not use it in the 
type of any function parameter + (T,S) +end + +######################################## +# Return types +function f(x)::Int + if x + 42.0 + end + 0xff +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/tmp(!read)] + 1 TestMod.Int + 2 (gotoifnot slot₂/x label₃) + 3 (= slot₃/tmp 0xff) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₁) + 8 slot₃/tmp + 9 (call top.convert %₁ %₈) + 10 (= slot₃/tmp (call core.typeassert %₉ %₁)) + 11 slot₃/tmp + 12 (return %₁₁) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Callable type +function (::T)(x) + x +end +#--------------------- +1 TestMod.T +2 (call core.svec %₁ core.Any) +3 (call core.svec) +4 SourceLocation::1:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ + slots: [slot₁/#self#(!read) slot₂/x] + 1 slot₂/x + 2 (return %₁) +7 latestworld +8 (return core.nothing) + +######################################## +# Callable type with instance +function (y::T)(x) + (y, x) +end +#--------------------- +1 TestMod.T +2 (call core.svec %₁ core.Any) +3 (call core.svec) +4 SourceLocation::1:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ + slots: [slot₁/y slot₂/x] + 1 (call core.tuple slot₁/y slot₂/x) + 2 (return %₁) +7 latestworld +8 (return core.nothing) + +######################################## +# `where` params used in callable object type +function (x::X1{T})() where T + T +end +#--------------------- +1 (= slot₁/T (call core.TypeVar :T)) +2 TestMod.X1 +3 slot₁/T +4 (call core.apply_type %₂ %₃) +5 (call core.svec %₄) +6 slot₁/T +7 (call core.svec %₆) +8 SourceLocation::1:10 +9 (call core.svec %₅ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/x(!read)] + 1 static_parameter₁ + 2 (return %₁) 
+11 latestworld +12 (return core.nothing) + +######################################## +# Function with module ref in name +function A.f() +end +#--------------------- +1 TestMod.A +2 (call top.getproperty %₁ :f) +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) +9 latestworld +10 (return core.nothing) + +######################################## +# Error: Invalid dotop function name +function (.+)(x,y) +end +#--------------------- +LoweringError: +function (.+)(x,y) +# └───────┘ ── Cannot define function using `.` broadcast syntax +end + +######################################## +# Error: Invalid function name +function f[](x,y) +end +#--------------------- +LoweringError: +function f[](x,y) +# └─┘ ── Invalid function name +end + +######################################## +# Simple positional args with defaults +function f(x::T, y::S=1, z::U=2) + (x,y) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.T +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(called) slot₂/x] + 1 (call slot₁/#self# slot₂/x 1 2) + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.T +15 TestMod.S +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 (call core.svec) +18 SourceLocation::1:10 +19 (call core.svec %₁₆ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +21 latestworld +22 TestMod.f +23 (call core.Typeof %₂₂) +24 TestMod.T +25 TestMod.S +26 TestMod.U +27 (call core.svec %₂₃ %₂₄ %₂₅ %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y 
slot₄/z(!read)] + 1 (call core.tuple slot₂/x slot₃/y) + 2 (return %₁) +32 latestworld +33 TestMod.f +34 (return %₃₃) + +######################################## +# Default positional args which depend on other args +function f(x=1, y=x) + (x,y) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(called)] + 1 (call slot₁/#self# 1) + 2 (return %₁) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(called) slot₂/x] + 1 (call slot₁/#self# slot₂/x slot₂/x) + 2 (return %₁) +18 latestworld +19 TestMod.f +20 (call core.Typeof %₁₉) +21 (call core.svec %₂₀ core.Any core.Any) +22 (call core.svec) +23 SourceLocation::1:10 +24 (call core.svec %₂₁ %₂₂ %₂₃) +25 --- method core.nothing %₂₄ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y] + 1 (call core.tuple slot₂/x slot₃/y) + 2 (return %₁) +26 latestworld +27 TestMod.f +28 (return %₂₇) + +######################################## +# Default positional args with missing arg names (implicit placeholders) +function f(::Int, y=1, z=2) + (y, z) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.Int +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(called) slot₂/_] + 1 (call slot₁/#self# slot₂/_ 1 2) + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.Int +15 (call core.svec %₁₃ %₁₄ core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(called) slot₂/_ slot₃/y] + 1 (call slot₁/#self# 
slot₂/_ slot₃/y 2) + 2 (return %₁) +20 latestworld +21 TestMod.f +22 (call core.Typeof %₂₁) +23 TestMod.Int +24 (call core.svec %₂₂ %₂₃ core.Any core.Any) +25 (call core.svec) +26 SourceLocation::1:10 +27 (call core.svec %₂₄ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/y slot₄/z] + 1 (call core.tuple slot₃/y slot₄/z) + 2 (return %₁) +29 latestworld +30 TestMod.f +31 (return %₃₀) + +######################################## +# Default positional args with placeholders +function f(_::Int, x=1) + x +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 TestMod.Int +6 (call core.svec %₄ %₅) +7 (call core.svec) +8 SourceLocation::1:10 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(called) slot₂/_] + 1 (call slot₁/#self# slot₂/_ 1) + 2 (return %₁) +11 latestworld +12 TestMod.f +13 (call core.Typeof %₁₂) +14 TestMod.Int +15 (call core.svec %₁₃ %₁₄ core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/_(!read) slot₃/x] + 1 slot₃/x + 2 (return %₁) +20 latestworld +21 TestMod.f +22 (return %₂₁) + +######################################## +# Positional args with defaults and `where` clauses +function f(x::T, y::S=1, z::U=2) where {T,S<:T,U<:S} + (x,y,z) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 slot₂/T +5 (= slot₁/S (call core.TypeVar :S %₄)) +6 slot₁/S +7 (= slot₃/U (call core.TypeVar :U %₆)) +8 TestMod.f +9 (call core.Typeof %₈) +10 slot₂/T +11 (call core.svec %₉ %₁₀) +12 slot₂/T +13 (call core.svec %₁₂) +14 SourceLocation::1:10 +15 (call core.svec %₁₁ %₁₃ %₁₄) +16 --- method core.nothing %₁₅ + slots: [slot₁/#self#(called) slot₂/x] + 1 (call slot₁/#self# slot₂/x 1 2) + 2 (return %₁) +17 latestworld +18 TestMod.f +19 (call core.Typeof %₁₈) +20 slot₂/T +21 slot₁/S +22 (call 
core.svec %₁₉ %₂₀ %₂₁) +23 slot₂/T +24 slot₁/S +25 (call core.svec %₂₃ %₂₄) +26 SourceLocation::1:10 +27 (call core.svec %₂₂ %₂₅ %₂₆) +28 --- method core.nothing %₂₇ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +29 latestworld +30 TestMod.f +31 (call core.Typeof %₃₀) +32 slot₂/T +33 slot₁/S +34 slot₃/U +35 (call core.svec %₃₁ %₃₂ %₃₃ %₃₄) +36 slot₂/T +37 slot₁/S +38 slot₃/U +39 (call core.svec %₃₆ %₃₇ %₃₈) +40 SourceLocation::1:10 +41 (call core.svec %₃₅ %₃₉ %₄₀) +42 --- method core.nothing %₄₁ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] + 1 (call core.tuple slot₂/x slot₃/y slot₄/z) + 2 (return %₁) +43 latestworld +44 TestMod.f +45 (return %₄₄) + +######################################## +# Positional args and type parameters with transitive dependencies +# See https://github.com/JuliaLang/julia/issues/49275 - the first method +# generated here for only `x` should contain zero type parameters. +function f(x, y::S=[1], z::U=2) where {T, S<:AbstractVector{T}, U} + (x, y, z, T, S, U) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 (= slot₂/T (call core.TypeVar :T)) +4 TestMod.AbstractVector +5 slot₂/T +6 (call core.apply_type %₄ %₅) +7 (= slot₁/S (call core.TypeVar :S %₆)) +8 (= slot₃/U (call core.TypeVar :U)) +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::1:10 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- method core.nothing %₁₄ + slots: [slot₁/#self#(called) slot₂/x] + 1 (call top.vect 1) + 2 (call slot₁/#self# slot₂/x %₁ 2) + 3 (return %₂) +16 latestworld +17 TestMod.f +18 (call core.Typeof %₁₇) +19 slot₁/S +20 (call core.svec %₁₈ core.Any %₁₉) +21 slot₂/T +22 slot₁/S +23 (call core.svec %₂₁ %₂₂) +24 SourceLocation::1:10 +25 (call core.svec %₂₀ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ + slots: [slot₁/#self#(called) slot₂/x slot₃/y] + 1 (call slot₁/#self# slot₂/x slot₃/y 2) + 2 (return %₁) +27 latestworld +28 
TestMod.f +29 (call core.Typeof %₂₈) +30 slot₁/S +31 slot₃/U +32 (call core.svec %₂₉ core.Any %₃₀ %₃₁) +33 slot₂/T +34 slot₁/S +35 slot₃/U +36 (call core.svec %₃₃ %₃₄ %₃₅) +37 SourceLocation::1:10 +38 (call core.svec %₃₂ %₃₆ %₃₇) +39 --- method core.nothing %₃₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/z] + 1 static_parameter₁ + 2 static_parameter₂ + 3 static_parameter₃ + 4 (call core.tuple slot₂/x slot₃/y slot₄/z %₁ %₂ %₃) + 5 (return %₄) +40 latestworld +41 TestMod.f +42 (return %₄₁) + +######################################## +# Default positional args are allowed before trailing slurp with no default +function f(x=1, ys...) + ys +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(called)] + 1 (call slot₁/#self# 1) + 2 (return %₁) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ core.Any %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/ys] + 1 slot₃/ys + 2 (return %₁) +19 latestworld +20 TestMod.f +21 (return %₂₀) + +######################################## +# Error: Default positional args before non-default arg +function f(x=1, ys, z=2) + ys +end +#--------------------- +LoweringError: +function f(x=1, ys, z=2) +# └─┘ ── optional positional arguments must occur at end + ys +end + +######################################## +# Positional arg with slurp and default +function f(xs...=1) + xs +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(called)] + 1 (call 
slot₁/#self# 1) + 2 (return %₁) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/xs] + 1 slot₂/xs + 2 (return %₁) +19 latestworld +20 TestMod.f +21 (return %₂₀) + +######################################## +# Positional arg with slurp and splatted default value +function f(xs...=(1,2)...) + xs +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#] + 1 (call core.tuple 1 2) + 2 (call core._apply_iterate top.iterate slot₁/#self# %₁) + 3 (return %₂) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.apply_type core.Vararg core.Any) +14 (call core.svec %₁₂ %₁₃) +15 (call core.svec) +16 SourceLocation::1:10 +17 (call core.svec %₁₄ %₁₅ %₁₆) +18 --- method core.nothing %₁₇ + slots: [slot₁/#self#(!read) slot₂/xs] + 1 slot₂/xs + 2 (return %₁) +19 latestworld +20 TestMod.f +21 (return %₂₀) + +######################################## +# Trivial function argument destructuring +function f(x, (y,z), w) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/destructured_arg slot₄/w(!read) slot₅/iterstate slot₆/y(!read) slot₇/z(!read)] + 1 (call top.indexed_iterate slot₃/destructured_arg 1) + 2 (= slot₆/y (call core.getfield %₁ 1)) + 3 (= slot₅/iterstate (call core.getfield %₁ 2)) + 4 slot₅/iterstate + 5 (call top.indexed_iterate slot₃/destructured_arg 2 %₄) + 6 (= slot₇/z (call 
core.getfield %₅ 1)) + 7 (return core.nothing) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Function argument destructuring combined with splats, types and defaults +function f((x,)::T...=rhs) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(called)] + 1 TestMod.rhs + 2 (call slot₁/#self# %₁) + 3 (return %₂) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 TestMod.T +14 (call core.apply_type core.Vararg %₁₃) +15 (call core.svec %₁₂ %₁₄) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/x(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) + 2 (= slot₃/x (call core.getfield %₁ 1)) + 3 (return core.nothing) +20 latestworld +21 TestMod.f +22 (return %₂₁) + +######################################## +# Declared return type combined with a default positional argument +function f(x=default_x)::T +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(called)] + 1 TestMod.default_x + 2 (call slot₁/#self# %₁) + 3 (return %₂) +10 latestworld +11 TestMod.f +12 (call core.Typeof %₁₁) +13 (call core.svec %₁₂ core.Any) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/tmp(!read)] + 1 TestMod.T + 2 (= slot₃/tmp core.nothing) + 3 slot₃/tmp + 4 (call core.isa %₃ %₁) + 5 (gotoifnot %₄ label₇) + 6 (goto label₁₀) + 7 slot₃/tmp + 8 (call top.convert %₁ %₇) + 9 (= slot₃/tmp 
(call core.typeassert %₈ %₁)) + 10 slot₃/tmp + 11 (return %₁₀) +18 latestworld +19 TestMod.f +20 (return %₁₉) + +######################################## +# Duplicate destructured placeholders ok +function f((_,), (_,)) +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/destructured_arg] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) + 2 (call core.getfield %₁ 1) + 3 (call top.indexed_iterate slot₃/destructured_arg 1) + 4 (call core.getfield %₃ 1) + 5 (return core.nothing) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Slot flags +function f(@nospecialize(x), g, y) + g() + y +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any core.Any core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x(nospecialize,!read) slot₃/g(called) slot₄/y] + 1 TestMod.+ + 2 (call slot₃/g) + 3 (call %₁ %₂ slot₄/y) + 4 (return %₃) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Function return without arguments +function f() + return + after_return # <- distinguish output from implicit return +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) + 2 TestMod.after_return + 3 (return %₂) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Function return in value position is allowed +function f() 
+ x = return 1 +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 (return 1) + 2 (= slot₂/x core.nothing) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Binding docs to functions +""" +some docs +""" +function f() +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄) +6 (call core.svec) +7 SourceLocation::4:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) +10 latestworld +11 TestMod.f +12 (call JuliaLowering.bind_docs! %₁₁ "some docs\n" %₈) +13 TestMod.f +14 (return %₁₃) + +######################################## +# Binding docs to callable type +""" +some docs +""" +function (x::T)() +end +#--------------------- +1 TestMod.T +2 (call core.svec %₁) +3 (call core.svec) +4 SourceLocation::4:10 +5 (call core.svec %₂ %₃ %₄) +6 --- method core.nothing %₅ + slots: [slot₁/x(!read)] + 1 (return core.nothing) +7 latestworld +8 TestMod.T +9 (call JuliaLowering.bind_docs! %₈ "some docs\n" %₅) +10 (return core.nothing) + +######################################## +# Keyword function with defaults. +# Order of methods +# 1. #f_kw_simple#0(x, y, ::typeof(f_kw_simple), a, b) (body) +# 2. Core.kwcall(kws, ::typeof(f_kw_simple)) +# 3. Core.kwcall(kws, ::typeof(f_kw_simple), a) +# 4. Core.kwcall(kws, ::typeof(f_kw_simple), a, b) (kwcall body) +# 5. f_kw_simple() +# 6. f_kw_simple(a) +# 7. 
f_kw_simple(a, b) +function f_kw_simple(a::Int=1, b::Float64=1.0; x::Char='a', y::Bool=true) + (a, b, x, y) +end +#--------------------- +1 (method TestMod.f_kw_simple) +2 latestworld +3 (method TestMod.#f_kw_simple#0) +4 latestworld +5 TestMod.#f_kw_simple#0 +6 (call core.Typeof %₅) +7 TestMod.Char +8 TestMod.Bool +9 TestMod.f_kw_simple +10 (call core.Typeof %₉) +11 TestMod.Int +12 TestMod.Float64 +13 (call core.svec %₆ %₇ %₈ %₁₀ %₁₁ %₁₂) +14 (call core.svec) +15 SourceLocation::1:10 +16 (call core.svec %₁₃ %₁₄ %₁₅) +17 --- method core.nothing %₁₆ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/#self#(!read) slot₅/a slot₆/b] + 1 (meta :nkw 2) + 2 (call core.tuple slot₅/a slot₆/b slot₂/x slot₃/y) + 3 (return %₂) +18 latestworld +19 (call core.typeof core.kwcall) +20 TestMod.f_kw_simple +21 (call core.Typeof %₂₀) +22 (call core.svec %₁₉ core.NamedTuple %₂₁) +23 (call core.svec) +24 SourceLocation::1:10 +25 (call core.svec %₂₂ %₂₃ %₂₄) +26 --- method core.nothing %₂₅ + slots: [slot₁/#self#(called) slot₂/kws slot₃/#self#] + 1 (call slot₁/#self# slot₂/kws slot₃/#self# 1 1.0) + 2 (return %₁) +27 latestworld +28 (call core.typeof core.kwcall) +29 TestMod.f_kw_simple +30 (call core.Typeof %₂₉) +31 TestMod.Int +32 (call core.svec %₂₈ core.NamedTuple %₃₀ %₃₁) +33 (call core.svec) +34 SourceLocation::1:10 +35 (call core.svec %₃₂ %₃₃ %₃₄) +36 --- method core.nothing %₃₅ + slots: [slot₁/#self#(called) slot₂/kws slot₃/#self# slot₄/a] + 1 (call slot₁/#self# slot₂/kws slot₃/#self# slot₄/a 1.0) + 2 (return %₁) +37 latestworld +38 (call core.typeof core.kwcall) +39 TestMod.f_kw_simple +40 (call core.Typeof %₃₉) +41 TestMod.Int +42 TestMod.Float64 +43 (call core.svec %₃₈ core.NamedTuple %₄₀ %₄₁ %₄₂) +44 (call core.svec) +45 SourceLocation::1:10 +46 (call core.svec %₄₃ %₄₄ %₄₅) +47 --- method core.nothing %₄₆ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/a slot₅/b slot₆/kwtmp slot₇/x(!read) slot₈/y(!read)] + 1 (newvar slot₇/x) + 2 (newvar slot₈/y) + 3 (call 
core.isdefined slot₂/kws :x) + 4 (gotoifnot %₃ label₁₅) + 5 (call core.getfield slot₂/kws :x) + 6 TestMod.Char + 7 (call core.isa %₅ %₆) + 8 (gotoifnot %₇ label₁₀) + 9 (goto label₁₃) + 10 TestMod.Char + 11 (new core.TypeError :keyword argument :x %₁₀ %₅) + 12 (call core.throw %₁₁) + 13 (= slot₆/kwtmp %₅) + 14 (goto label₁₆) + 15 (= slot₆/kwtmp 'a') + 16 slot₆/kwtmp + 17 (call core.isdefined slot₂/kws :y) + 18 (gotoifnot %₁₇ label₂₉) + 19 (call core.getfield slot₂/kws :y) + 20 TestMod.Bool + 21 (call core.isa %₁₉ %₂₀) + 22 (gotoifnot %₂₁ label₂₄) + 23 (goto label₂₇) + 24 TestMod.Bool + 25 (new core.TypeError :keyword argument :y %₂₄ %₁₉) + 26 (call core.throw %₂₅) + 27 (= slot₆/kwtmp %₁₉) + 28 (goto label₃₀) + 29 (= slot₆/kwtmp true) + 30 slot₆/kwtmp + 31 (call top.keys slot₂/kws) + 32 (call core.tuple :x :y) + 33 (call top.diff_names %₃₁ %₃₂) + 34 (call top.isempty %₃₃) + 35 (gotoifnot %₃₄ label₃₇) + 36 (goto label₃₈) + 37 (call top.kwerr slot₂/kws slot₃/#self# slot₄/a slot₅/b) + 38 TestMod.#f_kw_simple#0 + 39 (call %₃₈ %₁₆ %₃₀ slot₃/#self# slot₄/a slot₅/b) + 40 (return %₃₉) +48 latestworld +49 TestMod.f_kw_simple +50 (call core.Typeof %₄₉) +51 (call core.svec %₅₀) +52 (call core.svec) +53 SourceLocation::1:10 +54 (call core.svec %₅₁ %₅₂ %₅₃) +55 --- method core.nothing %₅₄ + slots: [slot₁/#self#(called)] + 1 (call slot₁/#self# 1 1.0) + 2 (return %₁) +56 latestworld +57 TestMod.f_kw_simple +58 (call core.Typeof %₅₇) +59 TestMod.Int +60 (call core.svec %₅₈ %₅₉) +61 (call core.svec) +62 SourceLocation::1:10 +63 (call core.svec %₆₀ %₆₁ %₆₂) +64 --- method core.nothing %₆₃ + slots: [slot₁/#self#(called) slot₂/a] + 1 (call slot₁/#self# slot₂/a 1.0) + 2 (return %₁) +65 latestworld +66 TestMod.f_kw_simple +67 (call core.Typeof %₆₆) +68 TestMod.Int +69 TestMod.Float64 +70 (call core.svec %₆₇ %₆₈ %₆₉) +71 (call core.svec) +72 SourceLocation::1:10 +73 (call core.svec %₇₀ %₇₁ %₇₂) +74 --- method core.nothing %₇₃ + slots: [slot₁/#self# slot₂/a slot₃/b] + 1 
TestMod.#f_kw_simple#0 + 2 (call %₁ 'a' true slot₁/#self# slot₂/a slot₃/b) + 3 (return %₂) +75 latestworld +76 TestMod.f_kw_simple +77 (return %₇₆) + +######################################## +# Keyword slurping - simple forwarding of all kws +function f_kw_slurp_simple(; all_kws...) + all_kws +end +#--------------------- +1 (method TestMod.f_kw_slurp_simple) +2 latestworld +3 (method TestMod.#f_kw_slurp_simple#0) +4 latestworld +5 TestMod.#f_kw_slurp_simple#0 +6 (call core.Typeof %₅) +7 (call top.pairs core.NamedTuple) +8 TestMod.f_kw_slurp_simple +9 (call core.Typeof %₈) +10 (call core.svec %₆ %₇ %₉) +11 (call core.svec) +12 SourceLocation::1:10 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/all_kws slot₃/#self#(!read)] + 1 (meta :nkw 1) + 2 slot₂/all_kws + 3 (return %₂) +15 latestworld +16 (call core.typeof core.kwcall) +17 TestMod.f_kw_slurp_simple +18 (call core.Typeof %₁₇) +19 (call core.svec %₁₆ core.NamedTuple %₁₈) +20 (call core.svec) +21 SourceLocation::1:10 +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/all_kws(!read)] + 1 (newvar slot₄/all_kws) + 2 (call top.pairs slot₂/kws) + 3 TestMod.#f_kw_slurp_simple#0 + 4 (call %₃ %₂ slot₃/#self#) + 5 (return %₄) +24 latestworld +25 TestMod.f_kw_slurp_simple +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ + slots: [slot₁/#self#] + 1 TestMod.#f_kw_slurp_simple#0 + 2 (call core.NamedTuple) + 3 (call top.pairs %₂) + 4 (call %₁ %₃ slot₁/#self#) + 5 (return %₄) +32 latestworld +33 TestMod.f_kw_slurp_simple +34 (return %₃₃) + +######################################## +# Keyword slurping +function f_kw_slurp(; x=x_default, non_x_kws...) 
+ all_kws +end +#--------------------- +1 (method TestMod.f_kw_slurp) +2 latestworld +3 (method TestMod.#f_kw_slurp#0) +4 latestworld +5 TestMod.#f_kw_slurp#0 +6 (call core.Typeof %₅) +7 (call top.pairs core.NamedTuple) +8 TestMod.f_kw_slurp +9 (call core.Typeof %₈) +10 (call core.svec %₆ core.Any %₇ %₉) +11 (call core.svec) +12 SourceLocation::1:10 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/non_x_kws(!read) slot₄/#self#(!read)] + 1 (meta :nkw 2) + 2 TestMod.all_kws + 3 (return %₂) +15 latestworld +16 (call core.typeof core.kwcall) +17 TestMod.f_kw_slurp +18 (call core.Typeof %₁₇) +19 (call core.svec %₁₆ core.NamedTuple %₁₈) +20 (call core.svec) +21 SourceLocation::1:10 +22 (call core.svec %₁₉ %₂₀ %₂₁) +23 --- method core.nothing %₂₂ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/kwtmp slot₅/non_x_kws(!read) slot₆/x(!read)] + 1 (newvar slot₅/non_x_kws) + 2 (newvar slot₆/x) + 3 (call core.isdefined slot₂/kws :x) + 4 (gotoifnot %₃ label₈) + 5 (call core.getfield slot₂/kws :x) + 6 (= slot₄/kwtmp %₅) + 7 (goto label₁₀) + 8 TestMod.x_default + 9 (= slot₄/kwtmp %₈) + 10 slot₄/kwtmp + 11 (call core.tuple :x) + 12 (call core.apply_type core.NamedTuple %₁₁) + 13 (call top.structdiff slot₂/kws %₁₂) + 14 (call top.pairs %₁₃) + 15 TestMod.#f_kw_slurp#0 + 16 (call %₁₅ %₁₀ %₁₄ slot₃/#self#) + 17 (return %₁₆) +24 latestworld +25 TestMod.f_kw_slurp +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::1:10 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- method core.nothing %₃₀ + slots: [slot₁/#self#] + 1 TestMod.#f_kw_slurp#0 + 2 TestMod.x_default + 3 (call core.NamedTuple) + 4 (call top.pairs %₃) + 5 (call %₁ %₂ %₄ slot₁/#self#) + 6 (return %₅) +32 latestworld +33 TestMod.f_kw_slurp +34 (return %₃₃) + +######################################## +# Static parameters used in keywords, with and without the static parameter +# being present in positional argument types. 
+# +# Here the wrong type for `b` will get a `TypeError` but `A` will need to rely +# on a MethodError. +function f_kw_sparams(x::X; a::A=a_def, b::X=b_def) where {X,A} + (X,A) +end +#--------------------- +1 (method TestMod.f_kw_sparams) +2 latestworld +3 (method TestMod.#f_kw_sparams#0) +4 latestworld +5 (= slot₂/X (call core.TypeVar :X)) +6 (= slot₁/A (call core.TypeVar :A)) +7 TestMod.#f_kw_sparams#0 +8 (call core.Typeof %₇) +9 slot₁/A +10 slot₂/X +11 TestMod.f_kw_sparams +12 (call core.Typeof %₁₁) +13 slot₂/X +14 (call core.svec %₈ %₉ %₁₀ %₁₂ %₁₃) +15 slot₂/X +16 slot₁/A +17 (call core.svec %₁₅ %₁₆) +18 SourceLocation::1:10 +19 (call core.svec %₁₄ %₁₇ %₁₈) +20 --- method core.nothing %₁₉ + slots: [slot₁/#self#(!read) slot₂/a(!read) slot₃/b(!read) slot₄/#self#(!read) slot₅/x(!read)] + 1 (meta :nkw 2) + 2 static_parameter₁ + 3 static_parameter₂ + 4 (call core.tuple %₂ %₃) + 5 (return %₄) +21 latestworld +22 (= slot₄/X (call core.TypeVar :X)) +23 (= slot₃/A (call core.TypeVar :A)) +24 (call core.typeof core.kwcall) +25 TestMod.f_kw_sparams +26 (call core.Typeof %₂₅) +27 slot₄/X +28 (call core.svec %₂₄ core.NamedTuple %₂₆ %₂₇) +29 slot₄/X +30 (call core.svec %₂₉) +31 SourceLocation::1:10 +32 (call core.svec %₂₈ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read) slot₂/kws slot₃/#self# slot₄/x slot₅/kwtmp slot₆/a(!read) slot₇/b(!read)] + 1 (newvar slot₆/a) + 2 (newvar slot₇/b) + 3 (call core.isdefined slot₂/kws :a) + 4 (gotoifnot %₃ label₈) + 5 (call core.getfield slot₂/kws :a) + 6 (= slot₅/kwtmp %₅) + 7 (goto label₁₀) + 8 TestMod.a_def + 9 (= slot₅/kwtmp %₈) + 10 slot₅/kwtmp + 11 (call core.isdefined slot₂/kws :b) + 12 (gotoifnot %₁₁ label₂₃) + 13 (call core.getfield slot₂/kws :b) + 14 static_parameter₁ + 15 (call core.isa %₁₃ %₁₄) + 16 (gotoifnot %₁₅ label₁₈) + 17 (goto label₂₁) + 18 static_parameter₁ + 19 (new core.TypeError :keyword argument :b %₁₈ %₁₃) + 20 (call core.throw %₁₉) + 21 (= slot₅/kwtmp %₁₃) + 22 (goto label₂₅) + 23 TestMod.b_def 
+ 24 (= slot₅/kwtmp %₂₃) + 25 slot₅/kwtmp + 26 (call top.keys slot₂/kws) + 27 (call core.tuple :a :b) + 28 (call top.diff_names %₂₆ %₂₇) + 29 (call top.isempty %₂₈) + 30 (gotoifnot %₂₉ label₃₂) + 31 (goto label₃₃) + 32 (call top.kwerr slot₂/kws slot₃/#self# slot₄/x) + 33 TestMod.#f_kw_sparams#0 + 34 (call %₃₃ %₁₀ %₂₅ slot₃/#self# slot₄/x) + 35 (return %₃₄) +34 latestworld +35 (= slot₆/X (call core.TypeVar :X)) +36 (= slot₅/A (call core.TypeVar :A)) +37 TestMod.f_kw_sparams +38 (call core.Typeof %₃₇) +39 slot₆/X +40 (call core.svec %₃₈ %₃₉) +41 slot₆/X +42 (call core.svec %₄₁) +43 SourceLocation::1:10 +44 (call core.svec %₄₀ %₄₂ %₄₃) +45 --- method core.nothing %₄₄ + slots: [slot₁/#self# slot₂/x] + 1 TestMod.#f_kw_sparams#0 + 2 TestMod.a_def + 3 TestMod.b_def + 4 (call %₁ %₂ %₃ slot₁/#self# slot₂/x) + 5 (return %₄) +46 latestworld +47 TestMod.f_kw_sparams +48 (return %₄₇) + +######################################## +# Error: Static parameter which is unused in keyword body arg types +function f_kw_sparams(x::X; a::A) where {X,Y,A} + (X,A) +end +#--------------------- +LoweringError: +function f_kw_sparams(x::X; a::A) where {X,Y,A} +# ╙ ── Method definition declares type variable but does not use it in the type of any function parameter + (X,A) +end + +######################################## +# Error: argument unpacking in keywords +function f_kw_destruct(; (x,y)=10) +end +#--------------------- +LoweringError: +function f_kw_destruct(; (x,y)=10) +# └───┘ ── Invalid keyword name +end + +######################################## +# Error: keyword slurping combined with a default +function f_kw_slurp_default(; kws...=def) +end +#--------------------- +LoweringError: +function f_kw_slurp_default(; kws...=def) +# └────────┘ ── keyword argument with `...` cannot have a default value +end + +######################################## +# Error: keyword slurping combined with type +function f_kw_slurp_type(; kws::T...) 
+end +#--------------------- +LoweringError: +function f_kw_slurp_type(; kws::T...) +# └───────┘ ── keyword argument with `...` may not be given a type +end + +######################################## +# Error: keyword slurping on non-final argument +function f_kw_slurp_not_last(; kws..., x=1) +end +#--------------------- +LoweringError: +function f_kw_slurp_not_last(; kws..., x=1) +# └────┘ ── `...` may only be used for the last keyword argument +end + +######################################## +# Fully generated function +@generated function f_only_generated(x, y) + generator_code(x,y) +end +#--------------------- +1 (method TestMod.f_only_generated) +2 latestworld +3 (method TestMod.#f_only_generated@generator#0) +4 latestworld +5 TestMod.#f_only_generated@generator#0 +6 (call core.Typeof %₅) +7 (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any) +8 (call core.svec) +9 SourceLocation::1:21 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize) slot₅/y(nospecialize)] + 1 TestMod.generator_code + 2 (call %₁ slot₄/x slot₅/y) + 3 (return %₂) +12 latestworld +13 TestMod.f_only_generated +14 (call core.Typeof %₁₃) +15 (call core.svec %₁₄ core.Any core.Any) +16 (call core.svec) +17 SourceLocation::1:21 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/x(!read) slot₃/y(!read)] + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_only_generated@generator#0 SourceRef(SourceFile("@generated function f_only_generated(x, y)\n generator_code(x,y)\nend", 0, nothing, 1, [1, 44, 68]), 1, (macrocall (macro_name 1-1::@-t 2-10::Identifier) 11-11::Whitespace-t (function 12-19::function-t 20-20::Whitespace-t (call 21-36::Identifier 37-37::(-t 38-38::Identifier 39-39::,-t 40-40::Whitespace-t 41-41::Identifier 42-42::)-t) (block 43-47::NewlineWs-t (call 48-61::Identifier 62-62::(-t 
63-63::Identifier 64-64::,-t 65-65::Identifier 66-66::)-t) 67-67::NewlineWs-t) 68-70::end-t))) (call core.svec :#self# :x :y) (call core.svec))) + 2 (meta :generated_only) + 3 (return core.nothing) +20 latestworld +21 TestMod.f_only_generated +22 (return %₂₁) + +######################################## +# Partially generated function with `if @generated` +function f_partially_generated(x, y) + nongen_stuff = bothgen(x, y) + if @generated + quote + maybe_gen_stuff = some_gen_stuff(x, y) + end + else + maybe_gen_stuff = some_nongen_stuff(x, y) + end + (nongen_stuff, maybe_gen_stuff) +end +#--------------------- +1 (method TestMod.f_partially_generated) +2 latestworld +3 (method TestMod.#f_partially_generated@generator#0) +4 latestworld +5 TestMod.#f_partially_generated@generator#0 +6 (call core.Typeof %₅) +7 (call core.svec %₆ JuliaLowering.MacroContext core.Any core.Any core.Any) +8 (call core.svec) +9 SourceLocation::1:10 +10 (call core.svec %₇ %₈ %₉) +11 --- method core.nothing %₁₀ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/#self#(!read) slot₄/x(nospecialize,!read) slot₅/y(nospecialize,!read)] + 1 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))) + 2 (call core.tuple %₁) + 3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (block (= nongen_stuff (call bothgen x y)) ($ (block (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (= maybe_gen_stuff (call some_gen_stuff x y))))))) (tuple-p nongen_stuff maybe_gen_stuff)))) %₂) + 4 (return %₃) +12 latestworld +13 TestMod.f_partially_generated +14 (call core.Typeof %₁₃) +15 (call core.svec %₁₄ core.Any core.Any) +16 (call core.svec) +17 SourceLocation::1:10 +18 (call core.svec %₁₅ %₁₆ %₁₇) +19 --- method core.nothing %₁₈ + slots: [slot₁/#self#(!read) slot₂/x slot₃/y slot₄/maybe_gen_stuff slot₅/nongen_stuff] + 1 (meta :generated (new JuliaLowering.GeneratedFunctionStub TestMod.#f_partially_generated@generator#0 
SourceRef(SourceFile("function f_partially_generated(x, y)\n nongen_stuff = bothgen(x, y)\n if @generated\n quote\n maybe_gen_stuff = some_gen_stuff(x, y)\n end\n else\n maybe_gen_stuff = some_nongen_stuff(x, y)\n end\n (nongen_stuff, maybe_gen_stuff)\nend", 0, nothing, 1, [1, 38, 71, 89, 103, 154, 166, 175, 225, 233, 269]), 1, (function 1-8::function-t 9-9::Whitespace-t (call 10-30::Identifier 31-31::(-t 32-32::Identifier 33-33::,-t 34-34::Whitespace-t 35-35::Identifier 36-36::)-t) (block 37-41::NewlineWs-t (= 42-53::Identifier 54-54::Whitespace-t 55-55::=-t 56-56::Whitespace-t (call 57-63::Identifier 64-64::(-t 65-65::Identifier 66-66::,-t 67-67::Whitespace-t 68-68::Identifier 69-69::)-t)) 70-74::NewlineWs-t (if 75-76::if-t 77-77::Whitespace-t (macrocall (macro_name 78-78::@-t 79-87::Identifier)) (block 88-96::NewlineWs-t (quote (block 97-101::quote-t 102-114::NewlineWs-t (= 115-129::Identifier 130-130::Whitespace-t 131-131::=-t 132-132::Whitespace-t (call 133-146::Identifier 147-147::(-t 148-148::Identifier 149-149::,-t 150-150::Whitespace-t 151-151::Identifier 152-152::)-t)) 153-161::NewlineWs-t 162-164::end-t)) 165-169::NewlineWs-t) 170-173::else-t (block 174-182::NewlineWs-t (= 183-197::Identifier 198-198::Whitespace-t 199-199::=-t 200-200::Whitespace-t (call 201-217::Identifier 218-218::(-t 219-219::Identifier 220-220::,-t 221-221::Whitespace-t 222-222::Identifier 223-223::)-t)) 224-228::NewlineWs-t) 229-231::end-t) 232-236::NewlineWs-t (tuple-p 237-237::(-t 238-249::Identifier 250-250::,-t 251-251::Whitespace-t 252-266::Identifier 267-267::)-t) 268-268::NewlineWs-t) 269-271::end-t)) (call core.svec :#self# :x :y) (call core.svec))) + 2 TestMod.bothgen + 3 (= slot₅/nongen_stuff (call %₂ slot₂/x slot₃/y)) + 4 TestMod.some_nongen_stuff + 5 (= slot₄/maybe_gen_stuff (call %₄ slot₂/x slot₃/y)) + 6 slot₅/nongen_stuff + 7 slot₄/maybe_gen_stuff + 8 (call core.tuple %₆ %₇) + 9 (return %₈) +20 latestworld +21 TestMod.f_partially_generated +22 (return %₂₁) diff --git 
a/JuliaLowering/test/generators.jl b/JuliaLowering/test/generators.jl new file mode 100644 index 0000000000000..7dce6236afe20 --- /dev/null +++ b/JuliaLowering/test/generators.jl @@ -0,0 +1,76 @@ +@testset "Generators" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +collect(x^2 for x in 1:3) +""") == [1,4,9] + +@test JuliaLowering.include_string(test_mod, """ +collect(x for x in 1:5 if isodd(x)) +""") == [1,3,5] + +@test JuliaLowering.include_string(test_mod, """ +collect((y,x) for (x,y) in zip(1:3, 2:4) if y != 3) +""") == [(2,1), (4,3)] + +# product iterator +@test JuliaLowering.include_string(test_mod, """ +collect((x,y) for x in 1:3, y in 1:2) +""") == [(1,1) (1,2) + (2,1) (2,2) + (3,1) (3,2)] + +# flattened iterator +@test JuliaLowering.include_string(test_mod, """ +collect((x,y,z) for x in 1:3, y in 4:5 for z in 6:7) +""") == [ + (1,4,6) + (1,4,7) + (2,4,6) + (2,4,7) + (3,4,6) + (3,4,7) + (1,5,6) + (1,5,7) + (2,5,6) + (2,5,7) + (3,5,6) + (3,5,7) +] + +# Duplicate iteration variables - body sees only innermost +@test JuliaLowering.include_string(test_mod, """ +collect(x for x in 1:3 for x in 1:2) +""") == [1, 2, 1, 2, 1, 2] + +# Outer iteration variables are protected from mutation +@test JuliaLowering.include_string(test_mod, """ +collect((z=y; y=100; z) for y in 1:3 for x in 1:2) +""") == [1, 1, 2, 2, 3, 3] + +# Simple typed comprehension lowered to for loops +@test JuliaLowering.include_string(test_mod, """ +Tuple{Int,Int}[(x,y) for x in 1:2, y in 1:3] +""") == [(1,1) (1,2) (1,3) + (2,1) (2,2) (2,3)] + +# Triply nested comprehension +@test JuliaLowering.include_string(test_mod, """ +[(x,y,z) for x in 1:3 for y in 4:5 for z in 6:7] +""") == [ + (1, 4, 6) + (1, 4, 7) + (1, 5, 6) + (1, 5, 7) + (2, 4, 6) + (2, 4, 7) + (2, 5, 6) + (2, 5, 7) + (3, 4, 6) + (3, 4, 7) + (3, 5, 6) + (3, 5, 7) +] + +end diff --git a/JuliaLowering/test/generators_ir.jl b/JuliaLowering/test/generators_ir.jl new file mode 100644 index 
0000000000000..28f0241c92e93 --- /dev/null +++ b/JuliaLowering/test/generators_ir.jl @@ -0,0 +1,299 @@ +######################################## +# Simple 1D generator +(x+1 for x in xs) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##0 %₁ %₂) +4 latestworld +5 TestMod.#->##0 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/x] + 1 TestMod.+ + 2 (call %₁ slot₂/x 1) + 3 (return %₂) +11 latestworld +12 TestMod.#->##0 +13 (new %₁₂) +14 TestMod.xs +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) + +######################################## +# Product iteration +(x+y for x in xs, y in ys) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##1 %₁ %₂) +4 latestworld +5 TestMod.#->##1 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) + 6 (= slot₅/y (call core.getfield %₅ 1)) + 7 TestMod.+ + 8 slot₄/x + 9 slot₅/y + 10 (call %₇ %₈ %₉) + 11 (return %₁₀) +11 latestworld +12 TestMod.#->##1 +13 (new %₁₂) +14 TestMod.xs +15 TestMod.ys +16 (call top.product %₁₄ %₁₅) +17 (call top.Generator %₁₃ %₁₆) +18 (return %₁₇) + +######################################## +# Use `identity` as the Generator function when possible eg in filters +((x,y) for (x,y) in iter if f(x)) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##2 %₁ %₂) +4 latestworld +5 TestMod.#->##2 +6 (call 
core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:29 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/destructured_arg slot₃/iterstate slot₄/x slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) + 6 (= slot₅/y (call core.getfield %₅ 1)) + 7 TestMod.f + 8 slot₄/x + 9 (call %₇ %₈) + 10 (return %₉) +11 latestworld +12 TestMod.#->##2 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Filter %₁₃ %₁₄) +16 (call top.Generator top.identity %₁₅) +17 (return %₁₆) + +######################################## +# Use of placeholders in iteration vars +(1 for _ in xs) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##3 %₁ %₂) +4 latestworld +5 TestMod.#->##3 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 (return 1) +11 latestworld +12 TestMod.#->##3 +13 (new %₁₂) +14 TestMod.xs +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) + +######################################## +# Error: Use of placeholders in body +(_ for _ in xs) +#--------------------- +LoweringError: +(_ for _ in xs) +#╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions + +######################################## +# 1D generator with destructuring +(body for (x,_,y) in iter) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##5 %₁ %₂) +4 latestworld +5 TestMod.#->##5 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:2 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/destructured_arg 
slot₃/iterstate slot₄/x(!read) slot₅/y(!read)] + 1 (call top.indexed_iterate slot₂/destructured_arg 1) + 2 (= slot₄/x (call core.getfield %₁ 1)) + 3 (= slot₃/iterstate (call core.getfield %₁ 2)) + 4 slot₃/iterstate + 5 (call top.indexed_iterate slot₂/destructured_arg 2 %₄) + 6 (call core.getfield %₅ 1) + 7 (= slot₃/iterstate (call core.getfield %₅ 2)) + 8 slot₃/iterstate + 9 (call top.indexed_iterate slot₂/destructured_arg 3 %₈) + 10 (= slot₅/y (call core.getfield %₉ 1)) + 11 TestMod.body + 12 (return %₁₁) +11 latestworld +12 TestMod.#->##5 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) + +######################################## +# return permitted in quoted syntax in generator +(:(return x) for _ in iter) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##6 %₁ %₂) +4 latestworld +5 TestMod.#->##6 +6 (call core.svec %₅ core.Any) +7 (call core.svec) +8 SourceLocation::1:4 +9 (call core.svec %₆ %₇ %₈) +10 --- method core.nothing %₉ + slots: [slot₁/#self#(!read) slot₂/_(!read)] + 1 (call JuliaLowering.interpolate_ast SyntaxTree (inert (return x))) + 2 (return %₁) +11 latestworld +12 TestMod.#->##6 +13 (new %₁₂) +14 TestMod.iter +15 (call top.Generator %₁₃ %₁₄) +16 (return %₁₅) + +######################################## +# Error: `return` not permitted in generator body +((return x) + y for x in iter) +#--------------------- +LoweringError: +((return x) + y for x in iter) +# └──────┘ ── `return` not allowed inside comprehension or generator + +######################################## +# Nested case with duplicate iteration variables +(x for x in 1:3 for x in 1:2) +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#->##7 %₁ %₂) +4 latestworld +5 (call core.svec) +6 (call core.svec) +7 (call JuliaLowering.eval_closure_type TestMod :#->#->##0 %₅ %₆) +8 latestworld +9 TestMod.#->#->##0 +10 (call core.svec %₉ 
core.Any) +11 (call core.svec) +12 SourceLocation::1:2 +13 (call core.svec %₁₀ %₁₁ %₁₂) +14 --- method core.nothing %₁₃ + slots: [slot₁/#self#(!read) slot₂/x slot₃/x] + 1 slot₂/x + 2 (= slot₃/x %₁) + 3 slot₃/x + 4 (return %₃) +15 latestworld +16 TestMod.#->##7 +17 (call core.svec %₁₆ core.Any) +18 (call core.svec) +19 SourceLocation::1:2 +20 (call core.svec %₁₇ %₁₈ %₁₉) +21 --- method core.nothing %₂₀ + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.#->#->##0 + 2 (new %₁) + 3 TestMod.: + 4 (call %₃ 1 2) + 5 (call top.Generator %₂ %₄) + 6 (return %₅) +22 latestworld +23 TestMod.#->##7 +24 (new %₂₃) +25 TestMod.: +26 (call %₂₅ 1 3) +27 (call top.Generator %₂₄ %₂₆) +28 (call top.Flatten %₂₇) +29 (return %₂₈) + +######################################## +# Comprehension lowers to generator with collect +[x for x in xs] +#--------------------- +1 TestMod.xs +2 (call top.Generator top.identity %₁) +3 (call top.collect %₂) +4 (return %₃) + +######################################## +# Simple typed comprehension lowers to for loop +T[(x,y) for x in xs, y in ys] +#--------------------- +1 TestMod.xs +2 TestMod.ys +3 (call top.product %₁ %₂) +4 (call top.IteratorSize %₃) +5 (call core.isa %₄ top.SizeUnknown) +6 TestMod.T +7 (call top._array_for %₆ %₃ %₄) +8 (call top.LinearIndices %₇) +9 (= slot₁/idx (call top.first %₈)) +10 (= slot₃/next (call top.iterate %₂)) +11 slot₃/next +12 (call core.=== %₁₁ core.nothing) +13 (call top.not_int %₁₂) +14 (gotoifnot %₁₃ label₅₀) +15 slot₃/next +16 (= slot₄/y (call core.getfield %₁₅ 1)) +17 (call core.getfield %₁₅ 2) +18 (= slot₂/next (call top.iterate %₁)) +19 slot₂/next +20 (call core.=== %₁₉ core.nothing) +21 (call top.not_int %₂₀) +22 (gotoifnot %₂₁ label₄₄) +23 slot₄/y +24 (= slot₆/y %₂₃) +25 slot₂/next +26 (= slot₅/x (call core.getfield %₂₅ 1)) +27 (call core.getfield %₂₅ 2) +28 slot₅/x +29 slot₆/y +30 (call core.tuple %₂₈ %₂₉) +31 (gotoifnot %₅ label₃₄) +32 (call top.push! 
%₇ %₃₀) +33 (goto label₃₆) +34 slot₁/idx +35 (call top.setindex! %₇ %₃₀ %₃₄) +36 slot₁/idx +37 (= slot₁/idx (call top.add_int %₃₆ 1)) +38 (= slot₂/next (call top.iterate %₁ %₂₇)) +39 slot₂/next +40 (call core.=== %₃₉ core.nothing) +41 (call top.not_int %₄₀) +42 (gotoifnot %₄₁ label₄₄) +43 (goto label₂₃) +44 (= slot₃/next (call top.iterate %₂ %₁₇)) +45 slot₃/next +46 (call core.=== %₄₅ core.nothing) +47 (call top.not_int %₄₆) +48 (gotoifnot %₄₇ label₅₀) +49 (goto label₁₅) +50 (return %₇) diff --git a/JuliaLowering/test/hooks.jl b/JuliaLowering/test/hooks.jl new file mode 100644 index 0000000000000..823e9b6027906 --- /dev/null +++ b/JuliaLowering/test/hooks.jl @@ -0,0 +1,86 @@ +const JL = JuliaLowering + +@testset "hooks" begin + test_mod = Module() + + @testset "`core_lowering_hook`" begin + # Non-AST types are often sent through lowering + stuff = Any[LineNumberNode(1), 123, 123.123, true, "foo", test_mod] + for s in stuff + @test JL.core_lowering_hook(s, test_mod) == Core.svec(s) + end + + for ast_type in (Expr, JL.SyntaxTree) + ex = parsestmt(ast_type, "[1,2,3] .+= 1") + out = JL.core_lowering_hook(ex, test_mod) + @test out isa Core.SimpleVector && out[1] isa Expr + val = Core.eval(test_mod, out[1]) + @test val == [2,3,4] + end + + # file argument mismatch with embedded linenumbernodes shouldn't crash + ex = Expr(:block, LineNumberNode(111), :(x = 1), LineNumberNode(222), :(x + 1)) + lwr = JuliaLowering.core_lowering_hook(ex, test_mod, "foo.jl", 333)[1] + @test Core.eval(test_mod, lwr) === 2 + end + + if isdefined(Core, :_lower) + function jeval(str) + prog = parseall(Expr, str) + local out + try + JL.activate!() + out = Core.eval(test_mod, prog) + finally + JL.activate!(false) + end + end + @testset "integration: `JuliaLowering.activate!`" begin + out = jeval("global asdf = 1") + @test out === 1 + @test isdefined(test_mod, :asdf) + + out = jeval("module M; x = 1; end") + @test out isa Module + @test isdefined(test_mod, :M) + @test isdefined(test_mod.M, :x) + + # 
Tricky cases with symbols + out = jeval("""module M2 + Base.@constprop :aggressive function f(x); x; end + const what = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), Core.nothing) + end""") + @test out isa Module + @test isdefined(test_mod, :M2) + @test isdefined(test_mod.M2, :f) + @test isdefined(test_mod.M2, :what) + + out = jeval(""" "docstring" module M3 end """) + @test out isa Module + @test isdefined(test_mod, :M3) + + # Macros may produce toplevel expressions. Note that julia handles + # this case badly (macro expansion replaces M5_inner with a + # globalref) and we handle esc(:M5_inner) badly + out = jeval("""module M5 + macro newmod() + return quote + let a = 1 + $(Expr(:toplevel, + Expr(:module, true, :M5_inner, + Expr(:block, :(global asdf = 1))))) + end + end + end + @newmod() + end""") + @test out isa Module + @test isdefined(test_mod, :M5) + @test isdefined(test_mod.M5, :M5_inner) + @test isdefined(test_mod.M5.M5_inner, :asdf) + + # TODO: broken, commented to prevent error logging + # @test jeval("Base.@propagate_inbounds @inline meta_double_quote_issue(x) = x") isa Function + end + end +end diff --git a/JuliaLowering/test/import.jl b/JuliaLowering/test/import.jl new file mode 100644 index 0000000000000..74cdd9260149e --- /dev/null +++ b/JuliaLowering/test/import.jl @@ -0,0 +1,80 @@ +@testset "using / import" begin + +test_mod = Module() + +# Test attributes are correctly set for export/public +JuliaLowering.include_string(test_mod, """ +x = 1 +y = 2 +export x +public y +""") +@test Base.isexported(test_mod, :x) +@test Base.ispublic(test_mod, :x) +@test Base.ispublic(test_mod, :y) +@test !Base.isexported(test_mod, :y) + +# Test various forms of `using` +C = JuliaLowering.include_string(test_mod, """ +module C + module D + export x + public y, f + x = [101] + y = [202] + + function f() + "hi" + end + end + module E + using ..D: f + using ..D + using .D: y as D_y + using .D: x as D_x_2, y as D_y_2 + import .D.y as D_y_3 + end +end +""") +@test C.D.f === 
C.E.f +@test C.D.x === C.E.x +@test C.D.y === C.E.D_y +@test C.D.x === C.E.D_x_2 +@test C.D.y === C.E.D_y_2 +@test C.D.y === C.E.D_y_3 + +# Test that using F brings in the exported symbol G immediately and that it can +# be used next in the import list. +F = JuliaLowering.include_string(test_mod, """ +module F + export G + module G + export G_global + G_global = "exported from G" + end +end +""") +JuliaLowering.include_string(test_mod, """ +using .F, .G +""") +@test test_mod.F === F +@test test_mod.G === F.G +@test test_mod.G_global === "exported from G" + +# Similarly, that import makes symbols available immediately +H = JuliaLowering.include_string(test_mod, """ +module H + module I + module J + end + end +end +""") +JuliaLowering.include_string(test_mod, """ +import .H.I, .I.J +""") +@test test_mod.I === H.I +@test test_mod.J === H.I.J +@test test_mod.G_global === "exported from G" + +end diff --git a/JuliaLowering/test/import_ir.jl b/JuliaLowering/test/import_ir.jl new file mode 100644 index 0000000000000..8f34f5f0c4939 --- /dev/null +++ b/JuliaLowering/test/import_ir.jl @@ -0,0 +1,69 @@ +######################################## +# Basic import +import A: b +#--------------------- +1 (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A)))))) :($(QuoteNode(:($(Expr(:., :b))))))) +2 latestworld +3 (return core.nothing) + +######################################## +# Import with paths and `as` +import A.B.C: b, c.d as e +#--------------------- +1 (call JuliaLowering.eval_import true TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e))))) +2 latestworld +3 (return core.nothing) + +######################################## +# Imports without `from` module need separating with latestworld +import A, B +#--------------------- +1 (call JuliaLowering.eval_import true TestMod top.nothing :($(QuoteNode(:($(Expr(:., :A))))))) +2 latestworld +3 (call JuliaLowering.eval_import true TestMod 
top.nothing :($(QuoteNode(:($(Expr(:., :B))))))) +4 latestworld +5 (return core.nothing) + +######################################## +# Multiple usings need separating with latestworld +using A, B +#--------------------- +1 (call JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :A))))))) +2 latestworld +3 (call JuliaLowering.eval_using TestMod :($(QuoteNode(:($(Expr(:., :B))))))) +4 latestworld +5 (return core.nothing) + +######################################## +# Using with paths and `as` +using A.B.C: b, c.d as e +#--------------------- +1 (call JuliaLowering.eval_import false TestMod :($(QuoteNode(:($(Expr(:., :A, :B, :C)))))) :($(QuoteNode(:($(Expr(:., :b)))))) :($(QuoteNode(:(c.d as e))))) +2 latestworld +3 (return core.nothing) + +######################################## +# Error: Import not at top level +function f() + import A: b +end +#--------------------- +LoweringError: +function f() + import A: b +# └─────────┘ ── this syntax is only allowed in top level code +end + +######################################## +# Export +export a, b, c +#--------------------- +1 (call JuliaLowering.eval_public TestMod true ["a", "b", "c"]) +2 (return %₁) + +######################################## +# Public +public a, b, c +#--------------------- +1 (call JuliaLowering.eval_public TestMod false ["a", "b", "c"]) +2 (return %₁) diff --git a/JuliaLowering/test/ir_tests.jl b/JuliaLowering/test/ir_tests.jl new file mode 100644 index 0000000000000..3035a6f3a7bc4 --- /dev/null +++ b/JuliaLowering/test/ir_tests.jl @@ -0,0 +1,10 @@ +@testset "IR tests" begin + testdir = @__DIR__ + for filename in readdir(testdir) + if endswith(filename, "_ir.jl") + @testset "$filename" begin + test_ir_cases(joinpath(testdir, filename)) + end + end + end +end diff --git a/JuliaLowering/test/loops.jl b/JuliaLowering/test/loops.jl new file mode 100644 index 0000000000000..6f63b28cc2b0e --- /dev/null +++ b/JuliaLowering/test/loops.jl @@ -0,0 +1,279 @@ + +@testset "while loops" begin + 
+test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + push!(a, i) + end + a +end +""") == [1,2,3,4,5] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + if i == 3 + break + end + push!(a, i) + end + a +end +""") == [1,2] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 0 + while i < 5 + i = i + 1 + if isodd(i) + continue + end + push!(a, i) + end + a +end +""") == [2,4] + +end + +@testset "for loops" begin + +test_mod = Module() + +# iteration +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:3 + push!(a, i) + end + a +end +""") == [1,2,3] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:0 + push!(a, i) + end + a +end +""") == [] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for _ = 1:3 + push!(a, 1) + end + a +end +""") == [1, 1, 1] + +# break +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:6 + if i == 3 + break + end + push!(a, i) + end + a +end +""") == [1, 2] +# Break from inner nested loop +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i in 1:2 + for j in 3:4 + push!(a, (i, j)) + j == 6 && break + end + end + a +end +""") == [(1, 3), (1, 4), (2, 3), (2, 4)] + +# continue +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:6 + if isodd(i) + continue + end + push!(a, i) + end + a +end +""") == [2, 4, 6] + +# Loop variable scope +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:3 + push!(a, i) + i = 100 + end + a +end +""") == [1,2,3] + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + for i = 1:3 + end + i +end +""") == 100 + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + for outer i = 1:2 + nothing + end + i +end +""") == 2 + +# Fancy for loop left hand side - unpacking and scoping +@test 
JuliaLowering.include_string(test_mod, """ +let + a = [] + i = 100 + j = 200 + for (i,j) in [('a', 'b'), (1,2)] + push!(a, (i,j)) + end + (a, i, j) +end +""") == ([('a', 'b'), (1,2)], 100, 200) + +end + + +@testset "multidimensional for loops" begin + +test_mod = Module() + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + end + a +end +""") == [(1,3), (1,4), (2,3), (2,4)] + +@testset "break/continue" begin + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + break + end + a +end +""") == [(1,3)] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:4, j = 3:4 + if isodd(i) + continue + end + push!(a, (i,j)) + end + a +end +""") == [(2,3), (2,4), (4,3), (4,4)] + +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 1:4 + if isodd(j) + continue + end + push!(a, (i,j)) + end + a +end +""") == [(1,2), (1,4), (2,2), (2,4)] + + +end + + +@testset "Loop variable scope" begin + +# Test that `i` is copied in the inner loop +@test JuliaLowering.include_string(test_mod, """ +let + a = [] + for i = 1:2, j = 3:4 + push!(a, (i,j)) + i = 100 + end + a +end +""") == [(1,3), (1,4), (2,3), (2,4)] + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for i = 1:2, j = 3:4 + nothing + end + (i,j) +end +""") == (100,200) + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for outer i = 1:2, j = 3:4 + nothing + end + (i,j) +end +""") == (2,200) + +@test JuliaLowering.include_string(test_mod, """ +let + i = 100 + j = 200 + for i = 1:2, outer j = 3:4 + nothing + end + (i,j) +end +""") == (100,4) + +end + +end diff --git a/JuliaLowering/test/loops_ir.jl b/JuliaLowering/test/loops_ir.jl new file mode 100644 index 0000000000000..709322a084c68 --- /dev/null +++ b/JuliaLowering/test/loops_ir.jl @@ -0,0 +1,146 @@ +######################################## +# Basic while loop +while 
f(a) + body1 + body2 +end +#--------------------- +1 TestMod.f +2 TestMod.a +3 (call %₁ %₂) +4 (gotoifnot %₃ label₈) +5 TestMod.body1 +6 TestMod.body2 +7 (goto label₁) +8 (return core.nothing) + +######################################## +# While loop with short circuit condition +while a && b + body +end +#--------------------- +1 TestMod.a +2 (gotoifnot %₁ label₇) +3 TestMod.b +4 (gotoifnot %₃ label₇) +5 TestMod.body +6 (goto label₁) +7 (return core.nothing) + +######################################## +# While loop with with break and continue +while cond + body1 + break + body2 + continue + body3 +end +#--------------------- +1 TestMod.cond +2 (gotoifnot %₁ label₉) +3 TestMod.body1 +4 (goto label₉) +5 TestMod.body2 +6 (goto label₈) +7 TestMod.body3 +8 (goto label₁) +9 (return core.nothing) + +######################################## +# Basic for loop +for x in xs + body +end +#--------------------- +1 TestMod.xs +2 (= slot₁/next (call top.iterate %₁)) +3 slot₁/next +4 (call core.=== %₃ core.nothing) +5 (call top.not_int %₄) +6 (gotoifnot %₅ label₁₇) +7 slot₁/next +8 (= slot₂/x (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 TestMod.body +11 (= slot₁/next (call top.iterate %₁ %₉)) +12 slot₁/next +13 (call core.=== %₁₂ core.nothing) +14 (call top.not_int %₁₃) +15 (gotoifnot %₁₄ label₁₇) +16 (goto label₇) +17 (return core.nothing) + +######################################## +# Syntax sugar for nested for loop +for x in xs, y in ys + x = 10 # Copy of x; does not overwrite x iteration var +end +#--------------------- +1 TestMod.xs +2 (= slot₂/next (call top.iterate %₁)) +3 slot₂/next +4 (call core.=== %₃ core.nothing) +5 (call top.not_int %₄) +6 (gotoifnot %₅ label₃₄) +7 slot₂/next +8 (= slot₃/x (call core.getfield %₇ 1)) +9 (call core.getfield %₇ 2) +10 TestMod.ys +11 (= slot₁/next (call top.iterate %₁₀)) +12 slot₁/next +13 (call core.=== %₁₂ core.nothing) +14 (call top.not_int %₁₃) +15 (gotoifnot %₁₄ label₂₈) +16 slot₃/x +17 (= slot₄/x %₁₆) +18 
slot₁/next +19 (= slot₅/y (call core.getfield %₁₈ 1)) +20 (call core.getfield %₁₈ 2) +21 (= slot₄/x 10) +22 (= slot₁/next (call top.iterate %₁₀ %₂₀)) +23 slot₁/next +24 (call core.=== %₂₃ core.nothing) +25 (call top.not_int %₂₄) +26 (gotoifnot %₂₅ label₂₈) +27 (goto label₁₆) +28 (= slot₂/next (call top.iterate %₁ %₉)) +29 slot₂/next +30 (call core.=== %₂₉ core.nothing) +31 (call top.not_int %₃₀) +32 (gotoifnot %₃₁ label₃₄) +33 (goto label₇) +34 (return core.nothing) + +######################################## +# Error: break outside for/while +break +#--------------------- +LoweringError: +break +└───┘ ── break must be used inside a `while` or `for` loop + +######################################## +# Error: continue outside for/while +continue +#--------------------- +LoweringError: +continue +└──────┘ ── continue must be used inside a `while` or `for` loop + +######################################## +# Error: `outer` without outer local variable +let + for outer i = 1:2 + nothing + end + i +end +#--------------------- +LoweringError: +let + for outer i = 1:2 +# ╙ ── `outer` annotations must match with a local variable in an outer scope but no such variable was found + nothing + end diff --git a/JuliaLowering/test/macros.jl b/JuliaLowering/test/macros.jl new file mode 100644 index 0000000000000..d92b3243a76b4 --- /dev/null +++ b/JuliaLowering/test/macros.jl @@ -0,0 +1,486 @@ +@testset "macro tests" begin + +test_mod = Module(:macro_test) +Base.eval(test_mod, :(const var"@ast" = $(JuliaLowering.var"@ast"))) +Base.eval(test_mod, :(const var"@K_str" = $(JuliaLowering.var"@K_str"))) + +# These libraries may either be packages or vendored into Base - need to pull +# them in via relative paths in the `using` statements below. 
+Base.eval(test_mod, :(const JuliaLowering = $(JuliaLowering))) +Base.eval(test_mod, :(const JuliaSyntax = $(JuliaSyntax))) + +JuliaLowering.include_string(test_mod, raw""" +module M + using ..JuliaLowering: JuliaLowering, adopt_scope + using ..JuliaSyntax + + # Introspection + macro __MODULE__() + __context__.scope_layer.mod + end + + macro __FILE__() + JuliaLowering.filename(__context__.macrocall) + end + + macro __LINE__() + JuliaLowering.source_location(__context__.macrocall)[1] + end + + someglobal = "global in module M" + + # Macro with local variables + macro foo(ex) + :(begin + x = "`x` from @foo" + (x, someglobal, $ex) + end) + end + + # Set `a_global` in M + macro set_a_global(val) + :(begin + global a_global = $val + end) + end + + macro set_other_global(ex, val) + :(begin + global $ex = $val + end) + end + + macro set_global_in_parent(ex) + e1 = adopt_scope(:(sym_introduced_from_M), __context__) + quote + $e1 = $ex + nothing + end + end + + macro inner() + :(y) + end + + macro outer() + :((x, @inner)) + end + + macro recursive(N) + Nval = N.value::Int + if Nval < 1 + return N + end + quote + x = $N + (x, @recursive $(Nval-1)) + end + end +end +""") + +@test JuliaLowering.include_string(test_mod, """ +let + x = "`x` from outer scope" + M.@foo x +end +""") == ("`x` from @foo", "global in module M", "`x` from outer scope") +@test !isdefined(test_mod.M, :x) + + +@test JuliaLowering.include_string(test_mod, """ +#line1 +(M.@__MODULE__(), M.@__FILE__(), M.@__LINE__()) +""", "foo.jl") == (test_mod, "foo.jl", 2) + +@test !isdefined(test_mod.M, :a_global) +@test JuliaLowering.include_string(test_mod, """ +begin + M.@set_a_global 42 + M.a_global +end +""") == 42 + +JuliaLowering.include_string(test_mod, """ +M.@set_global_in_parent "bent hygiene!" +""") +@test test_mod.sym_introduced_from_M == "bent hygiene!" 
+ +JuliaLowering.include_string(test_mod, "M.@set_other_global global_in_test_mod 100") +@test !isdefined(test_mod.M, :global_in_test_mod) +@test test_mod.global_in_test_mod == 100 + +@test JuliaLowering.include_string(test_mod, """ +M.@recursive 3 +""") == (3, (2, (1, 0))) + +ex = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "M.@outer()", filename="foo.jl") +ctx, expanded = JuliaLowering.expand_forms_1(test_mod, ex, false, Base.get_world_counter()) +@test JuliaLowering.sourcetext.(JuliaLowering.flattened_provenance(expanded[2])) == [ + "M.@outer()" + "@inner" + "y" +] + +# World age support for macro expansion +JuliaLowering.include_string(test_mod, raw""" +macro world_age_test() + :(world1) +end +""") +world1 = Base.get_world_counter() +JuliaLowering.include_string(test_mod, raw""" +macro world_age_test() + :(world2) +end +""") +world2 = Base.get_world_counter() + +call_world_arg_test = JuliaLowering.parsestmt(JuliaLowering.SyntaxTree, "@world_age_test()") +@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world1)[2] ≈ + @ast_ "world1"::K"Identifier" +@test JuliaLowering.expand_forms_1(test_mod, call_world_arg_test, false, world2)[2] ≈ + @ast_ "world2"::K"Identifier" + +# Layer parenting +@test expanded[1].scope_layer == 2 +@test expanded[2].scope_layer == 3 +@test getfield.(ctx.scope_layers, :parent_layer) == [0,1,2] + +JuliaLowering.include_string(test_mod, """ +f_throw(x) = throw(x) +macro m_throw(x) + :(\$(f_throw(x))) +end +""") +let (err, st) = try + JuliaLowering.include_string(test_mod, "_never_exist = @m_throw 42") + catch e + e, stacktrace(catch_backtrace()) + end + @test err isa JuliaLowering.MacroExpansionError + @test !isnothing(err.err) + # Check that `catch_backtrace` can capture the stacktrace of the macro functions + @test any(sf->sf.func===:f_throw, st) + @test any(sf->sf.func===Symbol("@m_throw"), st) +end + +let err = try + JuliaLowering.include_string(test_mod, "_never_exist = @m_not_exist 42") + catch e + e + end 
+ @test err isa JuliaLowering.MacroExpansionError + @test err.msg == "Macro not found" + @test err.err isa UndefVarError +end + +@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)::Csize_t") == 3 +@test JuliaLowering.include_string(test_mod, "@ccall strlen(\"asdf\"::Cstring)::Csize_t gc_safe=true") == 4 +@test JuliaLowering.include_string(test_mod, """ +begin + buf = zeros(UInt8, 20) + @ccall sprintf(buf::Ptr{UInt8}, "num:%d str:%s"::Cstring; 42::Cint, "hello"::Cstring)::Cint + String(buf) +end +""") == "num:42 str:hello\0\0\0\0" + +let (err, st) = try + JuliaLowering.include_string(test_mod, "@ccall strlen(\"foo\"::Cstring)") + catch e + e, stacktrace(catch_backtrace()) + end + @test err isa JuliaLowering.MacroExpansionError + @test err.msg == "Expected a return type annotation `::SomeType`" + @test isnothing(err.err) + # Check that `catch_backtrace` can capture the stacktrace of the macro function + @test any(sf->sf.func===:ccall_macro_parse, st) +end + +# Tests for interop between old and new-style macros + +# Hygiene interop +JuliaLowering.include_string(test_mod, raw""" + macro call_oldstyle_macro(a) + quote + x = "x in call_oldstyle_macro" + @oldstyle $a x + end + end + + macro newstyle(a, b, c) + quote + x = "x in @newstyle" + ($a, $b, $c, x) + end + end +""") +# TODO: Make this macro lowering go via JuliaSyntax rather than the flisp code +# (JuliaSyntax needs support for old-style quasiquote processing) +Base.eval(test_mod, :( +macro oldstyle(a, b) + quote + x = "x in @oldstyle" + @newstyle $(esc(a)) $(esc(b)) x + end +end +)) +@test JuliaLowering.include_string(test_mod, """ +let x = "x in outer scope" + @call_oldstyle_macro x +end +""") == ("x in outer scope", + "x in call_oldstyle_macro", + "x in @oldstyle", + "x in @newstyle") + +# Old style unhygenic escaping with esc() +Base.eval(test_mod, :( +macro oldstyle_unhygenic() + esc(:x) +end +)) +@test JuliaLowering.include_string(test_mod, """ +let x = "x in outer scope" + 
@oldstyle_unhygenic +end +""") == "x in outer scope" + +# Exceptions in old style macros +Base.eval(test_mod, :( +macro oldstyle_error() + error("Some error in old style macro") +end +)) +@test try + JuliaLowering.include_string(test_mod, """ + @oldstyle_error + """) +catch exc + sprint(showerror, exc) +end == """ +MacroExpansionError while expanding @oldstyle_error in module Main.macro_test: +@oldstyle_error +└─────────────┘ ── Error expanding macro +Caused by: +Some error in old style macro""" + +@test sprint( + showerror, + JuliaLowering.MacroExpansionError( + JuliaLowering.expr_to_syntaxtree(:(foo), LineNumberNode(1)), + "fake error")) == + "MacroExpansionError:\n#= line 1 =# - fake error" + +# Old-style macros returning non-Expr values +Base.eval(test_mod, :( +macro oldstyle_non_Expr() + 42 +end +)) +@test JuliaLowering.include_string(test_mod, """ +@oldstyle_non_Expr +""") === 42 + +# New-style macros called with the wrong arguments +JuliaLowering.include_string(test_mod, raw""" +macro method_error_test(a) +end +""") +Base.eval(test_mod, :( +macro method_error_test() +end +)) +try + JuliaLowering.include_string(test_mod, raw""" + @method_error_test x y + """) + @test false +catch exc + @test exc isa JuliaLowering.MacroExpansionError + mexc = exc.err + @test mexc isa MethodError + @test mexc.args isa Tuple{JuliaLowering.MacroContext, JuliaLowering.SyntaxTree, JuliaLowering.SyntaxTree} +end + +@testset "calling with old/new macro signatures" begin + # Old defined with 1 arg, new with 2 args, both with 3 (but with different values) + Base.eval(test_mod, :(macro sig_mismatch(x); x; end)) + Base.eval(test_mod, :(macro sig_mismatch(x, y, z); z; end)) + JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y); x; end") + JuliaLowering.include_string(test_mod, "macro sig_mismatch(x, y, z); x; end") + + @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1)") === 1 + @test JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2)") === 1 + @test 
JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3)") === 1 # 3 if we prioritize old sig + err = try + JuliaLowering.include_string(test_mod, "@sig_mismatch(1, 2, 3, 4)") === 1 + catch exc + sprint(showerror, exc, context=:module=>test_mod) + end + @test startswith(err, """ + MacroExpansionError while expanding @sig_mismatch in module Main.macro_test: + @sig_mismatch(1, 2, 3, 4) + └───────────────────────┘ ── Error expanding macro + Caused by: + MethodError: no method matching var"@sig_mismatch"(::JuliaLowering.MacroContext, ::JuliaLowering.SyntaxTree""") +end + +@testset "old macros producing exotic expr heads" begin + @test JuliaLowering.include_string(test_mod, """ + let # example from @preserve docstring + x = Ref{Int}(101) + p = Base.unsafe_convert(Ptr{Int}, x) + GC.@preserve x unsafe_load(p) + end""") === 101 # Expr(:gc_preserve) + + # only invokelatest produces :isglobal now, so MWE here + Base.eval(test_mod, :(macro isglobal(x); esc(Expr(:isglobal, x)); end)) + @test JuliaLowering.include_string(test_mod, """ + some_global = 1 + function isglobal_chk(some_arg) + local some_local = 1 + (@isglobal(some_undefined), @isglobal(some_global), @isglobal(some_arg), @isglobal(some_local)) + end + isglobal_chk(1) + """) === (true, true, false, false) + # with K"Placeholder"s + @test JuliaLowering.include_string(test_mod, """ + __ = 1 + function isglobal_chk(___) + local ____ = 1 + (@isglobal(_), @isglobal(__), @isglobal(___), @isglobal(____)) + end + isglobal_chk(1) + """) === (false, false, false, false) + + # @test appears to be the only macro in base to use :inert + test_result = JuliaLowering.include_string(test_mod, """ + using Test + @test identity(123) === 123 + """; expr_compat_mode=true) + @test test_result.value === true + + # @enum produces Expr(:toplevel) + JuliaLowering.include_string(test_mod, """ + @enum SOME_ENUM X1 X2 X3 + """; expr_compat_mode=true) + @test test_mod.SOME_ENUM <: Enum + @test test_mod.X1 isa Enum +end + +@testset "macros 
producing meta forms" begin + function find_method_ci(thunk) + ci = thunk.args[1]::Core.CodeInfo + m = findfirst(x->(x isa Expr && x.head === :method && length(x.args) === 3), ci.code) + ci.code[m].args[3] + end + jlower_e(s) = JuliaLowering.to_lowered_expr( + JuliaLowering.lower( + test_mod, JuliaLowering.parsestmt( + JuliaLowering.SyntaxTree, s); + expr_compat_mode=true)) + + prog = "Base.@assume_effects :foldable function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).purity === find_method_ci(our).purity + + prog = "Base.@inline function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).inlining === find_method_ci(our).inlining + + prog = "Base.@noinline function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).inlining === find_method_ci(our).inlining + + prog = "Base.@constprop :none function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).constprop === find_method_ci(our).constprop + + prog = "Base.@nospecializeinfer function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).nospecializeinfer === find_method_ci(our).nospecializeinfer + + prog = "Base.@propagate_inbounds function foo(); end" + ref = Meta.lower(test_mod, Meta.parse(prog)) + our = jlower_e(prog) + @test find_method_ci(ref).propagate_inbounds === find_method_ci(our).propagate_inbounds + +end + +@testset "scope layers for normally-inert ASTs" begin + # Right hand side of `.` + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(hi) + :(A.$x) + end + """) ≈ @ast_ [K"." 
+ "A"::K"Identifier" + "hi"::K"Identifier" + ] + # module + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(AA) + :(module $x + end + ) + end + """) ≈ @ast_ [K"module" + "AA"::K"Identifier" + [K"block" + ] + ] + + # In macro expansion, require that expressions passed in as macro + # *arguments* get the lexical scope of the calling context, even for the + # `x` in `M.$x` where the right hand side of `.` is normally quoted. + @test JuliaLowering.include_string(test_mod, raw""" + let x = :(someglobal) + @eval M.$x + end + """) == "global in module M" + + JuliaLowering.include_string(test_mod, raw""" + let y = 101 + @eval module AA + x = $y + end + end + """) + @test test_mod.AA.x == 101 + + # "Deferred hygiene" in macros which emit quoted code currently doesn't + # work as might be expected. + # + # The old macro system also doesn't handle this - here's the equivalent + # implementation + # macro make_quoted_code(init, y) + # QuoteNode(:(let + # x = "inner x" + # $(esc(init)) + # ($(esc(y)), x) + # end)) + # end + # + # TODO: The following should throw an error rather than producing a + # surprising value, or work "as expected" whatever that is! 
+ JuliaLowering.include_string(test_mod, raw""" + macro make_quoted_code(init, y) + q = :(let + x = "inner x" + $init + ($y, x) + end) + @ast q q [K"inert" q] + end + """) + code = JuliaLowering.include_string(test_mod, """@make_quoted_code(x="outer x", x)""") + @test_broken JuliaLowering.eval(test_mod, code) == ("outer x", "inner x") +end + +end diff --git a/JuliaLowering/test/macros_ir.jl b/JuliaLowering/test/macros_ir.jl new file mode 100644 index 0000000000000..183dce3944b35 --- /dev/null +++ b/JuliaLowering/test/macros_ir.jl @@ -0,0 +1,188 @@ +module MacroMethods + macro some_macro() + quote + some_global + end + end + + module ExtraMacroMethods + using ..MacroMethods + macro MacroMethods.some_macro(ex) + quote + some_global + end + end + end +end + +macro strmac_str(ex, suff=nothing) + s = "$(ex[1].value) from strmac" + if !isnothing(suff) + s = "$s with suffix $(suff.value)" + end + s +end + +macro cmdmac_cmd(ex, suff=nothing) + s = "$(ex[1].value) from cmdmac" + if !isnothing(suff) + s = "$s with suffix $(suff.value)" + end + s +end + +#******************************************************************************* +######################################## +# Simple macro +macro add_one(ex) + quote + $ex + 1 + end +end +#--------------------- +1 (method TestMod.@add_one) +2 latestworld +3 TestMod.@add_one +4 (call core.Typeof %₃) +5 (call core.svec %₄ JuliaLowering.MacroContext core.Any) +6 (call core.svec) +7 SourceLocation::1:7 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/__context__(!read) slot₃/ex] + 1 (call core.tuple slot₃/ex) + 2 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ ex) + 1))) %₁) + 3 (return %₂) +10 latestworld +11 TestMod.@add_one +12 (return %₁₁) + +######################################## +# Macro using `__context__` +macro foo(ex) + ctx = __context__ +end +#--------------------- +1 (method TestMod.@foo) +2 latestworld +3 TestMod.@foo +4 (call core.Typeof %₃) +5 
(call core.svec %₄ JuliaLowering.MacroContext core.Any) +6 (call core.svec) +7 SourceLocation::1:7 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/__context__ slot₃/ex(!read) slot₄/ctx(!read)] + 1 slot₂/__context__ + 2 (= slot₄/ctx %₁) + 3 (return %₁) +10 latestworld +11 TestMod.@foo +12 (return %₁₁) + +######################################## +# Scope for symbols emitted by macros is the module where the method was +# defined, thus two different modules in this case, even though `@some_macro` +# belongs to the MacroMethods module. +(MacroMethods.@some_macro(), MacroMethods.@some_macro(unused)) +#--------------------- +1 TestMod.MacroMethods.some_global +2 TestMod.MacroMethods.ExtraMacroMethods.some_global +3 (call core.tuple %₁ %₂) +4 (return %₃) + +######################################## +# Error: Macro with kw args +macro mmm(a; b=2) +end +#--------------------- +LoweringError: +macro mmm(a; b=2) +# └───┘ ── macros cannot accept keyword arguments +end + +######################################## +# Error: Bad macro name +macro mmm[](ex) +end +#--------------------- +LoweringError: +macro mmm[](ex) +# └───┘ ── invalid macro name +end + +######################################## +# Error: Macros not allowed in local scope +let + macro foo(ex) + end +end +#--------------------- +LoweringError: +let +# ┌──────────── + macro foo(ex) + end +#─────┘ ── macro is only allowed in global scope +end + +######################################## +# Error: Macros not allowed in local scope +function f() + macro foo() + end +end +#--------------------- +LoweringError: +function f() +# ┌────────── + macro foo() + end +#─────┘ ── macro is only allowed in global scope +end + +######################################## +# Error: Macros not found +_never_exist = @m_not_exist 42 +#--------------------- +MacroExpansionError while expanding @m_not_exist in module Main.TestMod: +_never_exist = @m_not_exist 42 +# └──────────┘ ── Macro not found 
+Caused by: +UndefVarError: `@m_not_exist` not defined in `Main.TestMod` +Suggestion: check for spelling errors or missing imports. + +######################################## +# Simple cmdstring +`echo 1` +#--------------------- +1 Base.cmd_gen +2 (call core.tuple "echo") +3 (call core.tuple "1") +4 (call core.tuple %₂ %₃) +5 (call %₁ %₄) +6 (return %₅) + +######################################## +# Simple string macro +strmac"hello" +#--------------------- +1 (return "hello from strmac") + +######################################## +# String macro with suffix +strmac"hello"blah +#--------------------- +1 (return "hello from strmac with suffix blah") + +######################################## +# Simple cmd macro +cmdmac`hello` +#--------------------- +1 (return "hello from cmdmac") + +######################################## +# Cmd macro with suffix +cmdmac`hello`12345 +#--------------------- +1 (return "hello from cmdmac with suffix 12345") diff --git a/JuliaLowering/test/misc.jl b/JuliaLowering/test/misc.jl new file mode 100644 index 0000000000000..a0c9dba7a0e3c --- /dev/null +++ b/JuliaLowering/test/misc.jl @@ -0,0 +1,183 @@ +@testset "Miscellaneous" begin + +test_mod = Module() + +# Blocks +@test JuliaLowering.include_string(test_mod, """ +begin +end +""") == nothing + +# Placeholders +@test JuliaLowering.include_string(test_mod, """_ = 10""") == 10 + +# GC.@preserve +@test JuliaLowering.include_string(test_mod, """ +let x = [1,2] + GC.@preserve x begin + x + end +end +""") == [1,2] + +@test JuliaLowering.include_string(test_mod, raw""" +let + x = 10 + @eval $x + 2 +end +""") == 12 + +@test JuliaLowering.include_string(test_mod, raw""" +module EvalTest + _some_var = 2 +end +let + x = 10 + @eval EvalTest $x + _some_var +end +""") == 12 + +@test JuliaLowering.include_string(test_mod, """ +let x=11 + 20x +end +""") == 220 + +# ccall +@test JuliaLowering.include_string(test_mod, """ +ccall(:strlen, Csize_t, (Cstring,), "asdfg") +""") == 5 + +# cfunction 
+JuliaLowering.include_string(test_mod, """ +function f_ccallable(x, y) + x + y * 10 +end +""") +cf_int = JuliaLowering.include_string(test_mod, """ +@cfunction(f_ccallable, Int, (Int,Int)) +""") +@test @ccall($cf_int(2::Int, 3::Int)::Int) == 32 +cf_float = JuliaLowering.include_string(test_mod, """ +@cfunction(f_ccallable, Float64, (Float64,Float64)) +""") +@test @ccall($cf_float(2::Float64, 3::Float64)::Float64) == 32.0 + +# Test that hygiene works with @ccallable function names (this is broken in +# Base) +JuliaLowering.include_string(test_mod, raw""" +f_ccallable_hygiene() = 1 + +module Nested + f_ccallable_hygiene() = 2 + macro cfunction_hygiene() + :(@cfunction(f_ccallable_hygiene, Int, ())) + end +end +""") +cf_hygiene = JuliaLowering.include_string(test_mod, """ +Nested.@cfunction_hygiene +""") +@test @ccall($cf_hygiene()::Int) == 2 + +# Test that ccall can be passed static parameters in type signatures. +# +# Note that the cases where this works are extremely limited and tend to look +# like `Ptr{T}` or `Ref{T}` (`T` doesn't work!?) because of the compilation +# order in which the runtime inspects the arguments to ccall (`Ptr{T}` has a +# well defined C ABI even when `T` is not yet determined). See also +# https://github.com/JuliaLang/julia/issues/29400 +# https://github.com/JuliaLang/julia/pull/40947 +JuliaLowering.include_string(test_mod, raw""" +function sparam_ccallable(x::Ptr{T}) where {T} + unsafe_store!(x, one(T)) + nothing +end + +function ccall_with_sparams(::Type{T}) where {T} + x = T[zero(T)] + cf = @cfunction(sparam_ccallable, Cvoid, (Ptr{T},)) + @ccall $cf(x::Ptr{T})::Cvoid + x[1] +end +""") +@test test_mod.ccall_with_sparams(Int) === 1 +@test test_mod.ccall_with_sparams(Float64) === 1.0 + +# FIXME Currently JL cannot handle `@generated` functions, so the following test cases are commented out. 
+# # Test that ccall can be passed static parameters in the function name +# # Note that this only works with `@generated` functions from 1.13 onwards, +# # where the function name can be evaluated at code generation time. +# JuliaLowering.include_string(test_mod, raw""" +# # In principle, may add other strlen-like functions here for different string +# # types +# ccallable_sptest_name(::Type{String}) = :strlen +# +# @generated function ccall_with_sparams_in_name(s::T) where {T} +# name = QuoteNode(ccallable_sptest_name(T)) +# :(ccall($name, Csize_t, (Cstring,), s)) +# end +# """) +# @test test_mod.ccall_with_sparams_in_name("hii") == 3 + +@testset "CodeInfo: has_image_globalref" begin + @test lower_str(test_mod, "x + y").args[1].has_image_globalref === false + @test lower_str(Main, "x + y").args[1].has_image_globalref === true +end + +@testset "docstrings: doc-only expressions" begin + local jeval(mod, str) = JuliaLowering.include_string(mod, str; expr_compat_mode=true) + jeval(test_mod, "function fun_exists(x); x; end") + jeval(test_mod, "module M end; module M2 end") + # TODO: return values are to be determined, currently Base.Docs.Binding for + # both lowering implementations. We can't return the value of the + # expression in these special cases. 
+ jeval(test_mod, "\"docstr1\" sym_noexist") + jeval(test_mod, "\"docstr2\" fun_noexist()") + jeval(test_mod, "\"docstr3\" fun_exists(sym_noexist)") + jeval(test_mod, "\"docstr4\" M.sym_noexist") + jeval(test_mod, "\"docstr5\" M.fun_noexist()") + jeval(test_mod, "\"docstr6\" M.fun_exists(sym_noexist)") + @test jeval(test_mod, "@doc sym_noexist") |> string === "docstr1\n" + @test jeval(test_mod, "@doc fun_noexist()") |> string === "docstr2\n" + @test jeval(test_mod, "@doc fun_exists(sym_noexist)") |> string === "docstr3\n" + @test jeval(test_mod, "@doc M.sym_noexist") |> string === "docstr4\n" + @test jeval(test_mod, "@doc M.fun_noexist()") |> string === "docstr5\n" + @test jeval(test_mod, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n" + @test jeval(test_mod.M, "@doc M.sym_noexist") |> string === "docstr4\n" + @test jeval(test_mod.M, "@doc M.fun_noexist()") |> string === "docstr5\n" + @test jeval(test_mod.M, "@doc M.fun_exists(sym_noexist)") |> string === "docstr6\n" + + jeval(test_mod.M2, "\"docstr7\" M2.M2.sym_noexist") + jeval(test_mod.M2, "\"docstr8\" M2.M2.fun_noexist()") + jeval(test_mod.M2, "\"docstr9\" M2.M2.fun_exists(sym_noexist)") + @test jeval(test_mod, "@doc M2.M2.sym_noexist") |> string === "docstr7\n" + @test jeval(test_mod, "@doc M2.M2.fun_noexist()") |> string === "docstr8\n" + @test jeval(test_mod, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n" + @test jeval(test_mod.M2, "@doc M2.M2.sym_noexist") |> string === "docstr7\n" + @test jeval(test_mod.M2, "@doc M2.M2.fun_noexist()") |> string === "docstr8\n" + @test jeval(test_mod.M2, "@doc M2.M2.fun_exists(sym_noexist)") |> string === "docstr9\n" + + # Try with signatures and type variables + jeval(test_mod, "abstract type T_exists end") + + jeval(test_mod, "\"docstr10\" f10(x::Int, y, z::T_exists)") + d = jeval(test_mod, "@doc f10") + @test d |> string === "docstr10\n" + # TODO: Is there a better way of accessing this? 
Feel free to change tests + # if docsystem storage changes. + @test d.meta[:results][1].data[:typesig] === Tuple{Int, Any, test_mod.T_exists} + + jeval(test_mod, "\"docstr11\" f11(x::T_exists, y::U, z::T) where {T, U<:Number}") + d = jeval(test_mod, "@doc f11") + @test d |> string === "docstr11\n" + @test d.meta[:results][1].data[:typesig] === Tuple{test_mod.T_exists, U, T} where {T, U<:Number} + + jeval(test_mod, "\"docstr12\" f12(x::Int, y::U, z::T=1) where {T, U<:Number}") + d = jeval(test_mod, "@doc f12") + @test d |> string === "docstr12\n" + @test d.meta[:results][1].data[:typesig] === Union{Tuple{Int64, U, T}, Tuple{Int64, U}} where {T, U<:Number} + +end + +end diff --git a/JuliaLowering/test/misc_ir.jl b/JuliaLowering/test/misc_ir.jl new file mode 100644 index 0000000000000..775960b87d238 --- /dev/null +++ b/JuliaLowering/test/misc_ir.jl @@ -0,0 +1,584 @@ +module JuxtuposeTest + macro emit_juxtupose() + :(10x) + end +end + +#******************************************************************************* +######################################## +# Getproperty syntax +x.a +#--------------------- +1 TestMod.x +2 (call top.getproperty %₁ :a) +3 (return %₂) + +######################################## +# Getproperty syntax with a string on right hand side +x."b" +#--------------------- +1 TestMod.x +2 (call top.getproperty %₁ "b") +3 (return %₂) + +######################################## +# Standalone dot syntax +.* +#--------------------- +1 TestMod.* +2 (call top.BroadcastFunction %₁) +3 (return %₂) + +######################################## +# Error: Wrong number of children in `.` +@ast_ [K"." 
"x"::K"Identifier" "a"::K"Identifier" 3::K"Integer"] +#--------------------- +LoweringError: +#= line 1 =# - `.` form requires either one or two children + +######################################## +# Error: Placeholder value used +_ + 1 +#--------------------- +LoweringError: +_ + 1 +╙ ── all-underscore identifiers are write-only and their values cannot be used in expressions + +######################################## +# Named tuple +(a=1, b=2) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 (return %₄) + +######################################## +# Named tuple with parameters +(; a=1, b=2) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 (return %₄) + +######################################## +# Empty named tuple +(;) +#--------------------- +1 (call core.NamedTuple) +2 (return %₁) + +######################################## +# Named tuple with implicit field names +(;x, a.b.c, y._) +#--------------------- +1 (call core.tuple :x :c :_) +2 (call core.apply_type core.NamedTuple %₁) +3 TestMod.x +4 TestMod.a +5 (call top.getproperty %₄ :b) +6 (call top.getproperty %₅ :c) +7 TestMod.y +8 (call top.getproperty %₇ :_) +9 (call core.tuple %₃ %₆ %₈) +10 (call %₂ %₉) +11 (return %₁₀) + +######################################## +# Named tuple with splats +(; a=1, b=2, bs..., c=3, ds...) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 TestMod.bs +6 (call top.merge %₄ %₅) +7 (call core.tuple :c) +8 (call core.apply_type core.NamedTuple %₇) +9 (call core.tuple 3) +10 (call %₈ %₉) +11 (call top.merge %₆ %₁₀) +12 TestMod.ds +13 (call top.merge %₁₁ %₁₂) +14 (return %₁₃) + +######################################## +# Named tuple with only splats +(; as..., bs...) 
+#--------------------- +1 (call core.NamedTuple) +2 TestMod.as +3 (call top.merge %₁ %₂) +4 TestMod.bs +5 (call top.merge %₃ %₄) +6 (return %₅) + +######################################## +# Named tuple with dynamic names +(; a=1, b=2, c=>d) +#--------------------- +1 (call core.tuple :a :b) +2 (call core.apply_type core.NamedTuple %₁) +3 (call core.tuple 1 2) +4 (call %₂ %₃) +5 TestMod.c +6 (call core.tuple %₅) +7 (call core.apply_type core.NamedTuple %₆) +8 TestMod.d +9 (call core.tuple %₈) +10 (call %₇ %₉) +11 (call top.merge %₄ %₁₀) +12 (return %₁₁) + +######################################## +# Error: Named tuple with repeated fields +(; a=1, bs..., c=3, a=2) +#--------------------- +LoweringError: +(; a=1, bs..., c=3, a=2) +# ╙ ── Repeated named tuple field name + +######################################## +# Error: Named tuple frankentuple +(a=1; b=2, c=3) +#--------------------- +LoweringError: +(a=1; b=2, c=3) +# └────────┘ ── unexpected semicolon in tuple - use `,` to separate tuple elements + +######################################## +# Error: Named tuple field dots in rhs +(; a=xs...) +#--------------------- +LoweringError: +(; a=xs...) 
+# └───┘ ── `...` cannot be used in a value for a named tuple field + +######################################## +# Error: Named tuple field invalid lhs +(; a[]=1) +#--------------------- +LoweringError: +(; a[]=1) +# └─┘ ── invalid named tuple field name + +######################################## +# Error: Named tuple element with weird dot syntax +(; a."b") +#--------------------- +LoweringError: +(; a."b") +# └───┘ ── invalid named tuple element + +######################################## +# Error: Named tuple element without valid name +(; a=1, f()) +#--------------------- +LoweringError: +(; a=1, f()) +# └─┘ ── Invalid named tuple element + +######################################## +# Error: Modules not allowed inside blocks +begin + module C + end +end +#--------------------- +LoweringError: +begin +# ┌─────── + module C + end +#─────┘ ── `module` is only allowed at top level +end + +######################################## +# Error: Modules not allowed in local scope +function f() + module C + end +end +#--------------------- +LoweringError: +function f() +# ┌─────── + module C + end +#─────┘ ── `module` is only allowed at top level +end + +######################################## +# Basic type assert +x::T +#--------------------- +1 TestMod.x +2 TestMod.T +3 (call core.typeassert %₁ %₂) +4 (return %₃) + +######################################## +# Error: Invalid :: syntax outside function arg list +::T +#--------------------- +LoweringError: +::T +└─┘ ── `::` must be written `value::type` outside function argument lists + +######################################## +# Error: braces vector syntax +{x, y} +#--------------------- +LoweringError: +{x, y} +└────┘ ── { } syntax is reserved for future use + +######################################## +# Error: braces matrix syntax +{x y; y z} +#--------------------- +LoweringError: +{x y; y z} +└────────┘ ── { } syntax is reserved for future use + +######################################## +# Error: Test AST which has 
no source form and thus must have been constructed +# programmatically (eg, a malformed if) +@ast_ [K"if"] +#--------------------- +LoweringError: +#= line 1 =# - expected `numchildren(ex) >= 2` + +######################################## +# Error: @atomic in wrong position +let + @atomic x +end +#--------------------- +LoweringError: +let + @atomic x +# └───────┘ ── unimplemented or unsupported atomic declaration +end + +######################################## +# GC.@preserve support +GC.@preserve a b begin + f(a,b) +end +#--------------------- +1 TestMod.a +2 TestMod.b +3 (= slot₂/s (gc_preserve_begin %₁ %₂)) +4 TestMod.f +5 TestMod.a +6 TestMod.b +7 (= slot₁/r (call %₄ %₅ %₆)) +8 (gc_preserve_end slot₂/s) +9 slot₁/r +10 (return %₉) + +######################################## +# Error: GC.@preserve bad args +GC.@preserve a b g() begin + body +end +#--------------------- +MacroExpansionError while expanding GC.@preserve in module Main.TestMod: +GC.@preserve a b g() begin +# └─┘ ── Preserved variable must be a symbol + body +end + +######################################## +# @eval without module +@eval $f(x, y) +#--------------------- +1 TestMod.f +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₂) +4 (= slot₁/eval_result (call JuliaLowering.eval TestMod %₃)) +5 latestworld +6 slot₁/eval_result +7 (return %₆) + +######################################## +# @eval with module +@eval mod $f(x, y) +#--------------------- +1 TestMod.mod +2 TestMod.f +3 (call core.tuple %₂) +4 (call JuliaLowering.interpolate_ast SyntaxTree (inert (call ($ f) x y)) %₃) +5 (= slot₁/eval_result (call JuliaLowering.eval %₁ %₄)) +6 latestworld +7 slot₁/eval_result +8 (return %₇) + +######################################## +# Juxtaposition +20x +#--------------------- +1 TestMod.* +2 TestMod.x +3 (call %₁ 20 %₂) +4 (return %₃) + +######################################## +# Juxtaposition - check the juxtapose multiply is resolved to 
`JuxtuposeTest.*` when +# emitted by the macro in the JuxtuposeTest module. +# +# This is consistent with Julia's existing system but it's not entirely clear +# this is good - perhaps we should resolve to Base.* instead? Resolving to the +# module-local version makes it exactly equivalent to `*`. But one might argue +# this is confusing because the symbol `*` appears nowhere in the user's source +# code. +JuxtuposeTest.@emit_juxtupose +#--------------------- +1 TestMod.JuxtuposeTest.* +2 TestMod.JuxtuposeTest.x +3 (call %₁ 10 %₂) +4 (return %₃) + +######################################## +# @cfunction expansion with global generic function as function argument +@cfunction(callable, Int, (Int, Float64)) +#--------------------- +1 (cfunction Ptr{Nothing} (static_eval TestMod.callable) (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall) +2 (return %₁) + +######################################## +# @cfunction expansion with closed-over callable argument +@cfunction($close_over, Int, (Int, Float64)) +#--------------------- +1 TestMod.close_over +2 (cfunction Base.CFunction %₁ (static_eval TestMod.Int) (static_eval (call core.svec TestMod.Int TestMod.Float64)) :ccall) +3 (return %₂) + +######################################## +# Error: Bad arg types to @cfunction +@cfunction(f, Int, NotATuple) +#--------------------- +MacroExpansionError while expanding @cfunction in module Main.TestMod: +@cfunction(f, Int, NotATuple) +# └───────┘ ── @cfunction argument types must be a literal tuple + +######################################## +# Error: Locals used in @cfunction return type +let T=Float64 + @cfunction(f, T, (Float64,)) +end +#--------------------- +LoweringError: +let T=Float64 + @cfunction(f, T, (Float64,)) +# ╙ ── cfunction return type cannot reference local variable +end + +######################################## +# Error: Locals used in @cfunction arg type +let T=Float64 + @cfunction(f, Float64, (Float64,T)) +end 
+#--------------------- +LoweringError: +let T=Float64 + @cfunction(f, Float64, (Float64,T)) +# ╙ ── cfunction argument type cannot reference local variable +end + +######################################## +# Basic @ccall lowering +@ccall foo(x::X, y::Y)::R +#--------------------- +1 JuliaLowering.Base +2 (call top.getproperty %₁ :cconvert) +3 TestMod.X +4 TestMod.x +5 (= slot₁/arg1 (call %₂ %₃ %₄)) +6 JuliaLowering.Base +7 (call top.getproperty %₆ :cconvert) +8 TestMod.Y +9 TestMod.y +10 (= slot₂/arg2 (call %₇ %₈ %₉)) +11 JuliaLowering.Base +12 (call top.getproperty %₁₁ :unsafe_convert) +13 TestMod.X +14 slot₁/arg1 +15 (call %₁₂ %₁₃ %₁₄) +16 JuliaLowering.Base +17 (call top.getproperty %₁₆ :unsafe_convert) +18 TestMod.Y +19 slot₂/arg2 +20 (call %₁₇ %₁₈ %₁₉) +21 slot₁/arg1 +22 slot₂/arg2 +23 (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 0 :($(QuoteNode((:ccall, 0x0000, false)))) %₁₅ %₂₀ %₂₁ %₂₂) +24 (return %₂₃) + +######################################## +# @ccall lowering with gc_safe +@ccall foo(x::X; y::Y)::R gc_safe=true +#--------------------- +1 JuliaLowering.Base +2 (call top.getproperty %₁ :cconvert) +3 TestMod.X +4 TestMod.x +5 (= slot₁/arg1 (call %₂ %₃ %₄)) +6 JuliaLowering.Base +7 (call top.getproperty %₆ :cconvert) +8 TestMod.Y +9 TestMod.y +10 (= slot₂/arg2 (call %₇ %₈ %₉)) +11 JuliaLowering.Base +12 (call top.getproperty %₁₁ :unsafe_convert) +13 TestMod.X +14 slot₁/arg1 +15 (call %₁₂ %₁₃ %₁₄) +16 JuliaLowering.Base +17 (call top.getproperty %₁₆ :unsafe_convert) +18 TestMod.Y +19 slot₂/arg2 +20 (call %₁₇ %₁₈ %₁₉) +21 slot₁/arg1 +22 slot₂/arg2 +23 (foreigncall :foo (static_eval TestMod.R) (static_eval (call core.svec TestMod.X TestMod.Y)) 1 :($(QuoteNode((:ccall, 0x0000, true)))) %₁₅ %₂₀ %₂₁ %₂₂) +24 (return %₂₃) + +######################################## +# non-macro ccall with vararg in signature, but none provided +ccall(:fcntl, Cint, (RawFD, Cint, Cint...), s, F_GETFL) +#--------------------- +1 
TestMod.RawFD +2 TestMod.Cint +3 TestMod.Cint +4 TestMod.s +5 (call top.cconvert %₁ %₄) +6 TestMod.F_GETFL +7 (call top.cconvert %₂ %₆) +8 (call top.unsafe_convert %₁ %₅) +9 (call top.unsafe_convert %₂ %₇) +10 (foreigncall :fcntl (static_eval TestMod.Cint) (static_eval (call core.svec TestMod.RawFD TestMod.Cint TestMod.Cint)) 2 :ccall %₈ %₉ %₅ %₇) +11 (return %₁₀) + +######################################## +# Error: No return annotation on @ccall +@ccall strlen("foo"::Cstring) +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall strlen("foo"::Cstring) +# └ ── Expected a return type annotation `::SomeType` + +######################################## +# Error: No argument type on @ccall +@ccall foo("blah"::Cstring, "bad")::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo("blah"::Cstring, "bad")::Int +# └───┘ ── argument needs a type annotation + +######################################## +# Error: @ccall varags without one fixed argument +@ccall foo(; x::Int)::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(; x::Int)::Int +# └──────┘ ── C ABI prohibits varargs without one required argument + +######################################## +# Error: Multiple varargs blocks +@ccall foo(; x::Int; y::Float64)::Int +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(; x::Int; y::Float64)::Int +# └──────────┘ ── Multiple parameter blocks not allowed + +######################################## +# Error: Bad @ccall option +@ccall foo(x::Int)::Int bad_opt +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int bad_opt +# └─────┘ ── Bad option to ccall + +######################################## +# Error: Unknown @ccall option name +@ccall foo(x::Int)::Int bad_opt=true +#--------------------- 
+MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int bad_opt=true +# └─────┘ ── Unknown option name for ccall + +######################################## +# Error: Unknown option type +@ccall foo(x::Int)::Int gc_safe="hi" +#--------------------- +MacroExpansionError while expanding @ccall in module Main.TestMod: +@ccall foo(x::Int)::Int gc_safe="hi" +# └──┘ ── gc_safe must be true or false + +######################################## +# Error: unary & syntax +&x +#--------------------- +LoweringError: +&x +└┘ ── invalid syntax + +######################################## +# Error: $ outside quote/string +$x +#--------------------- +LoweringError: +$x +└┘ ── `$` expression outside string or quote block + +######################################## +# Error: splat outside call +x... +#--------------------- +LoweringError: +x... +└──┘ ── `...` expression outside call + +######################################## +# `include` should increment world age +include("hi.jl") +#--------------------- +1 TestMod.include +2 (call %₁ "hi.jl") +3 latestworld +4 (return %₂) + +######################################## +# Const function assignment syntax (legacy) +const f(x::Int)::Int = x+1 +#--------------------- +1 TestMod.f +2 TestMod.x +3 TestMod.Int +4 (call core.typeassert %₂ %₃) +5 (call %₁ %₄) +6 TestMod.Int +7 (call core.typeassert %₅ %₆) +8 (return %₇) + +######################################## +# Error: Destructuring assignment method definitions (broken, legacy) +f(x)::Int, g() = [1.0, 2.0] +#--------------------- +LoweringError: +f(x)::Int, g() = [1.0, 2.0] +└──┘ ── invalid assignment location + +######################################## +# Error: Destructuring assignment typedef, variable, and function (broken, legacy) +T{U}, (x::Float64, g()) = [Bool, (1, 2)] +#--------------------- +LoweringError: +T{U}, (x::Float64, g()) = [Bool, (1, 2)] +# └─┘ ── invalid assignment location diff --git a/JuliaLowering/test/modules.jl 
b/JuliaLowering/test/modules.jl new file mode 100644 index 0000000000000..a68c5f8a8b6e2 --- /dev/null +++ b/JuliaLowering/test/modules.jl @@ -0,0 +1,54 @@ +@testset "modules" begin + +test_mod = Module() + +A = JuliaLowering.include_string(test_mod, """ +module A + function g() + return "hi" + end +end +""", "module_test") +@test A isa Module +@test A.g() == "hi" +@test A.include isa Base.IncludeInto +@test A.eval isa Core.EvalInto +@test A.Base === Base +@test A.eval(:(x = -2)) == -2 +@test A.x == -2 + +B = JuliaLowering.include_string(test_mod, """ +baremodule B +end +""", "baremodule_test") +@test B.Core === Core +@test !isdefined(B, :include) +@test !isdefined(B, :eval) +@test !isdefined(B, :Base) + +# Module init order +Amod = JuliaLowering.include_string(test_mod, """ +module A + init_order = [] + __init__() = push!(init_order, "A") + module B + using ..A + __init__() = push!(A.init_order, "B") + end + module C + using ..A + __init__() = push!(A.init_order, "C") + module D + using ...A + __init__() = push!(A.init_order, "D") + end + module E + using ...A + __init__() = push!(A.init_order, "E") + end + end +end +""") +@test Amod.init_order == ["B", "D", "E", "C", "A"] + +end diff --git a/JuliaLowering/test/quoting.jl b/JuliaLowering/test/quoting.jl new file mode 100644 index 0000000000000..93ace74e948f2 --- /dev/null +++ b/JuliaLowering/test/quoting.jl @@ -0,0 +1,283 @@ +@testset "Syntax quoting & interpolation" begin + +test_mod = Module() + +ex = JuliaLowering.include_string(test_mod, """ +begin + x = 10 + y = :(g(z)) + quote + f(\$(x+1), \$y) + end +end +""") +@test ex ≈ @ast_ [K"block" + [K"call" + "f"::K"Identifier" + 11::K"Value" + [K"call" + "g"::K"Identifier" + "z"::K"Identifier" + ] + ] +] +@test sourcetext(ex[1]) == "f(\$(x+1), \$y)" +@test sourcetext(ex[1][2]) == "\$(x+1)" +@test sourcetext.(flattened_provenance(ex[1][3])) == ["\$y", "g(z)"] +@test sprint(io->showprov(io, ex[1][3], tree=true)) == raw""" + (call g z) + ├─ (call g z) + │ └─ (call g z) 
+ │ └─ @ string:3 + └─ ($ y) + └─ @ string:5 + """ +@test sprint(io->showprov(io, ex[1][3])) == raw""" + begin + x = 10 + y = :(g(z)) + # └──┘ ── in source + quote + f($(x+1), $y) + # @ string:3 + + y = :(g(z)) + quote + f($(x+1), $y) + # └┘ ── interpolated here + end + end + # @ string:5""" +@test sprint(io->showprov(io, ex[1][3]; note="foo")) == raw""" + begin + x = 10 + y = :(g(z)) + # └──┘ ── foo + quote + f($(x+1), $y) + # @ string:3 + + y = :(g(z)) + quote + f($(x+1), $y) + # └┘ ── foo + end + end + # @ string:5""" + + +# Test expression flags are preserved during interpolation +@test JuliaSyntax.is_infix_op_call(JuliaLowering.include_string(test_mod, """ +let + x = 1 + :(\$x + \$x) +end +""")) + +# Test that trivial interpolation without any nesting works. +ex = JuliaLowering.include_string(test_mod, """ +let + x = 123 + :(\$x) +end +""") +@test kind(ex) == K"Value" +@test ex.value == 123 + +# Test that interpolation with field access works +# (the field name can be interpolated into +ex = JuliaLowering.include_string(test_mod, """ +let + field_name = :(a) + :(x.\$field_name) +end +""") +@test kind(ex[2]) == K"Identifier" +@test ex[2].name_val == "a" + +# Test quoted property access syntax like `Core.:(foo)` and `Core.:(!==)` +@test JuliaLowering.include_string(test_mod, """ + x = (a=1, b=2) + x.:(a) +""") == 1 +@test JuliaLowering.include_string(test_mod, """ + Core.:(!==) +""") === (!==) + +# Test quoted operator function definitions (issue #20) +@test JuliaLowering.include_string(test_mod, """ +begin + struct Issue20 + x::Int + end + Base.:(==)(a::Issue20, b::Issue20) = a.x == b.x + Issue20(1) == Issue20(1) +end +""") === true + +@test JuliaLowering.include_string(test_mod, """ +begin + Base.:(<)(a::Issue20, b::Issue20) = a.x < b.x + Issue20(1) < Issue20(2) +end +""") === true + +# interpolations at multiple depths +ex = JuliaLowering.include_string(test_mod, raw""" +let + args = (:(x),:(y)) + quote + x = 1 + y = 2 + quote + f($$(args...)) + end + end 
+end +""") +@test ex ≈ @ast_ [K"block" + [K"=" + "x"::K"Identifier" + 1::K"Integer" + ] + [K"=" + "y"::K"Identifier" + 2::K"Integer" + ] + [K"quote" + [K"block" + [K"call" + "f"::K"Identifier" + [K"$" + "x"::K"Identifier" + "y"::K"Identifier" + ] + ] + ] + ] +] +@test sourcetext(ex[3][1][1][2]) == "\$\$(args...)" +@test sourcetext(ex[3][1][1][2][1]) == "x" +@test sourcetext(ex[3][1][1][2][2]) == "y" + +ex2 = JuliaLowering.eval(test_mod, ex) +@test sourcetext(ex2[1][2]) == "x" +@test sourcetext(ex2[1][3]) == "y" + +@test JuliaLowering.include_string(test_mod, ":x") isa Symbol +@test JuliaLowering.include_string(test_mod, ":(x)") isa SyntaxTree + +# Double interpolation +double_interp_ex = JuliaLowering.include_string(test_mod, raw""" +let + args = (:(xxx),) + :(:($$(args...))) +end +""") +Base.eval(test_mod, :(xxx = 111)) +dinterp_eval = JuliaLowering.eval(test_mod, double_interp_ex) +@test kind(dinterp_eval) == K"Value" +@test dinterp_eval.value == 111 + +multi_interp_ex = JuliaLowering.include_string(test_mod, raw""" +let + args = (:(x), :(y)) + :(:($$(args...))) +end +""") +@test try + JuliaLowering.eval(test_mod, multi_interp_ex) + nothing +catch exc + @test exc isa LoweringError + sprint(io->Base.showerror(io, exc, show_detail=false)) +end == raw""" +LoweringError: +let + args = (:(x), :(y)) + :(:($$(args...))) +# └─────────┘ ── More than one value in bare `$` expression +end""" + +@test try + JuliaLowering.eval(test_mod, multi_interp_ex, expr_compat_mode=true) + nothing +catch exc + @test exc isa LoweringError + sprint(io->Base.showerror(io, exc, show_detail=false)) +end == raw""" +LoweringError: +No source for expression +└ ── More than one value in bare `$` expression""" +# ^ TODO: Improve error messages involving expr_to_syntaxtree! 
+ +# Interpolation of SyntaxTree Identifier vs plain Symbol +symbol_interp = JuliaLowering.include_string(test_mod, raw""" +let + x = :xx # Plain Symbol + y = :(yy) # SyntaxTree K"Identifier" + :(f($x, $y, z)) +end +""") +@test symbol_interp ≈ @ast_ [K"call" + "f"::K"Identifier" + "xx"::K"Identifier" + "yy"::K"Identifier" + "z"::K"Identifier" +] +@test sourcetext(symbol_interp[2]) == "\$x" # No provenance for plain Symbol +@test sourcetext(symbol_interp[3]) == "yy" + +# Mixing Expr into a SyntaxTree doesn't graft it onto the SyntaxTree AST but +# treats it as a plain old value. (This is the conservative API choice and also +# encourages ASTs to be written in the new form. However we may choose to +# change this if necessary for compatibility.) +expr_interp_is_value = JuliaLowering.include_string(test_mod, raw""" +let + x = Expr(:call, :f, :x) + :(g($x)) +end +""") +@test expr_interp_is_value ≈ @ast_ [K"call" + "g"::K"Identifier" + Expr(:call, :f, :x)::K"Value" + # ^^ NB not [K"call" "f"::K"Identifier" "x"::K"Identifier"] +] +@test Expr(expr_interp_is_value) == Expr(:call, :g, QuoteNode(Expr(:call, :f, :x))) + +@testset "Interpolation in Expr compat mode" begin + expr_interp = JuliaLowering.include_string(test_mod, raw""" + let + x = :xx + :(f($x, z)) + end + """, expr_compat_mode=true) + @test expr_interp == Expr(:call, :f, :xx, :z) + + double_interp_expr = JuliaLowering.include_string(test_mod, raw""" + let + x = :xx + :(:(f($$x, $y))) + end + """, expr_compat_mode=true) + @test double_interp_expr == Expr(:quote, Expr(:call, :f, Expr(:$, :xx), Expr(:$, :y))) + + # Test that ASTs are copied before they're seen by the user + @test JuliaLowering.include_string(test_mod, raw""" + exs = [] + for i = 1:2 + push!(exs, :(f(x,y))) + push!(exs[end].args, :z) + end + exs + """, expr_compat_mode=true) == Any[Expr(:call, :f, :x, :y, :z), Expr(:call, :f, :x, :y, :z)] + + # Test interpolation into QuoteNode + @test JuliaLowering.include_string(test_mod, raw""" + let x = :push! 
+ @eval Base.$x + end + """; expr_compat_mode=true) == Base.push! +end + +end diff --git a/JuliaLowering/test/quoting_ir.jl b/JuliaLowering/test/quoting_ir.jl new file mode 100644 index 0000000000000..ccc61be3cf796 --- /dev/null +++ b/JuliaLowering/test/quoting_ir.jl @@ -0,0 +1,90 @@ +######################################## +# Simple interpolation +quote + $x + 1 +end +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (call-i ($ x) + 1))) %₂) +4 (return %₃) + +######################################## +# Trivial interpolation +:($x) +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert ($ x)) %₂) +4 (return %₃) + +######################################## +# Double escape +quote + quote + $$x + 1 + end +end +#--------------------- +1 TestMod.x +2 (call core.tuple %₁) +3 (call JuliaLowering.interpolate_ast SyntaxTree (inert (block (quote (block (call-i ($ ($ x)) + 1))))) %₂) +4 (return %₃) + +######################################## +# Symbols on `.` right hand side need to be scoped correctly +let x = 1 + :(A.$x) +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 slot₁/x +4 (call core.tuple %₃) +5 (call JuliaLowering.interpolate_ast SyntaxTree (inert (. 
A ($ x))) %₄) +6 (return %₅) + +######################################## +# Error: Double escape +quote + $$x + 1 +end +#--------------------- +LoweringError: +quote + $$x + 1 +# └┘ ── `$` expression outside string or quote block +end + +######################################## +# Quoted property access with identifier +Core.:(foo) +#--------------------- +1 TestMod.Core +2 (call top.getproperty %₁ :foo) +3 (return %₂) + +######################################## +# Quoted property access with operator +Core.:(!==) +#--------------------- +1 TestMod.Core +2 (call top.getproperty %₁ :!==) +3 (return %₂) + +######################################## +# Quoted operator function definition (issue #20) +function Base.:(==)() end +#--------------------- +1 TestMod.Base +2 (call top.getproperty %₁ :==) +3 (call core.Typeof %₂) +4 (call core.svec %₃) +5 (call core.svec) +6 SourceLocation::1:10 +7 (call core.svec %₄ %₅ %₆) +8 --- method core.nothing %₇ + slots: [slot₁/#self#(!read)] + 1 (return core.nothing) +9 latestworld +10 (return core.nothing) diff --git a/JuliaLowering/test/repl_mode.jl b/JuliaLowering/test/repl_mode.jl new file mode 100644 index 0000000000000..cf85717c03cbf --- /dev/null +++ b/JuliaLowering/test/repl_mode.jl @@ -0,0 +1,84 @@ +# JuliaLowering REPL mode: an interactive test utility for lowering code (not +# part of the unit tests) + +module JuliaLoweringREPL + +import ReplMaker +import REPL + +using JuliaLowering: JuliaLowering, SyntaxTree, children +using JuliaSyntax + +function is_incomplete(prompt_state) + str = String(take!(copy(REPL.LineEdit.buffer(prompt_state)))) + stream = JuliaSyntax.ParseStream(str) + JuliaSyntax.parse!(stream, rule=:all) + if JuliaSyntax.any_error(stream) + tree = JuliaSyntax.build_tree(SyntaxNode, stream) + tag = JuliaSyntax._incomplete_tag(tree, 1) + return tag != :none + else + return false + end +end + +function eval_ish(mod::Module, ex::SyntaxTree, do_eval::Bool, do_print_ir::Bool) + k = kind(ex) + if k == K"toplevel" + x 
= nothing + for e in children(ex) + x = eval_ish(mod, e, do_eval, do_print_ir) + end + return x + end + linear_ir = JuliaLowering.lower(mod, ex) + if do_print_ir + JuliaLowering.print_ir(stdout, linear_ir) + end + if do_eval + println(stdout, "#----------------------") + expr_form = JuliaLowering.to_lowered_expr(linear_ir) + Base.eval(mod, expr_form) + end +end + +PRINT_IR::Bool = true +DO_EVAL::Bool = false +function opts(; do_eval=false, print_ir=false) + global DO_EVAL = do_eval + global PRINT_IR = print_ir +end + +function handle_input(str) + global DO_EVAL, PRINT_IR + if str == "DO_EVAL" + DO_EVAL = true + return + elseif str == "!DO_EVAL" + DO_EVAL = false + return + elseif str == "PRINT_IR" + PRINT_IR = true + return + elseif str == "!PRINT_IR" + PRINT_IR = false + return + end + ex = parseall(SyntaxTree, str; filename="REPL") + eval_ish(Main, ex, DO_EVAL, PRINT_IR) +end + +function init() + ReplMaker.initrepl(handle_input, + valid_input_checker = !is_incomplete, + prompt_text="Lowering> ", + prompt_color = :blue, + start_key=")", + mode_name=:JuliaLowering) +end + +function __init__() + init() +end + +end diff --git a/JuliaLowering/test/runtests.jl b/JuliaLowering/test/runtests.jl new file mode 100644 index 0000000000000..7451ecb5c179f --- /dev/null +++ b/JuliaLowering/test/runtests.jl @@ -0,0 +1,28 @@ +include("utils.jl") + +@testset "JuliaLowering.jl" begin + include("syntax_graph.jl") + + include("ir_tests.jl") + + include("arrays.jl") + include("assignments.jl") + include("branching.jl") + include("closures.jl") + include("decls.jl") + include("destructuring.jl") + include("desugaring.jl") + include("exceptions.jl") + include("functions.jl") + include("generators.jl") + include("import.jl") + include("loops.jl") + @testset "macros" include("macros.jl") + include("misc.jl") + include("modules.jl") + include("quoting.jl") + include("scopes.jl") + include("typedefs.jl") + include("compat.jl") + include("hooks.jl") +end diff --git 
a/JuliaLowering/test/scopes.jl b/JuliaLowering/test/scopes.jl new file mode 100644 index 0000000000000..e327343eb03e6 --- /dev/null +++ b/JuliaLowering/test/scopes.jl @@ -0,0 +1,80 @@ +@testset "Scopes" begin + +test_mod = Module() + +#------------------------------------------------------------------------------- +# Scopes +@test JuliaLowering.include_string(test_mod, +""" +let + y = 0 + x = 1 + let x = x + 1 + y = x + end + (x, y) +end +""") == (1, 2) + +JuliaLowering.include_string(test_mod, """ +x = 101 +y = 202 +""") +@test test_mod.x == 101 +@test test_mod.y == 202 +@test JuliaLowering.include_string(test_mod, "x + y") == 303 + +@test JuliaLowering.include_string(test_mod, """ +begin + local x = 1 + local x = 2 + let (x,y) = (:x,:y) + (y,x) + end +end +""") === (:y,:x) + +# Types on left hand side of type decls refer to the outer scope +# (In the flisp implementation they refer to the inner scope, but this seems +# like a bug.) +@test JuliaLowering.include_string(test_mod, """ +let x::Int = 10.0 + local Int = Float64 + x +end +""") === 10 + +# Closures in let syntax can only capture values from the outside +# (In the flisp implementation it captures from inner scope, but this is +# inconsistent with let assignment where the rhs refers to the outer scope and +# thus seems like a bug.) 
+@test JuliaLowering.include_string(test_mod, """ +begin + local y = :outer_y + let f() = y + local y = :inner_y + f() + end +end +""") === :outer_y + +# wrap expression in scope block of `scope_type` +function wrapscope(ex, scope_type) + g = JuliaLowering.ensure_attributes(ex._graph, scope_type=Symbol) + ex = JuliaLowering.reparent(g, ex) + makenode(ex, ex, K"scope_block", ex; scope_type=scope_type) +end + +assign_z_2 = parsestmt(SyntaxTree, "begin z = 2 end", filename="foo.jl") +Base.eval(test_mod, :(z=1)) +@test test_mod.z == 1 +# neutral (eg, for loops) and hard (eg, let) scopes create a new binding for z +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :neutral)) +@test test_mod.z == 1 +JuliaLowering.eval(test_mod, wrapscope(assign_z_2, :hard)) +@test test_mod.z == 1 +# but wrapping neutral scope in soft scope uses the existing binding in test_mod +JuliaLowering.eval(test_mod, wrapscope(wrapscope(assign_z_2, :neutral), :soft)) +@test test_mod.z == 2 + +end diff --git a/JuliaLowering/test/scopes_ir.jl b/JuliaLowering/test/scopes_ir.jl new file mode 100644 index 0000000000000..fc00174f144ff --- /dev/null +++ b/JuliaLowering/test/scopes_ir.jl @@ -0,0 +1,532 @@ +using .JuliaLowering: @islocal +using Base: @locals + +#******************************************************************************* +######################################## +# let syntax with decl in binding list +let x::T = rhs + local T = 1 + T # <- This is a different `T` from the T in `x::T` +end +#--------------------- +1 TestMod.rhs +2 TestMod.T +3 (newvar slot₁/T) +4 (= slot₃/tmp %₁) +5 slot₃/tmp +6 (call core.isa %₅ %₂) +7 (gotoifnot %₆ label₉) +8 (goto label₁₂) +9 slot₃/tmp +10 (call top.convert %₂ %₉) +11 (= slot₃/tmp (call core.typeassert %₁₀ %₂)) +12 slot₃/tmp +13 (= slot₂/x %₁₂) +14 (= slot₁/T 1) +15 slot₁/T +16 (return %₁₅) + +######################################## +# let syntax with tuple on lhs +let (x,y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (call 
top.indexed_iterate %₁ 1) +3 (= slot₂/x (call core.getfield %₂ 1)) +4 (= slot₁/iterstate (call core.getfield %₂ 2)) +5 slot₁/iterstate +6 (call top.indexed_iterate %₁ 2 %₅) +7 (= slot₃/y (call core.getfield %₆ 1)) +8 (return core.nothing) + +######################################## +# let syntax with named tuple on lhs creates locals for the unpacked vars +let (; x,y) = rhs +end +#--------------------- +1 TestMod.rhs +2 (= slot₁/x (call top.getproperty %₁ :x)) +3 (= slot₂/y (call top.getproperty %₁ :y)) +4 (return core.nothing) + +######################################## +# Let syntax with the same name creates nested bindings +let x = f(x), x = g(x) +end +#--------------------- +1 TestMod.f +2 TestMod.x +3 (call %₁ %₂) +4 (= slot₁/x %₃) +5 TestMod.g +6 slot₁/x +7 (call %₅ %₆) +8 (= slot₂/x %₇) +9 (return core.nothing) + +######################################## +# let syntax with a function definition in the binding list creates a closure +let f() = body +end +#--------------------- +1 (call core.svec) +2 (call core.svec) +3 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₁ %₂) +4 latestworld +5 TestMod.#f##0 +6 (new %₅) +7 (= slot₁/f %₆) +8 TestMod.#f##0 +9 (call core.svec %₈) +10 (call core.svec) +11 SourceLocation::1:5 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read)] + 1 TestMod.body + 2 (return %₁) +14 latestworld +15 (return core.nothing) + +######################################## +# Error: Invalid `let` var with K"::" +let f[]::T = rhs +end +#--------------------- +LoweringError: +let f[]::T = rhs +# └─┘ ── Invalid assignment location in let syntax +end + +######################################## +# Error: Invalid `let` var +let f[] = rhs +end +#--------------------- +LoweringError: +let f[] = rhs +# └─┘ ── Invalid assignment location in let syntax +end + +######################################## +# Error: Invalid function def in `let` +let (obj::Callable)() = rhs +end +#--------------------- 
+LoweringError: +let (obj::Callable)() = rhs +# └───────────────┘ ── Function signature does not define a local function name +end + +######################################## +# @islocal with locals and undefined vars +let x = 1 + @islocal(a), @islocal(x) +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 (call core.tuple false true) +4 (return %₃) + +######################################## +# @islocal with function arguments +begin + local y = 2 + function f(x) + @islocal(a), @islocal(x), @islocal(y) + end +end +#--------------------- +1 (= slot₁/y (call core.Box)) +2 2 +3 slot₁/y +4 (call core.setfield! %₃ :contents %₂) +5 (method TestMod.f) +6 latestworld +7 TestMod.f +8 (call core.Typeof %₇) +9 (call core.svec %₈ core.Any) +10 (call core.svec) +11 SourceLocation::3:14 +12 (call core.svec %₉ %₁₀ %₁₁) +13 --- method core.nothing %₁₂ + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 (call core.tuple false true true) + 2 (return %₁) +14 latestworld +15 TestMod.f +16 (return %₁₅) + +######################################## +# @islocal with global +begin + global x + @islocal(x) +end +#--------------------- +1 (call core.declare_global TestMod :x false) +2 latestworld +3 (return false) + +######################################## +# @locals with local and global +begin + global x + local y + @locals +end +#--------------------- +1 (newvar slot₁/y) +2 (call core.declare_global TestMod :x false) +3 latestworld +4 (call core.apply_type top.Dict core.Symbol core.Any) +5 (call %₄) +6 (isdefined slot₁/y) +7 (gotoifnot %₆ label₁₀) +8 slot₁/y +9 (call top.setindex! 
%₅ %₈ :y) +10 (return %₅) + +######################################## +# @locals with function args (TODO: static parameters) +function f(z) + @locals +end +#--------------------- +1 (method TestMod.f) +2 latestworld +3 TestMod.f +4 (call core.Typeof %₃) +5 (call core.svec %₄ core.Any) +6 (call core.svec) +7 SourceLocation::1:10 +8 (call core.svec %₅ %₆ %₇) +9 --- method core.nothing %₈ + slots: [slot₁/#self#(!read) slot₂/z] + 1 (call core.apply_type top.Dict core.Symbol core.Any) + 2 (call %₁) + 3 (gotoifnot true label₅) + 4 (call top.setindex! %₂ slot₂/z :z) + 5 (return %₂) +10 latestworld +11 TestMod.f +12 (return %₁₁) + +######################################## +# Error: Duplicate function argument names +function f(x, x) +end +#--------------------- +LoweringError: +function f(x, x) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Duplicate function argument with destructured arg +function f(x, (x,)) +end +#--------------------- +LoweringError: +function f(x, (x,)) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Static parameter name not unique +function f(::T) where T where T +end +#--------------------- +LoweringError: +function f(::T) where T where T +# ╙ ── function static parameter name not unique +end + +######################################## +# Error: static parameter colliding with argument names +function f(x::x) where x +end +#--------------------- +LoweringError: +function f(x::x) where x +# ╙ ── static parameter name not distinct from function argument +end + +######################################## +# Error: duplicate destructure args +function f((x,), (x,)) +end +#--------------------- +LoweringError: +function f((x,), (x,)) +# ╙ ── function argument name not unique +end + +######################################## +# Error: Conflicting local and global decls +let + local x + global x +end +#--------------------- +LoweringError: +let + local 
x + global x +# ╙ ── Variable `x` declared both local and global +end + +######################################## +# Error: Conflicting argument and local +function f(x) + local x +end +#--------------------- +LoweringError: +function f(x) + local x +# ╙ ── local variable name `x` conflicts with an argument +end + +######################################## +# Error: Conflicting argument and global +function f(x) + global x +end +#--------------------- +LoweringError: +function f(x) + global x +# ╙ ── global variable name `x` conflicts with an argument +end + +######################################## +# Error: Conflicting destructured argument and global +# TODO: The error could probably be a bit better here +function f((x,)) + global x +end +#--------------------- +LoweringError: +function f((x,)) + global x +# ╙ ── Variable `x` declared both local and global +end + +######################################## +# Error: Conflicting static parameter and local +function f(::T) where T + local T +end +#--------------------- +LoweringError: +function f(::T) where T + local T +# ╙ ── local variable name `T` conflicts with a static parameter +end + +######################################## +# Error: Conflicting static parameter and global +function f(::T) where T + global T +end +#--------------------- +LoweringError: +function f(::T) where T + global T +# ╙ ── global variable name `T` conflicts with a static parameter +end + +######################################## +# Error: Conflicting static parameter and local in nested scope +function f(::T) where T + let + local T + end +end +#--------------------- +LoweringError: +function f(::T) where T + let + local T +# ╙ ── local variable name `T` conflicts with a static parameter + end +end + +######################################## +# Error: Conflicting static parameter and global in nested scope +function f(::T) where T + let + global T + end +end +#--------------------- +LoweringError: +function f(::T) where T + let + global 
T +# ╙ ── global variable name `T` conflicts with a static parameter + end +end + +######################################## +# Error: Conflicting static parameter and implicit local +function f(::T) where T + let + T = rhs + end +end +#--------------------- +LoweringError: +function f(::T) where T + let + T = rhs +# ╙ ── local variable name `T` conflicts with a static parameter + end +end + +######################################## +# Error: Attempt to add methods to a function argument +function f(g) + function g() + end +end +#--------------------- +LoweringError: +function f(g) + function g() +# ╙ ── Cannot add method to a function argument + end +end + +######################################## +# Error: Global method definition inside function scope +function f() + global global_method + function global_method() + end +end +#--------------------- +LoweringError: +function f() + global global_method + function global_method() +# └───────────┘ ── Global method definition needs to be placed at the top level, or use `eval()` + end +end + +######################################## +# @isdefined with defined variables +let x = 1 + @isdefined x + @isdefined y +end +#--------------------- +1 1 +2 (= slot₁/x %₁) +3 (call core.isdefinedglobal TestMod :y false) +4 (return %₃) + +######################################## +# Global function defined inside let (let over lambda) +let x = 1 + global f(y) = x = y + global g() = x +end +#--------------------- +1 1 +2 (= slot₁/x (call core.Box)) +3 slot₁/x +4 (call core.setfield! %₃ :contents %₁) +5 (call core.declare_global TestMod :f false) +6 latestworld +7 (method TestMod.f) +8 latestworld +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀ core.Any) +12 (call core.svec) +13 SourceLocation::2:12 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- code_info + slots: [slot₁/#self#(!read) slot₂/y] + 1 slot₂/y + 2 (captured_local 1) + 3 (call core.setfield! 
%₂ :contents %₁) + 4 (return %₁) +16 slot₁/x +17 (call core.svec %₁₆) +18 (call JuliaLowering.replace_captured_locals! %₁₅ %₁₇) +19 --- method core.nothing %₁₄ %₁₈ +20 latestworld +21 (call core.declare_global TestMod :g false) +22 latestworld +23 (method TestMod.g) +24 latestworld +25 TestMod.g +26 (call core.Typeof %₂₅) +27 (call core.svec %₂₆) +28 (call core.svec) +29 SourceLocation::3:12 +30 (call core.svec %₂₇ %₂₈ %₂₉) +31 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 (captured_local 1) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/x) + 6 slot₂/x + 7 (call core.getfield %₁ :contents) + 8 (return %₇) +32 slot₁/x +33 (call core.svec %₃₂) +34 (call JuliaLowering.replace_captured_locals! %₃₁ %₃₃) +35 --- method core.nothing %₃₀ %₃₄ +36 latestworld +37 TestMod.g +38 (return %₃₇) + +######################################## +# Modify assignment operator on closure variable +let x = 1 + global f() = x += 1 +end +#--------------------- +1 1 +2 (= slot₁/x (call core.Box)) +3 slot₁/x +4 (call core.setfield! %₃ :contents %₁) +5 (call core.declare_global TestMod :f false) +6 latestworld +7 (method TestMod.f) +8 latestworld +9 TestMod.f +10 (call core.Typeof %₉) +11 (call core.svec %₁₀) +12 (call core.svec) +13 SourceLocation::2:12 +14 (call core.svec %₁₁ %₁₂ %₁₃) +15 --- code_info + slots: [slot₁/#self#(!read) slot₂/x(!read)] + 1 TestMod.+ + 2 (captured_local 1) + 3 (call core.isdefined %₂ :contents) + 4 (gotoifnot %₃ label₆) + 5 (goto label₈) + 6 (newvar slot₂/x) + 7 slot₂/x + 8 (call core.getfield %₂ :contents) + 9 (call %₁ %₈ 1) + 10 (captured_local 1) + 11 (call core.setfield! %₁₀ :contents %₉) + 12 (return %₉) +16 slot₁/x +17 (call core.svec %₁₆) +18 (call JuliaLowering.replace_captured_locals! 
%₁₅ %₁₇) +19 --- method core.nothing %₁₄ %₁₈ +20 latestworld +21 TestMod.f +22 (return %₂₁) diff --git a/JuliaLowering/test/syntax_graph.jl b/JuliaLowering/test/syntax_graph.jl new file mode 100644 index 0000000000000..60fd10dde0c20 --- /dev/null +++ b/JuliaLowering/test/syntax_graph.jl @@ -0,0 +1,110 @@ +@testset "SyntaxGraph attrs" begin + st = parsestmt(SyntaxTree, "function foo end") + g_init = JuliaLowering.unfreeze_attrs(st._graph) + gf1 = JuliaLowering.freeze_attrs(g_init) + gu1 = JuliaLowering.unfreeze_attrs(gf1) + + # Check that freeze/unfreeze do their jobs + @test gf1.attributes isa NamedTuple + @test gu1.attributes isa Dict + @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes)) + + # ensure_attributes + gf2 = JuliaLowering.ensure_attributes(gf1, test_attr=Symbol, foo=Type) + gu2 = JuliaLowering.ensure_attributes(gu1, test_attr=Symbol, foo=Type) + # returns a graph with the same attribute storage + @test gf2.attributes isa NamedTuple + @test gu2.attributes isa Dict + # does its job + @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2) + @test (:foo=>Type) in JuliaLowering.attrdefs(gf2) + @test Set(keys(gf2.attributes)) == Set(keys(gu2.attributes)) + # no mutation + @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf1)) + @test !((:foo=>Type) in JuliaLowering.attrdefs(gf1)) + @test Set(keys(gf1.attributes)) == Set(keys(gu1.attributes)) + + # delete_attributes + gf3 = JuliaLowering.delete_attributes(gf2, :test_attr, :foo) + gu3 = JuliaLowering.delete_attributes(gu2, :test_attr, :foo) + # returns a graph with the same attribute storage + @test gf3.attributes isa NamedTuple + @test gu3.attributes isa Dict + # does its job + @test !((:test_attr=>Symbol) in JuliaLowering.attrdefs(gf3)) + @test !((:foo=>Type) in JuliaLowering.attrdefs(gf3)) + @test Set(keys(gf3.attributes)) == Set(keys(gu3.attributes)) + # no mutation + @test (:test_attr=>Symbol) in JuliaLowering.attrdefs(gf2) + @test (:foo=>Type) in JuliaLowering.attrdefs(gf2) + @test 
Set(keys(gf2.attributes)) == Set(keys(gu2.attributes)) +end + +@testset "SyntaxTree" begin + # Expr conversion + @test Expr(parsestmt(SyntaxTree, "begin a + b ; c end", filename="none")) == + Meta.parse("begin a + b ; c end") + + tree1 = JuliaLowering.@SyntaxTree :(some_unique_identifier) + @test tree1 isa SyntaxTree + @test kind(tree1) == K"Identifier" + @test tree1.name_val == "some_unique_identifier" + + tree2 = JuliaLowering.@SyntaxTree quote + x + $tree1 + end + @test tree2 isa SyntaxTree + @test kind(tree2) == K"block" + @test kind(tree2[1]) == K"Identifier" && tree2[1].name_val == "x" + @test kind(tree2[2]) == K"Identifier" && tree2[2].name_val == "some_unique_identifier" + + "For filling required attrs in graphs created by hand" + function testgraph(edge_ranges, edges, more_attrs...) + kinds = Dict(map(i->(i=>K"block"), eachindex(edge_ranges))) + sources = Dict(map(i->(i=>LineNumberNode(i)), eachindex(edge_ranges))) + SyntaxGraph( + edge_ranges, + edges, + Dict(:kind => kinds, :source => sources, more_attrs...)) + end + + @testset "copy_ast" begin + # 1 --> 2 --> 3 src(7-9) = line 7-9 + # 4 --> 5 --> 6 src(i) = i + 3 + # 7 --> 8 --> 9 + g = testgraph([1:1, 2:2, 0:-1, 3:3, 4:4, 0:-1, 5:5, 6:6, 0:-1], + [2, 3, 5, 6, 8, 9], + :source => Dict(enumerate([ + map(i->i+3, 1:6)... 
+ map(LineNumberNode, 7:9)...]))) + st = SyntaxTree(g, 1) + stcopy = JuliaLowering.copy_ast(g, st) + # Each node should be copied once + @test length(g.edge_ranges) === 18 + @test st._id != stcopy._id + @test st ≈ stcopy + @test st.source !== stcopy.source + @test st.source[1] !== stcopy.source[1] + @test st.source[1][1] !== stcopy.source[1][1] + + stcopy2 = JuliaLowering.copy_ast(g, st; copy_source=false) + # Only nodes 1-3 should be copied + @test length(g.edge_ranges) === 21 + @test st._id != stcopy2._id + @test st ≈ stcopy2 + @test st.source === stcopy2.source + @test st.source[1] === stcopy2.source[1] + @test st.source[1][1] === stcopy2.source[1][1] + + # Copy into a new graph + new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...) + stcopy3 = JuliaLowering.copy_ast(new_g, st) + @test length(new_g.edge_ranges) === 9 + @test st ≈ stcopy3 + + new_g = ensure_attributes!(SyntaxGraph(); JuliaLowering.attrdefs(g)...) + # Disallow for now, since we can't prevent dangling sourcerefs + @test_throws ErrorException JuliaLowering.copy_ast(new_g, st; copy_source=false) + end +end diff --git a/JuliaLowering/test/typedefs.jl b/JuliaLowering/test/typedefs.jl new file mode 100644 index 0000000000000..be509eeb88156 --- /dev/null +++ b/JuliaLowering/test/typedefs.jl @@ -0,0 +1,271 @@ +@testset "Type definitions" begin + +test_mod = Module(:TestMod) + +Base.eval(test_mod, :(struct XX{S,T,U,W} end)) + +@test JuliaLowering.include_string(test_mod, """ +XX{Int, <:Integer, Float64, >:AbstractChar} +""") == (test_mod.XX{Int, T, Float64, S} where {T <: Integer, S >: AbstractChar}) + +@test JuliaLowering.include_string(test_mod, """ +abstract type A end +""") === nothing +@test supertype(test_mod.A) === Any +@test isabstracttype(test_mod.A) + +@test JuliaLowering.include_string(test_mod, """ +abstract type B <: A end +""") === nothing +@test supertype(test_mod.B) === test_mod.A + +@test JuliaLowering.include_string(test_mod, """ +abstract type C{X} end +""") === 
nothing + +@test JuliaLowering.include_string(test_mod, """ +abstract type D{X<:A} end +""") === nothing +@test test_mod.D{test_mod.B} isa Type +@test_throws Exception test_mod.D{Int} + +@test JuliaLowering.include_string(test_mod, """ +abstract type E <: C{E} end +""") === nothing +@test test_mod.E isa Type + +@test JuliaLowering.include_string(test_mod, """ +primitive type P <: A 16 end +""") === nothing +@test isconcretetype(test_mod.P) +@test supertype(test_mod.P) === test_mod.A +@test reinterpret(test_mod.P, 0x0001) isa test_mod.P +@test reinterpret(UInt16, reinterpret(test_mod.P, 0x1337)) === 0x1337 + +@test JuliaLowering.include_string(test_mod, """ +struct S1{X,Y} <: A + x::X + y::Y + z +end +""") === nothing +@test !isconcretetype(test_mod.S1) +@test fieldnames(test_mod.S1) == (:x, :y, :z) +@test fieldtypes(test_mod.S1) == (Any, Any, Any) +@test isconcretetype(test_mod.S1{Int,String}) +@test fieldtypes(test_mod.S1{Int,String}) == (Int, String, Any) +@test supertype(test_mod.S1) == test_mod.A + +# Inner constructors: one field non-Any +@test JuliaLowering.include_string(test_mod, """ +struct S2 + x::Int + y +end +""") === nothing +@test length(methods(test_mod.S2)) == 2 +let s = test_mod.S2(42, "hi") + # exact types + @test s.x === 42 + @test s.y == "hi" +end +let s = test_mod.S2(42.0, "hi") + # converted types + @test s.x === 42 + @test s.y == "hi" +end + +# Constructors: All fields Any +@test JuliaLowering.include_string(test_mod, """ +struct S3 + x + y +end +""") === nothing +@test length(methods(test_mod.S3)) == 1 +let s = test_mod.S3(42, "hi") + @test s.x === 42 + @test s.y == "hi" +end + +# Inner constructors: All fields Any; dynamically tested against whatever +# S4_Field resolves to +@test JuliaLowering.include_string(test_mod, """ +S4_Field = Any # actually Any! 
+ +struct S4 + x::S4_Field + y +end +""") === nothing +@test length(methods(test_mod.S4)) == 1 +let s = test_mod.S4(42, "hi") + @test s.x === 42 + @test s.y == "hi" +end + +# Inner & outer constructors; parameterized types +@test JuliaLowering.include_string(test_mod, """ +struct S5{U} + x::U + y +end +""") === nothing +@test length(methods(test_mod.S5)) == 1 +let s = test_mod.S5(42, "hi") + @test s isa test_mod.S5{Int} + @test s.x === 42 + @test s.y == "hi" +end +@test length(methods(test_mod.S5{Int})) == 1 +let s = test_mod.S5{Int}(42.0, "hi") + @test s isa test_mod.S5{Int} + @test s.x === 42 + @test s.y == "hi" +end +let s = test_mod.S5{Any}(42.0, "hi") + @test s isa test_mod.S5{Any} + @test s.x === 42.0 + @test s.y == "hi" +end + +# User defined inner constructors and helper functions for structs without type params +@test JuliaLowering.include_string(test_mod, """ +struct S6 + x + S6_f() = new(42) + + "some docs" + S6() = S6_f() + S6(x) = new(x) +end +""") === nothing +let s = test_mod.S6() + @test s isa test_mod.S6 + @test s.x === 42 +end +let s = test_mod.S6(2) + @test s isa test_mod.S6 + @test s.x === 2 +end +@test docstrings_equal(@doc(test_mod.S6), Markdown.doc"some docs") + +# User defined inner constructors and helper functions for structs with type params +@test JuliaLowering.include_string(test_mod, """ +struct S7{S,T} + x::S + y + + # Cases where full struct type may be deduced and used in body + S7{Int,String}() = new(10.0, "y1") + S7{S,T}() where {S,T} = new(10.0, "y2") + S7{Int,T}() where {T} = new(10.0, "y3") + (::Type{S7{Int,UInt8}})() = new{Int,UInt8}(10.0, "y4") + + # Cases where new{...} is called + S7() = new{Int,Int}(10.0, "y5") + S7{UInt8}() = S7_f() + S7_f() = new{UInt8,UInt8}(10.0, "y6") +end +""") === nothing +let s = test_mod.S7{Int,String}() + @test s isa test_mod.S7{Int,String} + @test s.x === 10 + @test s.y === "y1" +end +let s = test_mod.S7{UInt16,UInt16}() + @test s isa test_mod.S7{UInt16,UInt16} + @test s.x === UInt16(10) + @test 
s.y === "y2" +end +let s = test_mod.S7{Int,UInt16}() + @test s isa test_mod.S7{Int,UInt16} + @test s.x === 10 + @test s.y === "y3" +end +let s = test_mod.S7{Int,UInt8}() + @test s isa test_mod.S7{Int,UInt8} + @test s.x === 10 + @test s.y === "y4" +end +let s = test_mod.S7() + @test s isa test_mod.S7{Int,Int} + @test s.x === 10 + @test s.y === "y5" +end +let s = test_mod.S7{UInt8}() + @test s isa test_mod.S7{UInt8,UInt8} + @test s.x === UInt8(10) + @test s.y === "y6" +end + +# new() with splats and typed fields +@test JuliaLowering.include_string(test_mod, """ +struct S8 + x::Int + y::Float64 + + S8(xs, ys) = new(xs..., ys...) +end +""") === nothing +let s = test_mod.S8((10.0,), (20,)) + @test s isa test_mod.S8 + @test s.x === 10 + @test s.y === 20.0 +end +# Wrong number of args checked by lowering +@test_throws ArgumentError test_mod.S8((1,), ()) +@test_throws ArgumentError test_mod.S8((1,2,3), ()) + +# new() with splats and untyped fields +@test JuliaLowering.include_string(test_mod, """ +struct S9 + x + y + + S9(xs) = new(xs...) 
+end +""") === nothing +let s = test_mod.S9((10.0,20)) + @test s isa test_mod.S9 + @test s.x === 10.0 + @test s.y === 20 +end +# Wrong number of args checked by the runtime +@test_throws ArgumentError test_mod.S9((1,)) +@test_throws ArgumentError test_mod.S9((1,2,3)) + +# Test cases from +# https://github.com/JuliaLang/julia/issues/36104 +# https://github.com/JuliaLang/julia/pull/36121 +JuliaLowering.include_string(test_mod, """ +# issue #36104 +module M36104 +struct T36104 + v::Vector{M36104.T36104} +end +struct T36104 # check that redefining it works, issue #21816 + v::Vector{T36104} +end +end +""") +@test fieldtypes(test_mod.M36104.T36104) == (Vector{test_mod.M36104.T36104},) +@test_throws ErrorException("expected") JuliaLowering.include_string(test_mod, """struct X36104; x::error("expected"); end""") +@test !isdefined(test_mod, :X36104) +JuliaLowering.include_string(test_mod, "struct X36104; x::Int; end") +@test fieldtypes(test_mod.X36104) == (Int,) +JuliaLowering.include_string(test_mod, "primitive type P36104 8 end") +JuliaLowering.include_string(test_mod, "const orig_P36104 = P36104") +JuliaLowering.include_string(test_mod, "primitive type P36104 16 end") +@test test_mod.P36104 !== test_mod.orig_P36104 + +# Struct with outer constructor where one typevar is constrained by the other +# See https://github.com/JuliaLang/julia/issues/27269) +@test JuliaLowering.include_string(test_mod, """ +struct X27269{T, S <: Vector{T}} + v::Vector{S} +end +""") === nothing +@test test_mod.X27269([[1,2]]) isa test_mod.X27269{Int, Vector{Int}} + +end diff --git a/JuliaLowering/test/typedefs_ir.jl b/JuliaLowering/test/typedefs_ir.jl new file mode 100644 index 0000000000000..260e3c2f4bfab --- /dev/null +++ b/JuliaLowering/test/typedefs_ir.jl @@ -0,0 +1,1348 @@ +######################################## +# where expression without type bounds +A where X +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X %₁) +3 slot₁/X +4 TestMod.A +5 (call core.UnionAll %₃ %₄) +6 
(return %₅) + +######################################## +# where expression with upper bound +A where X <: UB +#--------------------- +1 TestMod.UB +2 (call core.TypeVar :X %₁) +3 (= slot₁/X %₂) +4 slot₁/X +5 TestMod.A +6 (call core.UnionAll %₄ %₅) +7 (return %₆) + +######################################## +# where expression with lower bound +A where X >: LB +#--------------------- +1 TestMod.LB +2 (call core.TypeVar :X %₁ core.Any) +3 (= slot₁/X %₂) +4 slot₁/X +5 TestMod.A +6 (call core.UnionAll %₄ %₅) +7 (return %₆) + +######################################## +# where expression with both bounds +A where LB <: X <: UB +#--------------------- +1 TestMod.LB +2 TestMod.UB +3 (call core.TypeVar :X %₁ %₂) +4 (= slot₁/X %₃) +5 slot₁/X +6 TestMod.A +7 (call core.UnionAll %₅ %₆) +8 (return %₇) + +######################################## +# where expression with braces +A where {X, Y<:X} +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X %₁) +3 slot₁/X +4 slot₁/X +5 (call core.TypeVar :Y %₄) +6 (= slot₂/Y %₅) +7 slot₂/Y +8 TestMod.A +9 (call core.UnionAll %₇ %₈) +10 (call core.UnionAll %₃ %₉) +11 (return %₁₀) + +######################################## +# Equivalent nested where expression without braces +A where Y<:X where X +#--------------------- +1 (call core.TypeVar :X) +2 (= slot₁/X %₁) +3 slot₁/X +4 slot₁/X +5 (call core.TypeVar :Y %₄) +6 (= slot₂/Y %₅) +7 slot₂/Y +8 TestMod.A +9 (call core.UnionAll %₇ %₈) +10 (call core.UnionAll %₃ %₉) +11 (return %₁₀) + +######################################## +# Error: bad type bounds +A where f() +#--------------------- +LoweringError: +A where f() +# └─┘ ── expected type name or type bounds + +######################################## +# Error: bad type bounds +A where X < Y < Z +#--------------------- +LoweringError: +A where X < Y < Z +# └───────┘ ── invalid type bounds + +######################################## +# Error: bad type bounds +A where X <: f() <: Z +#--------------------- +LoweringError: +A where X 
<: f() <: Z +# └─┘ ── expected type name + +######################################## +# Error: bad type bounds +A where f() <: Y +#--------------------- +LoweringError: +A where f() <: Y +# └─┘ ── expected type name + +######################################## +# Error: bad type bounds +A where Y >: f() +#--------------------- +LoweringError: +A where Y >: f() +# └─┘ ── expected type name + +######################################## +# Simple type application +X{A,B,C} +#--------------------- +1 TestMod.X +2 TestMod.A +3 TestMod.B +4 TestMod.C +5 (call core.apply_type %₁ %₂ %₃ %₄) +6 (return %₅) + +######################################## +# Type with implicit where param upper bound +X{<:A} +#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁) +3 TestMod.X +4 (call core.apply_type %₃ %₂) +5 (call core.UnionAll %₂ %₄) +6 (return %₅) + +######################################## +# Type with implicit where param lower bound +X{>:A} +#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁ core.Any) +3 TestMod.X +4 (call core.apply_type %₃ %₂) +5 (call core.UnionAll %₂ %₄) +6 (return %₅) + +######################################## +# Type with several implicit where params +X{S, <:A, T, >:B} +#--------------------- +1 TestMod.A +2 (call core.TypeVar :#T1 %₁) +3 TestMod.B +4 (call core.TypeVar :#T2 %₃ core.Any) +5 TestMod.X +6 TestMod.S +7 TestMod.T +8 (call core.apply_type %₅ %₆ %₂ %₇ %₄) +9 (call core.UnionAll %₄ %₈) +10 (call core.UnionAll %₂ %₉) +11 (return %₁₀) + +######################################## +# Error: parameters in type application +X{S, T; W} +#--------------------- +LoweringError: +X{S, T; W} +# └─┘ ── unexpected semicolon in type parameter list + +######################################## +# Error: assignment in type application +X{S, T=w} +#--------------------- +LoweringError: +X{S, T=w} +# └──┘ ── misplace assignment in type parameter list + +######################################## +# Simple abstract type definition +abstract 
type A end +#--------------------- +1 (call core.svec) +2 (call core._abstracttype TestMod :A %₁) +3 (= slot₁/A %₂) +4 (call core._setsuper! %₂ core.Any) +5 slot₁/A +6 (call core._typebody! false %₅) +7 (call core.declare_global TestMod :A false) +8 latestworld +9 (call core.isdefinedglobal TestMod :A false) +10 (gotoifnot %₉ label₁₅) +11 TestMod.A +12 (call core._equiv_typedef %₁₁ %₂) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₇) +15 (call core.declare_const TestMod :A %₂) +16 latestworld +17 (return core.nothing) + +######################################## +# Abstract type definition with supertype +abstract type A <: B end +#--------------------- +1 (call core.svec) +2 (call core._abstracttype TestMod :A %₁) +3 (= slot₁/A %₂) +4 TestMod.B +5 (call core._setsuper! %₂ %₄) +6 slot₁/A +7 (call core._typebody! false %₆) +8 (call core.declare_global TestMod :A false) +9 latestworld +10 (call core.isdefinedglobal TestMod :A false) +11 (gotoifnot %₁₀ label₁₆) +12 TestMod.A +13 (call core._equiv_typedef %₁₂ %₂) +14 (gotoifnot %₁₃ label₁₆) +15 (goto label₁₈) +16 (call core.declare_const TestMod :A %₂) +17 latestworld +18 (return core.nothing) + +######################################## +# Abstract type definition with multiple typevars +abstract type A{X, Y <: X} end +#--------------------- +1 (= slot₂/X (call core.TypeVar :X)) +2 slot₂/X +3 (= slot₃/Y (call core.TypeVar :Y %₂)) +4 slot₂/X +5 slot₃/Y +6 (call core.svec %₄ %₅) +7 (call core._abstracttype TestMod :A %₆) +8 (= slot₁/A %₇) +9 (call core._setsuper! %₇ core.Any) +10 slot₁/A +11 (call core._typebody! 
false %₁₀) +12 (call core.declare_global TestMod :A false) +13 latestworld +14 (call core.isdefinedglobal TestMod :A false) +15 (gotoifnot %₁₄ label₂₀) +16 TestMod.A +17 (call core._equiv_typedef %₁₆ %₇) +18 (gotoifnot %₁₇ label₂₀) +19 (goto label₂₂) +20 (call core.declare_const TestMod :A %₇) +21 latestworld +22 (return core.nothing) + +######################################## +# Error: Abstract type definition with bad signature +abstract type A() end +#--------------------- +LoweringError: +abstract type A() end +# └─┘ ── invalid type signature + +######################################## +# Error: Abstract type definition with bad signature +abstract type A(){T} end +#--------------------- +LoweringError: +abstract type A(){T} end +# └────┘ ── invalid type signature + +######################################## +# Error: Abstract type definition with bad signature +abstract type A() <: B end +#--------------------- +LoweringError: +abstract type A() <: B end +# └───────┘ ── invalid type signature + +######################################## +# Error: Abstract type definition in function scope +function f() + abstract type A end +end +#--------------------- +LoweringError: +function f() + abstract type A end +# └─────────────────┘ ── this syntax is only allowed in top level code +end + +######################################## +# Simple primitive type definition +primitive type P 8 end +#--------------------- +1 (call core.svec) +2 (call core._primitivetype TestMod :P %₁ 8) +3 (= slot₁/P %₂) +4 (call core._setsuper! %₂ core.Any) +5 slot₁/P +6 (call core._typebody! 
false %₅) +7 (call core.declare_global TestMod :P false) +8 latestworld +9 (call core.isdefinedglobal TestMod :P false) +10 (gotoifnot %₉ label₁₅) +11 TestMod.P +12 (call core._equiv_typedef %₁₁ %₂) +13 (gotoifnot %₁₂ label₁₅) +14 (goto label₁₇) +15 (call core.declare_const TestMod :P %₂) +16 latestworld +17 (return core.nothing) + +######################################## +# Complex primitive type definition +primitive type P{X,Y} <: Z 32 end +#--------------------- +1 (= slot₂/X (call core.TypeVar :X)) +2 (= slot₃/Y (call core.TypeVar :Y)) +3 slot₂/X +4 slot₃/Y +5 (call core.svec %₃ %₄) +6 (call core._primitivetype TestMod :P %₅ 32) +7 (= slot₁/P %₆) +8 TestMod.Z +9 (call core._setsuper! %₆ %₈) +10 slot₁/P +11 (call core._typebody! false %₁₀) +12 (call core.declare_global TestMod :P false) +13 latestworld +14 (call core.isdefinedglobal TestMod :P false) +15 (gotoifnot %₁₄ label₂₀) +16 TestMod.P +17 (call core._equiv_typedef %₁₆ %₆) +18 (gotoifnot %₁₇ label₂₀) +19 (goto label₂₂) +20 (call core.declare_const TestMod :P %₆) +21 latestworld +22 (return core.nothing) + +######################################## +# Primitive type definition with computed size (should this be allowed??) +primitive type P P_nbits() end +#--------------------- +1 (call core.svec) +2 TestMod.P_nbits +3 (call %₂) +4 (call core._primitivetype TestMod :P %₁ %₃) +5 (= slot₁/P %₄) +6 (call core._setsuper! %₄ core.Any) +7 slot₁/P +8 (call core._typebody! 
false %₇) +9 (call core.declare_global TestMod :P false) +10 latestworld +11 (call core.isdefinedglobal TestMod :P false) +12 (gotoifnot %₁₁ label₁₇) +13 TestMod.P +14 (call core._equiv_typedef %₁₃ %₄) +15 (gotoifnot %₁₄ label₁₇) +16 (goto label₁₉) +17 (call core.declare_const TestMod :P %₄) +18 latestworld +19 (return core.nothing) + +######################################## +# Empty struct +struct X +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 0) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec) +24 (call core._typebody! %₂₁ %₆ %₂₃) +25 (call core.declare_const TestMod :X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::1:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read)] + 1 TestMod.X + 2 (new %₁) + 3 (return %₂) +34 latestworld +35 (return core.nothing) + +######################################## +# Empty struct with empty ctor +struct X + X() = new() +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 0) +7 (= slot₁/X %₆) +8 (call core._setsuper! 
%₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec) +24 (call core._typebody! %₂₁ %₆ %₂₃) +25 (call core.declare_const TestMod :X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈) +30 (call core.svec) +31 SourceLocation::2:5 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#ctor-self#] + 1 slot₁/#ctor-self# + 2 (new %₁) + 3 (return %₂) +34 latestworld +35 (return core.nothing) + +######################################## +# Basic struct +struct X + a + b::T + c +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec :a :b :c) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 TestMod.T +24 (call core.svec core.Any %₂₃ core.Any) +25 (call core._typebody! 
%₂₁ %₆ %₂₄) +26 (call core.declare_const TestMod :X %₂₅) +27 latestworld +28 TestMod.T +29 (call core.=== core.Any %₂₈) +30 (gotoifnot %₂₉ label₃₂) +31 (goto label₄₀) +32 TestMod.X +33 (call core.apply_type core.Type %₃₂) +34 (call core.svec %₃₃ core.Any core.Any core.Any) +35 (call core.svec) +36 SourceLocation::1:1 +37 (call core.svec %₃₄ %₃₅ %₃₆) +38 --- method core.nothing %₃₇ + slots: [slot₁/#ctor-self# slot₂/a slot₃/b slot₄/c slot₅/tmp] + 1 (call core.fieldtype slot₁/#ctor-self# 2) + 2 slot₃/b + 3 (= slot₅/tmp %₂) + 4 slot₅/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₅/tmp + 9 (= slot₅/tmp (call top.convert %₁ %₈)) + 10 slot₅/tmp + 11 (new slot₁/#ctor-self# slot₂/a %₁₀ slot₄/c) + 12 (return %₁₁) +39 latestworld +40 TestMod.X +41 (call core.apply_type core.Type %₄₀) +42 TestMod.T +43 (call core.svec %₄₁ core.Any %₄₂ core.Any) +44 (call core.svec) +45 SourceLocation::1:1 +46 (call core.svec %₄₃ %₄₄ %₄₅) +47 --- method core.nothing %₄₆ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b slot₄/c) + 3 (return %₂) +48 latestworld +49 (return core.nothing) + +######################################## +# Struct with supertype and type params +struct X{U, S <: V <: T} <: Z +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (= slot₂/U (call core.TypeVar :U)) +4 TestMod.S +5 TestMod.T +6 (= slot₃/V (call core.TypeVar :V %₄ %₅)) +7 slot₂/U +8 slot₃/V +9 (call core.svec %₇ %₈) +10 (call core.svec) +11 (call core.svec) +12 (call core._structtype TestMod :X %₉ %₁₀ %₁₁ false 0) +13 (= slot₄/X %₁₂) +14 TestMod.Z +15 (call core._setsuper! 
%₁₂ %₁₄) +16 (call core.isdefinedglobal TestMod :X false) +17 (gotoifnot %₁₆ label₂₁) +18 TestMod.X +19 (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₂)) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₂₂ label₂₇) +24 TestMod.X +25 (= slot₆/if_val %₂₄) +26 (goto label₂₈) +27 (= slot₆/if_val false) +28 slot₆/if_val +29 (gotoifnot %₂₂ label₄₀) +30 TestMod.X +31 (call top.getproperty %₃₀ :body) +32 (call top.getproperty %₃₁ :body) +33 (call top.getproperty %₃₂ :parameters) +34 (call top.indexed_iterate %₃₃ 1) +35 (= slot₂/U (call core.getfield %₃₄ 1)) +36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) +37 slot₁/iterstate +38 (call top.indexed_iterate %₃₃ 2 %₃₇) +39 (= slot₃/V (call core.getfield %₃₈ 1)) +40 (call core.svec) +41 (call core._typebody! %₂₈ %₁₂ %₄₀) +42 (call core.declare_const TestMod :X %₄₁) +43 latestworld +44 slot₂/U +45 slot₃/V +46 TestMod.X +47 slot₂/U +48 slot₃/V +49 (call core.apply_type %₄₆ %₄₇ %₄₈) +50 (call core.apply_type core.Type %₄₉) +51 (call core.UnionAll %₄₅ %₅₀) +52 (call core.UnionAll %₄₄ %₅₁) +53 (call core.svec %₅₂) +54 (call core.svec) +55 SourceLocation::1:1 +56 (call core.svec %₅₃ %₅₄ %₅₅) +57 --- method core.nothing %₅₆ + slots: [slot₁/#ctor-self#] + 1 (new slot₁/#ctor-self#) + 2 (return %₁) +58 latestworld +59 (return core.nothing) + +######################################## +# Struct with const and atomic fields +struct X + const a + @atomic b + const @atomic c +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec :a :b :c) +5 (call core.svec 1 :const 2 :atomic 3 :atomic 3 :const) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 3) +7 (= slot₁/X %₆) +8 (call core._setsuper! 
%₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec core.Any core.Any core.Any) +24 (call core._typebody! %₂₁ %₆ %₂₃) +25 (call core.declare_const TestMod :X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any core.Any core.Any) +30 (call core.svec) +31 SourceLocation::1:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b slot₄/c] + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b slot₄/c) + 3 (return %₂) +34 latestworld +35 (return core.nothing) + +######################################## +# Documented struct +""" +X docs +""" +struct X + "field a docs" + a + "field b docs" + b +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec :a :b) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) +7 (= slot₁/X %₆) +8 (call core._setsuper! %₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec core.Any core.Any) +24 (call core._typebody! 
%₂₁ %₆ %₂₃) +25 (call core.declare_const TestMod :X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any core.Any) +30 (call core.svec) +31 SourceLocation::4:1 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#self#(!read) slot₂/a slot₃/b] + 1 TestMod.X + 2 (new %₁ slot₂/a slot₃/b) + 3 (return %₂) +34 latestworld +35 JuliaLowering.bind_docs! +36 (call core.tuple :field_docs) +37 (call core.apply_type core.NamedTuple %₃₆) +38 (call core.svec 1 "field a docs" 2 "field b docs") +39 (call core.tuple %₃₈) +40 (call %₃₇ %₃₉) +41 TestMod.X +42 SourceLocation::4:1 +43 (call core.kwcall %₄₀ %₃₅ %₄₁ "X docs\n" %₄₂) +44 (return core.nothing) + +######################################## +# Struct with outer constructor +struct X{U} + x::U +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (= slot₁/U (call core.TypeVar :U)) +4 slot₁/U +5 (call core.svec %₄) +6 (call core.svec :x) +7 (call core.svec) +8 (call core._structtype TestMod :X %₅ %₆ %₇ false 1) +9 (= slot₂/X %₈) +10 (call core._setsuper! %₈ core.Any) +11 (call core.isdefinedglobal TestMod :X false) +12 (gotoifnot %₁₁ label₁₆) +13 TestMod.X +14 (= slot₃/if_val (call core._equiv_typedef %₁₃ %₈)) +15 (goto label₁₇) +16 (= slot₃/if_val false) +17 slot₃/if_val +18 (gotoifnot %₁₇ label₂₂) +19 TestMod.X +20 (= slot₄/if_val %₁₉) +21 (goto label₂₃) +22 (= slot₄/if_val false) +23 slot₄/if_val +24 (gotoifnot %₁₇ label₃₀) +25 TestMod.X +26 (call top.getproperty %₂₅ :body) +27 (call top.getproperty %₂₆ :parameters) +28 (call top.indexed_iterate %₂₇ 1) +29 (= slot₁/U (call core.getfield %₂₈ 1)) +30 slot₁/U +31 (call core.svec %₃₀) +32 (call core._typebody! 
%₂₃ %₈ %₃₁) +33 (call core.declare_const TestMod :X %₃₂) +34 latestworld +35 slot₁/U +36 TestMod.X +37 slot₁/U +38 (call core.apply_type %₃₆ %₃₇) +39 (call core.apply_type core.Type %₃₈) +40 (call core.UnionAll %₃₅ %₃₉) +41 (call core.svec %₄₀ core.Any) +42 (call core.svec) +43 SourceLocation::1:1 +44 (call core.svec %₄₁ %₄₂ %₄₃) +45 --- method core.nothing %₄₄ + slots: [slot₁/#ctor-self# slot₂/x slot₃/tmp] + 1 (call core.fieldtype slot₁/#ctor-self# 1) + 2 slot₂/x + 3 (= slot₃/tmp %₂) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₃/tmp + 9 (= slot₃/tmp (call top.convert %₁ %₈)) + 10 slot₃/tmp + 11 (new slot₁/#ctor-self# %₁₀) + 12 (return %₁₁) +46 latestworld +47 TestMod.X +48 (call core.apply_type core.Type %₄₇) +49 slot₁/U +50 (call core.svec %₄₈ %₄₉) +51 slot₁/U +52 (call core.svec %₅₁) +53 SourceLocation::1:1 +54 (call core.svec %₅₀ %₅₂ %₅₃) +55 --- method core.nothing %₅₄ + slots: [slot₁/#self#(!read) slot₂/x] + 1 TestMod.X + 2 static_parameter₁ + 3 (call core.apply_type %₁ %₂) + 4 (new %₃ slot₂/x) + 5 (return %₄) +56 latestworld +57 (return core.nothing) + +######################################## +# Struct with outer constructor where one typevar is constrained by the other +# See https://github.com/JuliaLang/julia/issues/27269) +struct X{T, S <: Vector{T}} + v::Vector{S} +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (= slot₃/T (call core.TypeVar :T)) +4 TestMod.Vector +5 slot₃/T +6 (call core.apply_type %₄ %₅) +7 (= slot₂/S (call core.TypeVar :S %₆)) +8 slot₃/T +9 slot₂/S +10 (call core.svec %₈ %₉) +11 (call core.svec :v) +12 (call core.svec) +13 (call core._structtype TestMod :X %₁₀ %₁₁ %₁₂ false 1) +14 (= slot₄/X %₁₃) +15 (call core._setsuper! 
%₁₃ core.Any) +16 (call core.isdefinedglobal TestMod :X false) +17 (gotoifnot %₁₆ label₂₁) +18 TestMod.X +19 (= slot₅/if_val (call core._equiv_typedef %₁₈ %₁₃)) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₂₂ label₂₇) +24 TestMod.X +25 (= slot₆/if_val %₂₄) +26 (goto label₂₈) +27 (= slot₆/if_val false) +28 slot₆/if_val +29 (gotoifnot %₂₂ label₄₀) +30 TestMod.X +31 (call top.getproperty %₃₀ :body) +32 (call top.getproperty %₃₁ :body) +33 (call top.getproperty %₃₂ :parameters) +34 (call top.indexed_iterate %₃₃ 1) +35 (= slot₃/T (call core.getfield %₃₄ 1)) +36 (= slot₁/iterstate (call core.getfield %₃₄ 2)) +37 slot₁/iterstate +38 (call top.indexed_iterate %₃₃ 2 %₃₇) +39 (= slot₂/S (call core.getfield %₃₈ 1)) +40 TestMod.Vector +41 slot₂/S +42 (call core.apply_type %₄₀ %₄₁) +43 (call core.svec %₄₂) +44 (call core._typebody! %₂₈ %₁₃ %₄₃) +45 (call core.declare_const TestMod :X %₄₄) +46 latestworld +47 slot₃/T +48 slot₂/S +49 TestMod.X +50 slot₃/T +51 slot₂/S +52 (call core.apply_type %₄₉ %₅₀ %₅₁) +53 (call core.apply_type core.Type %₅₂) +54 (call core.UnionAll %₄₈ %₅₃) +55 (call core.UnionAll %₄₇ %₅₄) +56 (call core.svec %₅₅ core.Any) +57 (call core.svec) +58 SourceLocation::1:1 +59 (call core.svec %₅₆ %₅₇ %₅₈) +60 --- method core.nothing %₅₉ + slots: [slot₁/#ctor-self# slot₂/v slot₃/tmp] + 1 (call core.fieldtype slot₁/#ctor-self# 1) + 2 slot₂/v + 3 (= slot₃/tmp %₂) + 4 slot₃/tmp + 5 (call core.isa %₄ %₁) + 6 (gotoifnot %₅ label₈) + 7 (goto label₁₀) + 8 slot₃/tmp + 9 (= slot₃/tmp (call top.convert %₁ %₈)) + 10 slot₃/tmp + 11 (new slot₁/#ctor-self# %₁₀) + 12 (return %₁₁) +61 latestworld +62 TestMod.X +63 (call core.apply_type core.Type %₆₂) +64 TestMod.Vector +65 slot₂/S +66 (call core.apply_type %₆₄ %₆₅) +67 (call core.svec %₆₃ %₆₆) +68 slot₃/T +69 slot₂/S +70 (call core.svec %₆₈ %₆₉) +71 SourceLocation::1:1 +72 (call core.svec %₆₇ %₇₀ %₇₁) +73 --- method core.nothing %₇₂ + slots: [slot₁/#self#(!read) slot₂/v] + 1 TestMod.X + 2 
static_parameter₁ + 3 static_parameter₂ + 4 (call core.apply_type %₁ %₂ %₃) + 5 (new %₄ slot₂/v) + 6 (return %₅) +74 latestworld +75 (return core.nothing) + +######################################## +# User defined inner constructors and helper functions for structs without type params +struct X + x + f() = new(1) + X() = f() # this X() captures `f` (in flisp, as a Box :-/ ) + X(x) = new(x) + X(y,z)::ReallyXIPromise = new(y+z) + """ + Docs for X constructor + """ + X(a,b,c) = new(a) +end +#--------------------- +1 (= slot₂/f (call core.Box)) +2 (call core.declare_global TestMod :X false) +3 latestworld +4 (call core.svec) +5 (call core.svec :x) +6 (call core.svec) +7 (call core._structtype TestMod :X %₄ %₅ %₆ false 1) +8 (= slot₁/X %₇) +9 (call core._setsuper! %₇ core.Any) +10 (call core.isdefinedglobal TestMod :X false) +11 (gotoifnot %₁₀ label₁₅) +12 TestMod.X +13 (= slot₄/if_val (call core._equiv_typedef %₁₂ %₇)) +14 (goto label₁₆) +15 (= slot₄/if_val false) +16 slot₄/if_val +17 (gotoifnot %₁₆ label₂₁) +18 TestMod.X +19 (= slot₅/if_val %₁₈) +20 (goto label₂₂) +21 (= slot₅/if_val false) +22 slot₅/if_val +23 (gotoifnot %₁₆ label₂₄) +24 (call core.svec core.Any) +25 (call core._typebody! %₂₂ %₇ %₂₄) +26 (call core.declare_const TestMod :X %₂₅) +27 latestworld +28 (call core.svec) +29 (call core.svec) +30 (call JuliaLowering.eval_closure_type TestMod :#f##0 %₂₈ %₂₉) +31 latestworld +32 TestMod.#f##0 +33 (new %₃₂) +34 slot₂/f +35 (call core.setfield! 
%₃₄ :contents %₃₃) +36 TestMod.#f##0 +37 (call core.svec %₃₆) +38 (call core.svec) +39 SourceLocation::3:5 +40 (call core.svec %₃₇ %₃₈ %₃₉) +41 --- method core.nothing %₄₀ + slots: [slot₁/#self#(!read)] + 1 TestMod.X + 2 (new %₁ 1) + 3 (return %₂) +42 latestworld +43 TestMod.X +44 (call core.apply_type core.Type %₄₃) +45 (call core.svec %₄₄) +46 (call core.svec) +47 SourceLocation::4:5 +48 (call core.svec %₄₅ %₄₆ %₄₇) +49 --- code_info + slots: [slot₁/#ctor-self#(!read) slot₂/f(!read)] + 1 (captured_local 1) + 2 (call core.isdefined %₁ :contents) + 3 (gotoifnot %₂ label₅) + 4 (goto label₇) + 5 (newvar slot₂/f) + 6 slot₂/f + 7 (call core.getfield %₁ :contents) + 8 (call %₇) + 9 (return %₈) +50 slot₂/f +51 (call core.svec %₅₀) +52 (call JuliaLowering.replace_captured_locals! %₄₉ %₅₁) +53 --- method core.nothing %₄₈ %₅₂ +54 latestworld +55 TestMod.X +56 (call core.apply_type core.Type %₅₅) +57 (call core.svec %₅₆ core.Any) +58 (call core.svec) +59 SourceLocation::5:5 +60 (call core.svec %₅₇ %₅₈ %₅₉) +61 --- method core.nothing %₆₀ + slots: [slot₁/#ctor-self# slot₂/x] + 1 slot₁/#ctor-self# + 2 (new %₁ slot₂/x) + 3 (return %₂) +62 latestworld +63 TestMod.X +64 (call core.apply_type core.Type %₆₃) +65 (call core.svec %₆₄ core.Any core.Any) +66 (call core.svec) +67 SourceLocation::6:5 +68 (call core.svec %₆₅ %₆₆ %₆₇) +69 --- method core.nothing %₆₈ + slots: [slot₁/#ctor-self# slot₂/y slot₃/z slot₄/tmp(!read)] + 1 TestMod.ReallyXIPromise + 2 slot₁/#ctor-self# + 3 TestMod.+ + 4 (call %₃ slot₂/y slot₃/z) + 5 (= slot₄/tmp (new %₂ %₄)) + 6 slot₄/tmp + 7 (call core.isa %₆ %₁) + 8 (gotoifnot %₇ label₁₀) + 9 (goto label₁₃) + 10 slot₄/tmp + 11 (call top.convert %₁ %₁₀) + 12 (= slot₄/tmp (call core.typeassert %₁₁ %₁)) + 13 slot₄/tmp + 14 (return %₁₃) +70 latestworld +71 TestMod.X +72 (call core.apply_type core.Type %₇₁) +73 (call core.svec %₇₂ core.Any core.Any core.Any) +74 (call core.svec) +75 SourceLocation::10:5 +76 (call core.svec %₇₃ %₇₄ %₇₅) +77 --- method core.nothing %₇₆ + 
slots: [slot₁/#ctor-self# slot₂/a slot₃/b(!read) slot₄/c(!read)] + 1 slot₁/#ctor-self# + 2 (new %₁ slot₂/a) + 3 (return %₂) +78 latestworld +79 TestMod.X +80 (call core.apply_type core.Type %₇₉) +81 (call JuliaLowering.bind_docs! %₈₀ "Docs for X constructor\n" %₇₆) +82 (return core.nothing) + +######################################## +# User defined inner constructors and helper functions for structs with type params +struct X{S,T} + x + X{A,B}() = new(1) + X{U,V}() where {U,V} = new(1) + f() = new{A,B}(1) +end +#--------------------- +1 (newvar slot₅/f) +2 (call core.declare_global TestMod :X false) +3 latestworld +4 (= slot₂/S (call core.TypeVar :S)) +5 (= slot₃/T (call core.TypeVar :T)) +6 slot₂/S +7 slot₃/T +8 (call core.svec %₆ %₇) +9 (call core.svec :x) +10 (call core.svec) +11 (call core._structtype TestMod :X %₈ %₉ %₁₀ false 1) +12 (= slot₄/X %₁₁) +13 (call core._setsuper! %₁₁ core.Any) +14 (call core.isdefinedglobal TestMod :X false) +15 (gotoifnot %₁₄ label₁₉) +16 TestMod.X +17 (= slot₈/if_val (call core._equiv_typedef %₁₆ %₁₁)) +18 (goto label₂₀) +19 (= slot₈/if_val false) +20 slot₈/if_val +21 (gotoifnot %₂₀ label₂₅) +22 TestMod.X +23 (= slot₉/if_val %₂₂) +24 (goto label₂₆) +25 (= slot₉/if_val false) +26 slot₉/if_val +27 (gotoifnot %₂₀ label₃₈) +28 TestMod.X +29 (call top.getproperty %₂₈ :body) +30 (call top.getproperty %₂₉ :body) +31 (call top.getproperty %₃₀ :parameters) +32 (call top.indexed_iterate %₃₁ 1) +33 (= slot₂/S (call core.getfield %₃₂ 1)) +34 (= slot₁/iterstate (call core.getfield %₃₂ 2)) +35 slot₁/iterstate +36 (call top.indexed_iterate %₃₁ 2 %₃₅) +37 (= slot₃/T (call core.getfield %₃₆ 1)) +38 (call core.svec core.Any) +39 (call core._typebody! 
%₂₆ %₁₁ %₃₈) +40 (call core.declare_const TestMod :X %₃₉) +41 latestworld +42 TestMod.X +43 TestMod.A +44 TestMod.B +45 (call core.apply_type %₄₂ %₄₃ %₄₄) +46 (call core.apply_type core.Type %₄₅) +47 (call core.svec %₄₆) +48 (call core.svec) +49 SourceLocation::3:5 +50 (call core.svec %₄₇ %₄₈ %₄₉) +51 --- method core.nothing %₅₀ + slots: [slot₁/#ctor-self#] + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +52 latestworld +53 (= slot₆/U (call core.TypeVar :U)) +54 (= slot₇/V (call core.TypeVar :V)) +55 TestMod.X +56 slot₆/U +57 slot₇/V +58 (call core.apply_type %₅₅ %₅₆ %₅₇) +59 (call core.apply_type core.Type %₅₈) +60 (call core.svec %₅₉) +61 slot₆/U +62 slot₇/V +63 (call core.svec %₆₁ %₆₂) +64 SourceLocation::4:5 +65 (call core.svec %₆₀ %₆₃ %₆₄) +66 --- method core.nothing %₆₅ + slots: [slot₁/#ctor-self#] + 1 slot₁/#ctor-self# + 2 (new %₁ 1) + 3 (return %₂) +67 latestworld +68 (call core.svec) +69 (call core.svec) +70 (call JuliaLowering.eval_closure_type TestMod :#f##1 %₆₈ %₆₉) +71 latestworld +72 TestMod.#f##1 +73 (new %₇₂) +74 (= slot₅/f %₇₃) +75 TestMod.#f##1 +76 (call core.svec %₇₅) +77 (call core.svec) +78 SourceLocation::5:5 +79 (call core.svec %₇₆ %₇₇ %₇₈) +80 --- method core.nothing %₇₉ + slots: [slot₁/#self#(!read)] + 1 TestMod.X + 2 TestMod.A + 3 TestMod.B + 4 (call core.apply_type %₁ %₂ %₃) + 5 (new %₄ 1) + 6 (return %₅) +81 latestworld +82 (return core.nothing) + +######################################## +# new() calls with splats; `Any` fields +struct X + x + y + X(xs) = new(xs...) +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (call core.svec) +4 (call core.svec :x :y) +5 (call core.svec) +6 (call core._structtype TestMod :X %₃ %₄ %₅ false 2) +7 (= slot₁/X %₆) +8 (call core._setsuper! 
%₆ core.Any) +9 (call core.isdefinedglobal TestMod :X false) +10 (gotoifnot %₉ label₁₄) +11 TestMod.X +12 (= slot₂/if_val (call core._equiv_typedef %₁₁ %₆)) +13 (goto label₁₅) +14 (= slot₂/if_val false) +15 slot₂/if_val +16 (gotoifnot %₁₅ label₂₀) +17 TestMod.X +18 (= slot₃/if_val %₁₇) +19 (goto label₂₁) +20 (= slot₃/if_val false) +21 slot₃/if_val +22 (gotoifnot %₁₅ label₂₃) +23 (call core.svec core.Any core.Any) +24 (call core._typebody! %₂₁ %₆ %₂₃) +25 (call core.declare_const TestMod :X %₂₄) +26 latestworld +27 TestMod.X +28 (call core.apply_type core.Type %₂₇) +29 (call core.svec %₂₈ core.Any) +30 (call core.svec) +31 SourceLocation::4:5 +32 (call core.svec %₂₉ %₃₀ %₃₁) +33 --- method core.nothing %₃₂ + slots: [slot₁/#ctor-self# slot₂/xs] + 1 slot₁/#ctor-self# + 2 (call core._apply_iterate top.iterate core.tuple slot₂/xs) + 3 (splatnew %₁ %₂) + 4 (return %₃) +34 latestworld +35 (return core.nothing) + +######################################## +# new() calls with splats; typed fields +struct X{T} + x::T + y::A + X{T}(xs) where {T} = new(xs...) +end +#--------------------- +1 (call core.declare_global TestMod :X false) +2 latestworld +3 (= slot₁/T (call core.TypeVar :T)) +4 slot₁/T +5 (call core.svec %₄) +6 (call core.svec :x :y) +7 (call core.svec) +8 (call core._structtype TestMod :X %₅ %₆ %₇ false 2) +9 (= slot₂/X %₈) +10 (call core._setsuper! 
%₈ core.Any) +11 (call core.isdefinedglobal TestMod :X false) +12 (gotoifnot %₁₁ label₁₆) +13 TestMod.X +14 (= slot₄/if_val (call core._equiv_typedef %₁₃ %₈)) +15 (goto label₁₇) +16 (= slot₄/if_val false) +17 slot₄/if_val +18 (gotoifnot %₁₇ label₂₂) +19 TestMod.X +20 (= slot₅/if_val %₁₉) +21 (goto label₂₃) +22 (= slot₅/if_val false) +23 slot₅/if_val +24 (gotoifnot %₁₇ label₃₀) +25 TestMod.X +26 (call top.getproperty %₂₅ :body) +27 (call top.getproperty %₂₆ :parameters) +28 (call top.indexed_iterate %₂₇ 1) +29 (= slot₁/T (call core.getfield %₂₈ 1)) +30 slot₁/T +31 TestMod.A +32 (call core.svec %₃₀ %₃₁) +33 (call core._typebody! %₂₃ %₈ %₃₂) +34 (call core.declare_const TestMod :X %₃₃) +35 latestworld +36 (= slot₃/T (call core.TypeVar :T)) +37 TestMod.X +38 slot₃/T +39 (call core.apply_type %₃₇ %₃₈) +40 (call core.apply_type core.Type %₃₉) +41 (call core.svec %₄₀ core.Any) +42 slot₃/T +43 (call core.svec %₄₂) +44 SourceLocation::4:5 +45 (call core.svec %₄₁ %₄₃ %₄₄) +46 --- method core.nothing %₄₅ + slots: [slot₁/#ctor-self# slot₂/xs slot₃/tmp slot₄/tmp] + 1 (call core._apply_iterate top.iterate core.tuple slot₂/xs) + 2 (call core.nfields %₁) + 3 (call top.ult_int %₂ 2) + 4 (gotoifnot %₃ label₇) + 5 (call top.ArgumentError "too few arguments in `new` (expected 2)") + 6 (call core.throw %₅) + 7 (call top.ult_int 2 %₂) + 8 (gotoifnot %₇ label₁₁) + 9 (call top.ArgumentError "too many arguments in `new` (expected 2)") + 10 (call core.throw %₉) + 11 slot₁/#ctor-self# + 12 (call core.fieldtype %₁₁ 1) + 13 (= slot₃/tmp (call core.getfield %₁ 1)) + 14 slot₃/tmp + 15 (call core.isa %₁₄ %₁₂) + 16 (gotoifnot %₁₅ label₁₈) + 17 (goto label₂₀) + 18 slot₃/tmp + 19 (= slot₃/tmp (call top.convert %₁₂ %₁₈)) + 20 slot₃/tmp + 21 (call core.fieldtype %₁₁ 2) + 22 (= slot₄/tmp (call core.getfield %₁ 2)) + 23 slot₄/tmp + 24 (call core.isa %₂₃ %₂₁) + 25 (gotoifnot %₂₄ label₂₇) + 26 (goto label₂₉) + 27 slot₄/tmp + 28 (= slot₄/tmp (call top.convert %₂₁ %₂₇)) + 29 slot₄/tmp + 30 (new %₁₁ %₂₀ %₂₉) 
+ 31 (return %₃₀) +47 latestworld +48 (return core.nothing) + +######################################## +# Error: new doesn't accept keywords +struct X + X() = new(a=1) +end +#--------------------- +LoweringError: +struct X + X() = new(a=1) +# └─┘ ── `new` does not accept keyword arguments +end + +######################################## +# Error: new doesn't accept keywords (params block) +struct X + X() = new(; a=1) +end +#--------------------- +LoweringError: +struct X + X() = new(; a=1) +# └───┘ ── `new` does not accept keyword arguments +end + +######################################## +# Error: User defined inner constructors without enough type params +struct X{S,T} + X() = new{A}() +end +#--------------------- +LoweringError: +struct X{S,T} + X() = new{A}() +# └────┘ ── too few type parameters specified in `new{...}` +end + +######################################## +# Error: User defined inner constructors without enough type params +struct X{S,T} + X{A}() = new() +end +#--------------------- +LoweringError: +struct X{S,T} + X{A}() = new() +# └─┘ ── too few type parameters specified in `new` +end + +######################################## +# Error: User defined inner constructors with too many type params +struct X{S,T} + X() = new{A,B,C}() +end +#--------------------- +LoweringError: +struct X{S,T} + X() = new{A,B,C}() +# └────────┘ ── too many type parameters specified in `new{...}` +end + +######################################## +# Error: Struct not at top level +function f() + struct X + end +end +#--------------------- +LoweringError: +function f() +# ┌─────── + struct X + end +#─────┘ ── this syntax is only allowed in top level code +end diff --git a/JuliaLowering/test/utils.jl b/JuliaLowering/test/utils.jl new file mode 100644 index 0000000000000..3460ecdee8906 --- /dev/null +++ b/JuliaLowering/test/utils.jl @@ -0,0 +1,389 @@ +# Shared testing code which should be included before running individual test files. 
+using Test + +using JuliaLowering +using JuliaSyntax + +import FileWatching + +# The following are for docstrings testing. We need to load the REPL module +# here for `Base.@doc` lookup to work at all. Yes this does seem really, +# really, REALLY messed up. +using Markdown +import REPL + +using .JuliaSyntax: sourcetext, set_numeric_flags + +using .JuliaLowering: + SyntaxGraph, newnode!, ensure_attributes!, + Kind, SourceRef, SyntaxTree, NodeId, + makenode, makeleaf, setattr!, sethead!, + is_leaf, numchildren, children, + @ast, flattened_provenance, showprov, LoweringError, MacroExpansionError, + syntax_graph, Bindings, ScopeLayer, mapchildren + +function _ast_test_graph() + graph = SyntaxGraph() + ensure_attributes!(graph, + kind=Kind, syntax_flags=UInt16, + source=Union{SourceRef,NodeId,Tuple,LineNumberNode}, + var_id=Int, value=Any, name_val=String, is_toplevel_thunk=Bool, + toplevel_pure=Bool) +end + +function _source_node(graph, src) + id = newnode!(graph) + sethead!(graph, id, K"None") + setattr!(graph, id, source=src) + SyntaxTree(graph, id) +end + +macro ast_(tree) + # TODO: Implement this in terms of new-style macros. 
+ quote + graph = _ast_test_graph() + srcref = _source_node(graph, $(QuoteNode(__source__))) + @ast graph srcref $tree + end +end + +function ≈(ex1, ex2) + if kind(ex1) != kind(ex2) || is_leaf(ex1) != is_leaf(ex2) + return false + end + if is_leaf(ex1) + return get(ex1, :value, nothing) == get(ex2, :value, nothing) && + get(ex1, :name_val, nothing) == get(ex2, :name_val, nothing) + else + if numchildren(ex1) != numchildren(ex2) + return false + end + return all(c1 ≈ c2 for (c1,c2) in zip(children(ex1), children(ex2))) + end +end + + +#------------------------------------------------------------------------------- +function _format_as_ast_macro(io, ex, indent) + k = kind(ex) + kind_str = repr(k) + if !is_leaf(ex) + println(io, indent, "[", kind_str) + ind2 = indent*" " + for c in children(ex) + _format_as_ast_macro(io, c, ind2) + end + println(io, indent, "]") + else + val_str = if k == K"Identifier" || k == K"core" || k == K"top" + repr(ex.name_val) + elseif k == K"BindingId" + repr(ex.var_id) + else + repr(get(ex, :value, nothing)) + end + println(io, indent, val_str, "::", kind_str) + end +end + +function format_as_ast_macro(io::IO, ex) + print(io, "@ast_ ") + _format_as_ast_macro(io, ex, "") +end + +""" + format_as_ast_macro(ex) + +Format AST `ex` as a Juila source code call to the `@ast_` macro for generating +test case comparisons with the `≈` function. 
+""" +format_as_ast_macro(ex) = format_as_ast_macro(stdout, ex) + +#------------------------------------------------------------------------------- + +# Test tools + +function desugar(mod::Module, src::String) + ex = parsestmt(SyntaxTree, src, filename="foo.jl") + ctx = JuliaLowering.DesugaringContext(syntax_graph(ex), Bindings(), ScopeLayer[], mod) + JuliaLowering.expand_forms_2(ctx, ex) +end + +function uncomment_description(desc) + replace(desc, r"^# ?"m=>"") +end + +function comment_description(desc) + replace(desc, r"^"m=>"# ") +end + +function match_ir_test_case(case_str) + m = match(r"(^#(?:.|\n)*?)^([^#](?:.|\n)*)"m, strip(case_str)) + if isnothing(m) + error("Malformatted IR test case:\n$(repr(case_str))") + end + description = uncomment_description(m[1]) + inout = split(m[2], r"#----*") + input, output = length(inout) == 2 ? inout : + length(inout) == 1 ? (inout[1], "") : + error("Too many sections in IR test case") + expect_error = startswith(description, "Error") + is_broken = startswith(description, "FIXME") + method_filter = begin + mf = match(r"\[method_filter: *(.*)\]", description) + isnothing(mf) ? 
nothing : strip(mf[1]) + end + (; expect_error=expect_error, is_broken=is_broken, + description=strip(description), + method_filter=method_filter, + input=strip(input), output=strip(output)) +end + +function read_ir_test_cases(filename) + str = read(filename, String) + parts = split(str, r"#\*+") + if length(parts) == 2 + preamble_str = strip(parts[1]) + cases_str = parts[2] + else + preamble_str = "" + cases_str = only(parts) + end + (preamble_str, + [match_ir_test_case(s) for s in split(cases_str, r"######*") if strip(s) != ""]) +end + +function setup_ir_test_module(preamble) + test_mod = Module(:TestMod) + Base.eval(test_mod, :(const JuliaLowering = $JuliaLowering)) + Base.eval(test_mod, :(const var"@ast_" = $(var"@ast_"))) + JuliaLowering.include_string(test_mod, preamble) + test_mod +end + +function format_ir_for_test(mod, case) + ex = parsestmt(SyntaxTree, case.input) + try + if kind(ex) == K"macrocall" && kind(ex[1]) == K"macro_name" && ex[1][1].name_val == "ast_" + # Total hack, until @ast_ can be implemented in terms of new-style + # macros. 
+ ex = Base.eval(mod, Expr(ex)) + end + x = JuliaLowering.lower(mod, ex) + if case.expect_error + error("Expected a lowering error in test case \"$(case.description)\"") + end + ir = strip(sprint(JuliaLowering.print_ir, x, case.method_filter)) + return replace(ir, string(mod)=>"TestMod") + catch exc + if exc isa InterruptException + rethrow() + elseif case.expect_error && (exc isa LoweringError) + return sprint(io->Base.showerror(io, exc, show_detail=false)) + elseif case.expect_error && (exc isa MacroExpansionError) + return sprint(io->Base.showerror(io, exc)) + elseif case.is_broken + return sprint(io->Base.showerror(io, exc)) + else + throw("Error in test case \"$(case.description)\"") + end + end +end + +function test_ir_cases(filename::AbstractString) + preamble, cases = read_ir_test_cases(filename) + test_mod = setup_ir_test_module(preamble) + for case in cases + if case.is_broken + continue + end + output = format_ir_for_test(test_mod, case) + @testset "$(case.description)" begin + if output != case.output + # Do additional error dumping, as @test will not format errors in a nice way + @error "Test \"$(case.description)\" failed" output=Text(output) ref=Text(case.output) + end + @test output == case.output + end + end +end + +""" +Update all IR test cases in `filename` when the IR format has changed. + +When `pattern` is supplied, update only those tests where +`occursin(pattern, description)` is true. 
+""" +function refresh_ir_test_cases(filename, pattern=nothing) + preamble, cases = read_ir_test_cases(filename) + test_mod = setup_ir_test_module(preamble) + io = IOBuffer() + if !isempty(preamble) + println(io, preamble, "\n") + println(io, "#*******************************************************************************") + end + for case in cases + if isnothing(pattern) || occursin(pattern, case.description) + ir = format_ir_for_test(test_mod, case) + if rstrip(ir) != case.output + @info "Refreshing test case $(repr(case.description)) in $filename" + end + else + ir = case.output + end + println(io, + """ + ######################################## + $(comment_description(case.description)) + $(strip(case.input)) + #--------------------- + $ir + """ + ) + end + # Write only at the end to ensure we don't write rubbish if we crash! + write(filename, take!(io)) + nothing +end + +function refresh_all_ir_test_cases(test_dir=".") + foreach(refresh_ir_test_cases, filter(fn->endswith(fn, "ir.jl"), readdir(test_dir, join=true))) +end + +function watch_ir_tests(dir, delay=0.5) + dir = abspath(dir) + while true + (name, event) = FileWatching.watch_folder(dir) + if endswith(name, "_ir.jl") && (event.changed || event.renamed) + FileWatching.unwatch_folder(dir) + sleep(delay) + try + refresh_ir_test_cases(joinpath(dir, name)) + catch + @error "Error refreshing test case" exception=current_exceptions() + end + end + end +end + +function lower_str(mod::Module, s::AbstractString) + ex = parsestmt(JuliaLowering.SyntaxTree, s) + return JuliaLowering.to_lowered_expr(JuliaLowering.lower(mod, ex)) +end + +# See Julia Base tests in "test/docs.jl" +function docstrings_equal(d1, d2; debug=true) + io1 = IOBuffer() + io2 = IOBuffer() + show(io1, MIME"text/markdown"(), d1) + show(io2, MIME"text/markdown"(), d2) + s1 = String(take!(io1)) + s2 = String(take!(io2)) + if debug && s1 != s2 + print(s1) + println("--------------------------------------------------------------------------------") 
+ print(s2) + println("================================================================================") + end + return s1 == s2 +end +docstrings_equal(d1::Docs.DocStr, d2) = docstrings_equal(Docs.parsedoc(d1), d2) + +#------------------------------------------------------------------------------- +# Tools for test case reduction + +function block_reduction_1(is_lowering_error::Function, orig_ex::ST, ex::ST, + curr_path = Int[]) where {ST <: SyntaxTree} + if !is_leaf(ex) + if kind(ex) == K"block" + for i in 1:numchildren(ex) + trial_ex = delete_block_child(orig_ex, orig_ex, curr_path, i) + if is_lowering_error(trial_ex) + # @info "Reduced expression" curr_path i + return trial_ex + end + end + end + for (i,e) in enumerate(children(ex)) + push!(curr_path, i) + res = block_reduction_1(is_lowering_error, orig_ex, e, curr_path) + if !isnothing(res) + return res + end + pop!(curr_path) + end + end + return nothing +end + +# Find children of all `K"block"`s in an expression and try deleting them while +# preserving the invariant `is_lowering_error(reduced) == true`. +function block_reduction(is_lowering_error, ex) + reduced = ex + was_reduced = false + while true + r = block_reduction_1(is_lowering_error, reduced, reduced) + if isnothing(r) + return (reduced, was_reduced) + end + reduced = r + was_reduced = true + end +end + +function delete_block_child(ctx, ex, block_path, child_idx, depth=1) + if depth > length(block_path) + cs = copy(children(ex)) + deleteat!(cs, child_idx) + @ast ctx ex [ex cs...] 
+ else + j = block_path[depth] + mapchildren(ctx, ex, j:j) do e + delete_block_child(ctx, e, block_path, child_idx, depth+1) + end + end +end + +function throws_lowering_exc(mod, ex) + try + debug_lower(mod, ex) + return false + catch exc + if exc isa LoweringError + return true + else + rethrow() + end + end +end + +# Parse a file and lower the top level expression one child at a time, finding +# any top level statement that fails lowering and producing a partially reduced +# test case. +function reduce_any_failing_toplevel(mod::Module, filename::AbstractString; do_eval::Bool=false) + text = read(filename, String) + ex0 = parseall(SyntaxTree, text; filename) + for ex in children(ex0) + try + ex_compiled = JuliaLowering.lower(mod, ex) + ex_expr = JuliaLowering.to_lowered_expr(ex_compiled) + if do_eval + Base.eval(mod, ex_expr) + end + catch exc + @error "Failure lowering code" ex + if !(exc isa LoweringError) + rethrow() + end + (reduced,was_reduced) = block_reduction(e->throws_lowering_exc(mod,e), ex) + if !was_reduced + @info "No reduction possible" + return ex + else + @info "Reduced code" reduced + return reduced + end + end + end + nothing +end diff --git a/JuliaSyntax/.github/workflows/CI.yml b/JuliaSyntax/.github/workflows/CI.yml new file mode 100644 index 0000000000000..b6ceedb5e4f63 --- /dev/null +++ b/JuliaSyntax/.github/workflows/CI.yml @@ -0,0 +1,121 @@ +name: CI +on: + push: + branches: + - main + - release-* + tags: '*' + pull_request: +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.0' + - '1.1' + - '1.2' + - '1.3' + - '1.4' + - '1.5' + - '1.6' + - '1.7' + - '1.8' + - '1.9' + - '1' + - 'pre' + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x86 + - x64 + exclude: + # Test all OS's and arch possibilities on + # - 1.0 + # - 1.6 + # - 1 + # - pre + # - nightly + # 
but remove some configurations from the build matrix to reduce CI time. + # See https://github.com/marketplace/actions/setup-julia-environment + - {os: 'macOS-latest', version: '1.1'} + - {os: 'macOS-latest', version: '1.2'} + - {os: 'macOS-latest', version: '1.3'} + - {os: 'macOS-latest', version: '1.4'} + - {os: 'macOS-latest', version: '1.5'} + - {os: 'macOS-latest', version: '1.7'} + - {os: 'macOS-latest', version: '1.8'} + - {os: 'macOS-latest', version: '1.9'} + # MacOS not available on x86 + - {os: 'macOS-latest', arch: 'x86'} + - {os: 'windows-latest', version: '1.1'} + - {os: 'windows-latest', version: '1.2'} + - {os: 'windows-latest', version: '1.3'} + - {os: 'windows-latest', version: '1.4'} + - {os: 'windows-latest', version: '1.5'} + - {os: 'windows-latest', version: '1.7'} + - {os: 'windows-latest', version: '1.8'} + - {os: 'windows-latest', version: '1.9'} + - {os: 'ubuntu-latest', version: '1.1', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.2', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.3', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.4', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.5', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.7', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.8', arch: 'x86'} + - {os: 'ubuntu-latest', version: '1.9', arch: 'x86'} + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@latest + - uses: codecov/codecov-action@v3 + with: + file: lcov.info + test_sysimage: + name: JuliaSyntax sysimage build - ${{ github.event_name }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v2 + with: + version: 1.6 + arch: x64 + - uses: actions/cache@v4 + env: + cache-name: cache-artifacts + with: + path: 
~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - run: julia sysimage/compile.jl + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: '1.10' + - run: julia --project=docs -e ' + using Pkg; + Pkg.develop(PackageSpec(; path=pwd())); + Pkg.instantiate();' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/JuliaSyntax/.github/workflows/CompatHelper.yml b/JuliaSyntax/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000000000..cba9134c670f0 --- /dev/null +++ b/JuliaSyntax/.github/workflows/CompatHelper.yml @@ -0,0 +1,16 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/JuliaSyntax/.github/workflows/TagBot.yml b/JuliaSyntax/.github/workflows/TagBot.yml new file mode 100644 index 0000000000000..f49313b662013 --- /dev/null +++ b/JuliaSyntax/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/JuliaSyntax/.gitignore b/JuliaSyntax/.gitignore new file mode 100644 index 0000000000000..8a934c09e33c3 
--- /dev/null +++ b/JuliaSyntax/.gitignore @@ -0,0 +1,7 @@ +/Manifest.toml +/tools/pkgs +/tools/pkg_tars +/tools/logs.txt +/docs/build +*.cov +/.vscode/settings.json diff --git a/JuliaSyntax/.mailmap b/JuliaSyntax/.mailmap new file mode 100644 index 0000000000000..a16a55c644b31 --- /dev/null +++ b/JuliaSyntax/.mailmap @@ -0,0 +1,2 @@ +Claire Foster +Claire Foster diff --git a/JuliaSyntax/LICENSE.md b/JuliaSyntax/LICENSE.md new file mode 100644 index 0000000000000..7efd19088a06f --- /dev/null +++ b/JuliaSyntax/LICENSE.md @@ -0,0 +1,45 @@ +The JuliaSyntax.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2021 Julia Computing and contributors +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. + +The code in src/tokenize*.jl and test/tokenize.jl is derived from the Tokenize.jl +package and is also licensed under the MIT "Expat" License: + +> Copyright (c) 2016: Kristoffer Carlsson. 
+> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. 
+>
diff --git a/JuliaSyntax/Project.toml b/JuliaSyntax/Project.toml
new file mode 100644
index 0000000000000..6ab84fdfb4733
--- /dev/null
+++ b/JuliaSyntax/Project.toml
@@ -0,0 +1,16 @@
+name = "JuliaSyntax"
+uuid = "70703baa-626e-46a2-a12c-08ffd08c73b4"
+authors = ["Claire Foster and contributors"]
+version = "2.0.0-DEV"
+
+[compat]
+Serialization = "1.0"
+julia = "1.0"
+
+[extras]
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test", "Serialization", "Logging"]
diff --git a/JuliaSyntax/README.md b/JuliaSyntax/README.md
new file mode 100644
index 0000000000000..ae9b2b9760b36
--- /dev/null
+++ b/JuliaSyntax/README.md
@@ -0,0 +1,46 @@
+# JuliaSyntax
+
+[![Build Status](https://github.com/c42f/JuliaSyntax.jl/workflows/CI/badge.svg)](https://github.com/c42f/JuliaSyntax.jl/actions)
+[![codecov.io](https://codecov.io/github/JuliaLang/JuliaSyntax.jl/coverage.svg?branch=main)](https://codecov.io/github/JuliaLang/JuliaSyntax.jl?branch=main)
+
+A Julia compiler frontend, written in Julia.
+
+Read the [documentation](https://JuliaLang.github.io/JuliaSyntax.jl/dev) for
+more information.
+
+### Status
+
+JuliaSyntax.jl is used as the new default Julia parser in Julia 1.10.
+It's highly compatible with Julia's older
+[femtolisp-based parser](https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm) -
+It parses all of Base, the standard libraries and General registry. Some minor
+differences remain where we've decided to fix bugs or strange behaviors in the
+reference parser.
+
+The AST and tree data structures are usable but their APIs will evolve as we
+try out various use cases. Parsing to the standard `Expr` AST is always
+possible and will be stable.
+
+The intention is to extend this library over time to cover more of the Julia
+compiler frontend.
+
+# Getting involved
+
+For people who want to help improve Julia's error messages by contributing to
+JuliaSyntax, I'd suggest looking through the issue list at
+https://github.com/JuliaLang/JuliaSyntax.jl/issues and choosing a small issue
+or two to work on to familiarize yourself with the code. Anything marked with
+the labels `intro issue` or `bug` might be a good place to start.
+
+Also watching the [2022 JuliaCon talk](https://www.youtube.com/watch?v=CIiGng9Brrk)
+and reading the [design](https://julialang.github.io/JuliaSyntax.jl/dev/design/) and
+[reference](https://julialang.github.io/JuliaSyntax.jl/dev/reference/)
+documentation should be good for an overview.
+
+As of May 2023, we've got really good positional tracking within the source,
+but JuliaSyntax really needs a better system for parser recovery before the
+errors are really nice. This requires some research. For example, you could
+read up on how rust-analyzer does recovery, or rslint - both these are
+event-based recursive descent parsers with similar structure to JuliaSyntax
+(though in Rust). I also want to investigate whether we can do data-driven
+parser recovery using an ML technique. But again, this is a research project.
diff --git a/JuliaSyntax/docs/Manifest.toml b/JuliaSyntax/docs/Manifest.toml new file mode 100644 index 0000000000000..ce4d6bed870e8 --- /dev/null +++ b/JuliaSyntax/docs/Manifest.toml @@ -0,0 +1,254 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0" +manifest_format = "2.0" +project_hash = "46b5b82f24e4b5d97afc2843032730b022086b31" + +[[deps.ANSIColoredPrinters]] +git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" +uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" +version = "0.0.1" + +[[deps.AbstractTrees]] +git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.4" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Documenter]] +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"] +git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac" +uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +version = "1.2.1" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + +[[deps.FileWatching]] +uuid = 
"7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.42.0+0" + +[[deps.IOCapture]] +deps = ["Logging", "Random"] +git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.2.3" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.2.2" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = 
"29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+1" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.1.10" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.12+0" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.1" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.10.0" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" 
+version = "1.2.0" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.1" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.52.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/JuliaSyntax/docs/Project.toml b/JuliaSyntax/docs/Project.toml new file mode 100644 index 0000000000000..1814eb3304f3c --- /dev/null +++ b/JuliaSyntax/docs/Project.toml @@ -0,0 +1,5 @@ +[deps] +Documenter = 
"e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "1" diff --git a/JuliaSyntax/docs/make.jl b/JuliaSyntax/docs/make.jl new file mode 100644 index 0000000000000..5c3a094ba8691 --- /dev/null +++ b/JuliaSyntax/docs/make.jl @@ -0,0 +1,26 @@ +using Documenter, JuliaSyntax + +makedocs(; + modules=[JuliaSyntax], + format=Documenter.HTML( + repolink="https://github.com/JuliaLang/JuliaSyntax.jl" + ), + pages=[ + "Overview" => "index.md" + "How To" => "howto.md" + "Reference" => [ + "reference.md" + "api.md" + ] + "Design Discussion" => "design.md" + ], + repo="https://github.com/JuliaLang/JuliaSyntax.jl/blob/{commit}{path}#L{line}", + sitename="JuliaSyntax.jl", + authors = "Claire Foster and contributors: https://github.com/JuliaLang/JuliaSyntax.jl/graphs/contributors", + warnonly = true +) + +deploydocs(; + repo="github.com/JuliaLang/JuliaSyntax.jl", + push_preview=true +) diff --git a/JuliaSyntax/docs/src/api.md b/JuliaSyntax/docs/src/api.md new file mode 100644 index 0000000000000..5dfbec6e4fcc3 --- /dev/null +++ b/JuliaSyntax/docs/src/api.md @@ -0,0 +1,170 @@ +# API Reference + +## Parsing + +```@docs +JuliaSyntax.parsestmt +JuliaSyntax.parseall +JuliaSyntax.parseatom +``` + +### Low level parsing API + +The `ParseStream` interface which provides a low-level stream-like I/O +interface for writing the parser. The parser does not depend on or produce any +concrete tree data structure as part of the parsing phase but the output spans +can be post-processed into various tree data structures as required using +[`JuliaSyntax.build_tree`](@ref). + +```@docs +JuliaSyntax.parse! +JuliaSyntax.ParseStream +JuliaSyntax.build_tree +``` + +## Tokenization + +```@docs +JuliaSyntax.tokenize +JuliaSyntax.untokenize +JuliaSyntax.Token +``` + +## Source code handling + +This section describes the generic functions for source text, source location +computation and formatting functions. 
+ +Contiguous syntax objects like nodes in the syntax tree should implement the +following where possible: + +```@docs +JuliaSyntax.sourcefile +JuliaSyntax.byte_range +``` + +This will provide implementations of the following which include range +information, line numbers, and fancy highlighting of source ranges: + +```@docs +JuliaSyntax.first_byte +JuliaSyntax.last_byte +JuliaSyntax.filename +JuliaSyntax.source_line +JuliaSyntax.source_location +JuliaSyntax.char_range +JuliaSyntax.sourcetext +JuliaSyntax.highlight +``` + +`SourceFile`-specific functions: + +```@docs +JuliaSyntax.SourceFile +JuliaSyntax.source_line_range +``` + +## Expression predicates, kinds and flags + +Expressions are tagged with a kind - like a type, but represented as an integer +tag rather than a full Julia type for efficiency. (Very like the tag of a "sum +type".) `Kind`s are constructed with the `@K_str` macro. + +```@docs +JuliaSyntax.@K_str +JuliaSyntax.Kind +``` + +The kind of an expression `ex` in a tree should be accessed with `kind(ex)` + +```@docs +JuliaSyntax.kind +``` + +In addition to the `kind`, a small integer set of "flags" is included to +further distinguish details of each expression, accessed with the `flags` +function. The kind and flags can be wrapped into a `SyntaxHead` which is +accessed with the `head` function. + +```@docs +JuliaSyntax.flags +JuliaSyntax.SyntaxHead +JuliaSyntax.head +``` + +Details about the flags may be extracted using various predicates: + +```@docs +JuliaSyntax.is_trivia +JuliaSyntax.is_prefix_call +JuliaSyntax.is_infix_op_call +JuliaSyntax.is_prefix_op_call +JuliaSyntax.is_postfix_op_call +JuliaSyntax.is_dotted +JuliaSyntax.is_suffixed +JuliaSyntax.is_decorated +JuliaSyntax.numeric_flags +``` + +Some of the more unusual predicates are accessed merely with `has_flags(x, +flag_bits)`, where any of the following uppercase constants may be used for +`flag_bits` after checking that the `kind` is correct. 
+ +```@docs +JuliaSyntax.has_flags +JuliaSyntax.TRIPLE_STRING_FLAG +JuliaSyntax.RAW_STRING_FLAG +JuliaSyntax.PARENS_FLAG +JuliaSyntax.TRAILING_COMMA_FLAG +JuliaSyntax.COLON_QUOTE +JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG +JuliaSyntax.MUTABLE_FLAG +JuliaSyntax.BARE_MODULE_FLAG +JuliaSyntax.SHORT_FORM_FUNCTION_FLAG +``` + +## Syntax trees + +Access to the children of a tree node is provided by the functions + +```@docs +JuliaSyntax.is_leaf +JuliaSyntax.numchildren +JuliaSyntax.children +``` + +For convenient access to the children, we also provide `node[i]`, `node[i:j]` +and `node[begin:end]` by implementing `Base.getindex()`, `Base.firstindex()` and +`Base.lastindex()`. We choose to return a view from `node[i:j]` to make it +non-allocating. + +Tree traversal is supported by using these functions along with the predicates +such as [`kind`](@ref) listed above. + +### Trees referencing the source + +```@docs +JuliaSyntax.SyntaxNode +``` + +Functions applicable to `SyntaxNode` include everything in the sections on +heads/kinds as well as the accessor functions in the source code handling +section. + +### Relocatable syntax trees + +[`GreenNode`](@ref) is a special low level syntax tree: it's "relocatable" in +the sense that it doesn't carry an absolute position in the source code or even +a reference to the source text. This allows it to be reused for incremental +parsing, but does make it a pain to work with directly! 
+ +```@docs +JuliaSyntax.GreenNode +``` + +Green nodes only have a relative position so implement `span()` instead of +`byte_range()`: + +```@docs +JuliaSyntax.span +``` diff --git a/JuliaSyntax/docs/src/design.md b/JuliaSyntax/docs/src/design.md new file mode 100644 index 0000000000000..a11d1b64140ed --- /dev/null +++ b/JuliaSyntax/docs/src/design.md @@ -0,0 +1,850 @@ +# Design discussion and developer documentation + +## Goals + +* Lossless parsing of Julia code with precise source mapping +* Production quality error recovery, reporting and unit testing +* Parser structure similar to Julia's flisp-based parser +* Speedy enough for interactive editing +* "Compilation as an API" to support all sorts of tooling +* Grow to encompass the rest of the compiler frontend: macro expansion, + desugaring and other lowering steps. +* Replace Julia's flisp-based reference frontend + +## Design Opinions + +* Parser implementation should be independent from tree data structures. So + we have the `ParseStream` interface. +* Tree data structures should be *layered* to balance losslessness with + abstraction and generality. So we have `SyntaxNode` (an AST) layered on top + of `GreenNode` (a lossless parse tree). We might need other tree types later. +* Fancy parser generators still seem marginal for production compilers. We use + a boring but flexible recursive descent parser. + +# Parser implementation + +Our goal is to losslessly represent the source text with a tree; this may be +called a "lossless syntax tree". (This is sometimes called a "concrete syntax +tree", but that term has also been used for the parse tree of the full formal +grammar for a language including any grammar hacks required to solve +ambiguities, etc. So we avoid this term.) + +`JuliaSyntax` uses a mostly recursive descent parser which closely +follows the high level structure of the flisp reference parser. This makes the +code familiar and reduces porting bugs. 
It also gives a lot of flexibility for +designing the diagnostics, tree data structures, compatibility with different +Julia versions, etc. I didn't choose a parser generator as they still seem +marginal for production compilers — for the parsing itself they don't seem +*greatly* more expressive and they can be less flexible for the important +"auxiliary" code which needs to be written in either case. + +### Lexing + +We use a hand-written lexer (a heavily modified version of +[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl)) +* Newline-containing whitespace is emitted as a separate kind +* Tokens inside string interpolations are emitted separately from the string +* Strings delimiters are separate tokens and the actual string always has the + `String` kind +* Additional contextual keywords (`as`, `var`, `doc`) have been added and + moved to a subcategory of keywords. +* Nonterminal kinds were added (though these should probably be factored out again) +* Various bugs fixed and additions for newer Julia versions + +### Parsing with ParseStream + +The main parser innovation is the `ParseStream` interface which provides a +stream-like I/O interface for writing the parser. The parser does not +depend on or produce any concrete tree data structure as part of the parsing +phase but the output nodes can be post-processed into various tree data +structures as required. This is like the design of rust-analyzer though with a +simpler implementation. + +Parsing proceeds by recursive descent; + +* The parser consumes a flat list of lexed tokens as *input* using `peek()` to + examine tokens and `bump()` to consume them. +* The parser produces a flat list of `RawGreenNode`s as *output* using `bump()` to + transfer tokens to the output and `position()`/`emit()` for nonterminal nodes. +* Diagnostics are emitted as separate text spans +* Whitespace and comments are automatically `bump()`ed and don't need to be + handled explicitly. 
The exception is syntactically relevant newlines in space + sensitive mode. +* Parser modes are passed down the call tree using `ParseState`. + +The output nodes track the byte range, a syntax "kind" stored as an integer +tag, and some flags. Each node also stores either the number of child nodes +(for non-terminals) or the original token kind (for terminals). The kind tag +makes the nodes a [sum type](https://blog.waleedkhan.name/union-vs-sum-types/) +but where the type is tracked explicitly outside of Julia's type system. + +For lossless parsing the output nodes must cover the entire input text. Using +`bump()`, `position()` and `emit()` in a natural way also ensures that: +* Nodes are cleanly nested with children contained entirely within their parents +* Sibling nodes are emitted in source order +* Parent nodes are emitted after all their children. + +These properties make the output nodes a post-order traversal of a +["green tree"](#raw-syntax-tree--green-tree) +in the terminology of C#'s Roslyn compiler, with the tree structure +implicit in the node spans. + +### Tree construction + +The `build_tree` function uses the implicit tree structure in the `ParseStream` +output to assemble concrete tree data structures. Since the output is already +a post-order traversal of `RawGreenNode`s with node spans encoding parent-child +relationships, tree construction is straightforward. We build on top of this to +define `build_tree` for various tree types including `GreenNode`, the AST type +`SyntaxNode`, and for normal Julia `Expr`. + +### Error recovery + +The goal of the parser is to produce well-formed hierarchical structure from +the source text. For interactive tools we need this to work even when the +source text contains errors; it's the job of the parser to include the recovery +heuristics to make this work. 
+ +Concretely, the parser in `JuliaSyntax` should always produce a green tree +which is *well formed* in the sense that `GreenNode`s of a given `Kind` have +well-defined layout of children. This means the `GreenNode` to `SyntaxNode` +transformation is deterministic and tools can assume they're working with a +"mostly valid" AST. + +What does "mostly valid" mean? We allow the tree to contain the following types +of error nodes: + +* Missing tokens or nodes may be **added** as placeholders when they're needed + to complete a piece of syntax. For example, we could parse `a + (b *` as + `(call-i a + (call-i * b XXX))` where `XXX` is a placeholder error node. +* A sequence of unexpected tokens may be **removed** by collecting + them as children of an error node and treating them as syntax trivia during + AST construction. For example, `a + b end * c` could be parsed as the green + tree `(call-i a + b (error-t end * c))`, and turned into the AST `(call + a b)`. + +We want to encode both these cases in a way which is simplest for downstream +tools to use. This is an open question, but for now we use `K"error"` as the +kind, with the `TRIVIA_FLAG` set for unexpected syntax. + +# Syntax trees + +Julia's `Expr` abstract syntax tree can't store precise source locations or +deal with syntax trivia like whitespace or comments. So we need some new tree +types in `JuliaSyntax`. + +JuliaSyntax currently deals in three types of trees: +* `GreenNode` is a minimal *lossless syntax tree* where + - Nodes store a kind and length in bytes, but no text + - Syntax trivia are included in the list of children + - Children are strictly in source order +* `SyntaxNode` is an *abstract syntax tree* which has + - An absolute position and pointer to the source text + - Children strictly in source order + - Leaf nodes store values, not text + - Trivia are ignored, but there is a 1:1 mapping of non-trivia nodes to the + associated `GreenTree` nodes. 
+* `Expr` is used as a conversion target for compatibility + +## More about syntax kinds + +We generally track the type of syntax nodes with a syntax "kind", stored +explicitly in each node as an integer tag. This effectively makes the node type a +[sum type](https://blog.waleedkhan.name/union-vs-sum-types/) in the type system +sense, but with the type tracked explicitly outside of Julia's type system. + +Managing the type explicitly brings a few benefits: +* Code and data structures for manipulating syntax nodes are always concretely + typed from the point of view of the compiler. +* We control the data layout and can pack the kind into very few bits along + with other flags bits, as desired. +* Predicates such as `is_operator` can be extremely efficient, given that we + know the meaning of the kind's bits. +* The kind can be applied to several different tree data structures, or + manipulated by itself. +* Pattern matching code is efficient when the full set of kinds is closed and + known during compilation. + +There's arguably a few downsides: +* Normal Julia dispatch can't express dispatch over syntax kind. Luckily, + a pattern matching macro can provide a very elegant way of expressing such + algorithms over a non-extensible set of kinds, so this is not a big problem. +* Different node kinds could come with different data fields, but a syntax + tree must have generic fields to cater for all kinds. (Consider as an analogy + the normal Julia AST `QuoteNode` with a single field vs `Expr` with generic + `head` and `args` fields.) This could be a disadvantage for code which + processes one specific kind but for generic code processing many kinds + having a generic but *concrete* data layout should be faster. 
+ +# Differences from the flisp parser + +_See also the [§ Comparisons to other packages](#comparisons-to-other-packages) section._ + +Practically the flisp parser is not quite a classic [recursive descent +parser](https://en.wikipedia.org/wiki/Recursive_descent_parser), because it +often looks back and modifies the output tree it has already produced. We've +tried to eliminate this pattern in favor of lookahead where possible because + +* It works poorly when the parser is emitting a stream of node spans with + strict source ordering constraints. +* It's confusing to reason about this kind of code + +However, on occasion it seems to solve genuine ambiguities where Julia code +can't be parsed top-down with finite lookahead. Eg for the `kw` vs `=` +ambiguity within parentheses. In these cases we put up with using the +functions `look_behind` and `reset_node!()`. + +## Code structure + +Large structural changes were generally avoided while porting. In particular, +nearly all function names for parsing productions are the same with `-` +replaced by `_` and predicates prefixed by `is_`. + +Some notable differences: + +* `parse-arglist` and a parts of `parse-paren-` have been combined into a + general function `parse_brackets`. This function deals with all the odd + corner cases of how the AST is emitted when mixing `,` and `;` within + parentheses. In particular regard to: + - Determining whether `;` are block syntax separators or keyword parameters + - Determining whether to emit `parameter` sections based on context + - Emitting key-value pairs either as `kw` or `=` depending on context +* The way that `parse-resword` is entered has been rearranged to avoid parsing + reserved words with `parse-atom` inside `parse-unary-prefix`. Instead, we + detect reserved words and enter `parse_resword` earlier. + +## Flisp parser bugs + +Here's some behaviors which seem to be bugs. (Some of these we replicate in the +name of compatibility, perhaps with a warning.) 
+ +* Macro module paths allow calls which gives weird stateful semantics! + ```julia + b() = rand() > 0.5 ? Base : Core + b().@info "hi" + ``` +* Misplaced `@` in macro module paths like `A.@B.x` is parsed as odd + broken-looking AST like `(macrocall (. A (quote (. B @x))))`. It should + probably be rejected. +* Operator prefix call syntax doesn't work in the cases like `+(a;b,c)` where + keyword parameters are separated by commas. A tuple is produced instead. +* `const` and `global` allow chained assignment, but the right hand side is not + constant. `a` is const here but not `b`. + ```julia + const a = b = 1 + ``` +* Parsing the `ncat` array concatenation syntax within braces gives + strange AST: `{a ;; b}` parses to `(bracescat 2 a b)` which is the same as + `{2 ; a ; b}`, but should probably be `(bracescat (nrow 2 a b))` in analogy + to how `{a b}` produces `(bracescat (row a b))`. +* `export a, \n $b` is rejected, but `export a, \n b` parses fine. +* In try-catch-finally, the `finally` clause is allowed before the `catch`, but + always executes afterward. (Presumably this was a mistake? It seems pretty awful!) +* When parsing `"[x \n\n ]"` the flisp parser gets confused, but `"[x \n ]"` is + correctly parsed as `Expr(:vect)` (maybe fixed in 1.7?) +* `f(x for x in in xs)` is accepted, and parsed very strangely. +* Octal escape sequences saturate rather than being reported as errors. Eg, + `"\777"` results in `"\xff"`. This is inconsistent with + `Base.parse(::Type{Int}, ...)` +* Leading dots in import paths with operator-named modules are parsed into + dotted operators rather than a relative path. Ie, we have `import .⋆` parsing + to `(import (. .⋆))` whereas it should be `(import (. . ⋆))` for consistency + with the parsing of `import .A`. +* Looking back on the output disregards grouping parentheses which can lead to + odd results in some cases. 
For example, `f(((((x=1)))))` parses as a keyword + call to function `f` with the keyword `x=1`, but arguably it should be an + assignment. +* Hexfloat literals can have a trailing `f` for example, `0x1p1f` + but this doesn't do anything. In the `flisp` C code such cases are treated as + Float32 literals and this was intentional https://github.com/JuliaLang/julia/pull/2925 + but this has never been officially supported in Julia. It seems this bug + arises from `(set! pred char-hex?)` in `parse-number` accepting hex exponent + digits, all of which are detected as invalid except for a trailing `f` when + processed by `isnumtok_base`. +* `begin` and `end` are not parsed as keywords when indexing. Typed comprehensions + initially look the same, but can be distinguished from indexing once we handle + a `for` token; it is safe to treat `begin` and `end` as keywords afterwards. The + reference parser *only* handles this well when there's a newline before `for`: + ```julia + Any[foo(i) + for i in x if begin + true + end + ] + ``` + works, while + ```julia + Any[foo(i) for i in x if begin + true + end + ] + ``` + does not. JuliaSyntax handles both cases. + +## Parsing / AST oddities and warts + +### Questionable allowed forms + +There's various allowed syntaxes which are fairly easily detected in the +parser, but which will be rejected later during lowering. To allow building +DSLs this is fine and good but some such allowed syntaxes don't seem very +useful, even for DSLs: + +* `macro (x) end` is allowed but there are no anonymous macros. +* `abstract type A < B end` and other subtype comparisons are allowed, but + only `A <: B` makes sense. +* `x where {S T}` produces `(where x (bracescat (row S T)))`. This seems pretty weird! 
+* `[x for outer x in xs]` parses, but `outer` makes no real sense in this + context (and using this form is a lowering error) + +### `kw` and `=` inconsistencies + +There's many apparent inconsistencies between how `kw` and `=` are used when +parsing `key=val` pairs inside parentheses. + +* Inconsistent parsing of tuple keyword args inside vs outside of dot calls + ```julia + (a=1,) # (tuple (= a 1)) + f.(a=1) # (tuple (kw a 1)) + ``` +* Mixtures of `,` and `;` in calls give nested parameter AST which parses + strangely, and is kind-of-horrible to use. + ```julia + # (tuple (parameters (parameters e f) c d) a b) + (a,b; c,d; e,f) + ``` +* Long-form anonymous functions have argument lists which are parsed + as tuples (or blocks!) rather than argument lists and this mess appears to be + papered over as part of lowering. For example, in `function (a;b) end` the + `(a;b)` is parsed as a block! This leads to more inconsistency in the use of + `kw` for keywords. + + +### Other oddities + +* Operators with suffixes don't seem to always be parsed consistently as the + same operator without a suffix. Unclear whether this is by design or mistake. + For example, `[x +y] ==> (hcat x (+ y))`, but `[x +₁y] ==> (hcat (call +₁ x y))` + +* `global const x=1` is normalized by the parser into `(const (global (= x 1)))`. + I suppose this is somewhat useful for AST consumers, but reversing the source + order is pretty weird and inconvenient when moving to a lossless parser. + +* `let` bindings might be stored in a block, or they might not be, depending on + special cases: + ```julia + # Special cases not in a block + let x=1 ; end # ==> (let (= x 1) (block)) + let x::1 ; end # ==> (let (:: x 1) (block)) + let x ; end # ==> (let x (block)) + + # In a block + let x=1,y=2 ; end # ==> (let (block (= x 1) (= y 2) (block))) + let x+=1 ; end # ==> (let (block (+= x 1)) (block)) + ``` + +* The `elseif` condition is always in a block but not the `if` condition. 
+ Presumably because of the need to add a line number node in the flisp parser + `if a xx elseif b yy end ==> (if a (block xx) (elseif (block b) (block yy)))` + +* Spaces are allowed between import dots — `import . .A` is allowed, and + parsed the same as `import ..A` + +* `import A..` produces `(import (. A .))` which is arguably nonsensical, as `.` + can't be a normal identifier. + +* The raw string escaping rules are *super* confusing for backslashes near + the end of the string: `raw"\\\\ "` contains four backslashes, whereas + `raw"\\\\"` contains only two. However this was an intentional feature to + allow all strings to be represented and it's unclear whether the situation + can be improved. + +* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and + `@S {a b}` parse. Conversely, `@S[a b]` parses. + +* Macro names and invocations are post-processed from the output of + `parse-atom` / `parse-call`, which leads to some surprising and questionable + constructs which "work": + - Absurdities like `@(((((a))))) x ==> (macrocall @a x)` + - Infix macros!? `@(x + y) ==> (macrocall @+ x y)` (ok, kinda cute and has + some weird logic to it... but what?) + - Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)` + +* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`) + seems like unnecessary variation in syntax. It makes parsing valid macro + module paths more complex and leads to oddities like `@$.x y ==> (macrocall + ($ (quote x)) y` where the `$` is first parsed as a macro name, but turns out + to be the module name after the `.` is parsed. But `$` can never be a valid + module name in normal Julia code so this makes no sense. + +* Triple quoted `var"""##"""` identifiers are allowed. But it's not clear these + are required or desired given that they come with the complex triple-quoted + string deindentation rules. 
+ +* Deindentation of triple quoted strings with mismatched whitespace is weird + when there's nothing but whitespace. For example, we have + `"\"\"\"\n \n \n \"\"\"" ==> "\n \n"` so the middle line of whitespace + here isn't dedented but the other two longer lines are?? Here it seems more + consistent that either (a) the middle line should be deindented completely, + or (b) all lines should be dedented only one character, as that's the + matching prefix. + +* Parsing of anonymous function arguments is somewhat inconsistent. + `function (xs...) \n body end` parses the argument list as `(... xs)`, whereas + `function (x) \n body end` parses the argument list as `(tuple x)`. + +* The difference between multidimensional vs flattened iterators is subtle, and + perhaps too syntactically permissive. For example, + - `[(x,y) for x * in 1:10, y in 1:10]` is a multidimensional iterator + - `[(x,y) for x * in 1:10 for y in 1:10]` is a flattened iterator + - `[(x,y) for x in 1:10, y in 1:10 if y < x]` is a flattened iterator + + It's this last case which seems problematic (why not *require* the second + form as a more explicit way to indicate flattening?). It's not even pretty + printed correctly: + ```julia-repl + julia> :([(x,y) for x in 1:10, y in 1:10 if y < x]) + :([(x, y) for $(Expr(:filter, :(y < x), :(x = 1:10), :(y = 1:10)))]) + ``` + +* The character `'` may be written without escaping as `'''` rather than + requiring the form `'\''`. + +# Comparisons to other packages + +### Official Julia compiler + +_See also the [§ Differences from the flisp parser](#differences-from-the-flisp-parser) section._ + +The official Julia compiler frontend lives in the Julia source tree. 
It's +mostly contained in just a few files: +* The parser in [src/julia-parser.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-parser.scm) +* Macro expansion in [src/ast.c](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/ast.c) and [src/macroexpand.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/macroexpand.scm) +* Syntax lowering in [src/julia-syntax.scm](https://github.com/JuliaLang/julia/blob/9c4b75d7f63d01d12b67aaf7ce8bb4a078825b52/src/julia-syntax.scm) +* The flisp runtime and C extensions for Julia in [src/flisp](https://github.com/JuliaLang/julia/tree/master/src/flisp) +* Supporting utility functions in a few other `.scm` and `.c` files. + +There's two issues with the official reference frontend which suggest a rewrite. + +First, there's no support for precise source locations and the existing data +structures (bare flisp lists) can't easily be extended to add these. Fixing +this would require changes to nearly all of the code. + +Second, it's written in flisp: an aesthetically pleasing, minimal but obscure +implementation of Scheme. Learning Scheme is actually a good way to appreciate +some of Julia's design inspiration, but it's quite a barrier for developers of +Julia language tooling. (Flisp has no user-level documentation but non-schemers +can refer to the [Racket documentation](https://docs.racket-lang.org) which is +quite compatible for basic things.) In addition to the social factors, having +the embedded flisp interpreter and runtime with its own separate data +structures and FFI is complex and inefficient. + +### JuliaParser.jl + +[JuliaParser.jl](https://github.com/JuliaLang/JuliaParser.jl) +was a direct port of Julia's flisp reference parser, but was abandoned around +Julia 0.5 or so. Furthermore, it doesn't support lossless parsing, and adding +that feature would amount to a full rewrite. 
Given its divergence with the flisp +reference parser since Julia-0.5, it seemed better just to start anew from the +reference parser instead. + +### Tokenize.jl + +[Tokenize.jl](https://github.com/JuliaLang/Tokenize.jl) +is a fast lexer for Julia code. The code from Tokenize has been +imported and used in JuliaSyntax, with some major modifications as discussed in +the [lexer implementation](#lexing) section. + +### CSTParser.jl + +[CSTParser.jl](https://github.com/julia-vscode/CSTParser.jl) +is a ([mostly?](https://github.com/domluna/JuliaFormatter.jl/issues/52#issuecomment-529945126)) +lossless parser with goals quite similar to JuliaParser. It is used extensively +in the VSCode / LanguageServer / JuliaFormatter ecosystem. CSTParser is very +useful, but I do find the implementation hard to understand, and I wanted to try +a fresh approach with a focus on: + +* "Production readiness": Good docs, tests, diagnostics and maximum similarity + with the flisp parser, with the goal of getting the new parser into `Core`. +* Learning from the latest ideas about composable parsing and data structures + from outside Julia. In particular the implementation of `rust-analyzer` is + very clean, well documented, and was a great source of inspiration. +* Composability of tree data structures — I feel like the trees should be + layered somehow with a really lightweight [green tree](#raw-syntax-tree--green-tree) + at the most basic level, similar to Roslyn or rust-analyzer. In comparison, + CSTParser uses a more heavyweight non-layered data structure. Alternatively or + additionally, have a common tree API with many concrete task-specific + implementations. + +A big benefit of the JuliaSyntax parser is that it separates the parser code +from the tree data structures entirely, which should give a lot of flexibility +in experimenting with various tree representations. 
+ +I also want JuliaSyntax to tackle macro expansion and other lowering steps, and +provide APIs for this which can be used by both the core language and the +editor tooling. + +### tree-sitter-julia + +Using a modern production-ready parser generator like `tree-sitter` is an +interesting option and some progress has already been made in +[tree-sitter-julia](https://github.com/tree-sitter/tree-sitter-julia). +But I feel like the grammars for parser generators are only marginally more +expressive than writing the parser by hand, after accounting for the effort +spent on the weird edge cases of a real language and writing the parser's tests +and "supporting code". + +On the other hand, a hand-written parser is completely flexible and can be +mutually understood with the reference implementation, so I chose that approach +for JuliaSyntax. + +# Resources + +## Julia issues + +Here's a few links to relevant Julia issues. + +#### Macro expansion + +* Automatic hygiene for macros https://github.com/JuliaLang/julia/pull/6910 — + would be interesting to implement this in a new frontend. + +#### Lowering + +* A partial implementation of lowering in Julia https://github.com/JuliaLang/julia/pull/32201 — + some of this should be ported. (Last commit at https://github.com/JuliaLang/julia/tree/df61138fcf97d03dcbbba10e962571af9700db56/ ) +* The closure capture problem https://github.com/JuliaLang/julia/issues/15276 — + would be interesting to see whether we can tackle some of the harder cases in + a new implementation. 
+ +## C# Roslyn + +[Persistence, façades and Roslyn’s red-green trees](https://ericlippert.com/2012/06/08/red-green-trees/) +* [Roslyn optimization overview](https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees) +* [Literate C# Usage Example](https://johtela.github.io/LiterateCS/LiterateCS/BlockBuilder.html) + + +## Rust-analyzer + +`rust-analyzer` seems to be very close to what I'm building here, and has come +to the same conclusions on green tree layout with explicit trivia nodes. Their +document on internals +[here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md) +is great. Points of note: + +* They have *three* trees! + 1. Green trees exactly like mine (pretty much all the same design + decisions, including trivia storage). Though note that the team are still + [toying with](https://github.com/rust-analyzer/rust-analyzer/issues/6584) + the idea of using the Roslyn model of trivia. + 2. Untyped red syntax trees somewhat like mine, but much more minimal. For + example, these don't attempt to reorder children. + 3. A typed AST layer with a type for each expression head. The AST searches + for children by dynamically traversing the child list each time, rather + than having a single canonical ordering or remembering the placement of + children which the parser knew. +* "Parser does not see whitespace nodes. Instead, they are attached to the + tree in the TreeSink layer." This may be relevant to us - it's a pain to + attach whitespace to otherwise significant tokens, and inefficient to + allocate and pass around a dynamic list of whitespace trivia. +* "In practice, incremental reparsing doesn't actually matter much for IDE + use-cases, parsing from scratch seems to be fast enough." (I wonder why + they've implemented incremental parsing then?) +* There's various comments about macros... Rust macro expansion seems quite + different from Julia (it appears it may be interleaved with parsing??) 
+ +In general I think it's unclear whether we want typed ASTs in Julia and we +particularly need to deal with the fact that `Expr` is the existing public +interface. Could we have `Expr2` wrap `SyntaxNode`? + +* A related very useful set of blog posts which discuss using the rust syntax + tree library (rowan) for representing of a non-rust toy language is here + https://dev.to/cad97/lossless-syntax-trees-280c + +Not all the design decisions in `rust-analyzer` are finalized but the +[architecture document](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/architecture.md) +is a fantastic source of design inspiration. + +Highlights: +* "The parser is independent of the particular tree structure and particular + representation of the tokens. It transforms one flat stream of events into + another flat stream of events." This seems great, let's adopt it! +* TODO + +## RSLint + +[RSLint](https://rslint.org/dev) is a linter for javascript, built in Rust. It +uses the same parsing infrastructure and green tree libraries `rust-analyzer`. +There's an excellent and friendly high level overview of how all this works in +the rslint [parsing devdocs](https://rslint.org/dev/parsing.html). + +Points of note: + +* Backtracking and restarting the parser on error is actually quite simple in + the architecture we (mostly) share with `rust-analyzer`: + > ... events allow us to cheaply backtrack the parser by simply draining + > the events and resetting the token source cursor back to some place. + +* The section on [error + recovery](https://rslint.org/dev/parsing.html#error-recovery) is interesting; + they talk about various error recovery strategies. + +## Diagnostics + +The paper [P2429 - Concepts Error Messages for +Humans](https://wg21.tartanllama.xyz/P2429%20-%20Concepts%20Error%20Messages%20for%20Humans.pdf) +is C++ centric, but has a nice review of quality error reporting in various +compilers including Elm, ReasonML, Flow, D and Rust. 
+
+Some Rust-specific resources:
+* [rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html)
+* The source of the Rust compiler's diagnostics system:
+  - The [`println!` macro](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_builtin_macros/src/format.rs)
+    shows how these can be emitted from macros
+  - The parser's [diagnostics.rs](https://github.com/rust-lang/rust/blob/0b6f079e4987ded15c13a15b734e7cfb8176839f/compiler/rustc_parse/src/parser/diagnostics.rs)
+
+## General resources about parsing
+
+* [Modern parser generator](https://matklad.github.io/2018/06/06/modern-parser-generator.html)
+  has a lot of practical notes on writing parsers. Highlights:
+  - Encourages writing tests for handwritten parsers as inline comments
+  - Mentions Pratt parsers for simple operator precedence parsing. Good articles:
+    - [From Aleksey Kladov (matklad - the main rust-analyzer author, etc)](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html)
+    - [From Bob Nystrom (munificent - one of the Dart devs, etc)](http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/)
+  - Some discussion of error recovery
+
+* Some notes about stateful lexers for parsing shell-like string interpolations:
+  http://www.oilshell.org/blog/2017/12/17.html
+
+
+# Design notes
+
+The following are some fairly disorganized design notes covering a mixture of
+things which have already been done and musings about further work.
+
+## Prototyping approach
+
+The tree datastructure design here is tricky:
+
+1. The symbolic part of compilation (the compiler frontend) incrementally
+   abstracts and transforms the source text, but errors along the way should
+   refer back to the source.
+   - The tree must be a lossless representation of the source text
+   - Some aspects of the source text (comments, most whitespace) are irrelevant
+     to parsing.
+   - More aspects of the source text are irrelevant after we have an abstract
+     syntax tree of the surface syntax. Some good examples here are the
+     parentheses in `2*(x + y)` and the explicit vs implicit multiplication
+     symbol in `2*x` vs `2x`.
+
+2. There are various types of *analyses*
+- There's many useful ways to augment a syntax tree depending on use case.
+- Analysis algorithms should be able to act on any tree type, ignoring
+  but carrying augmentations which they don't know about.
+
+Having so many use cases suggests it might be best to have several different
+tree types with a common interface rather than one main abstract syntax tree
+type. But it seems useful to figure this out by prototyping several important
+work flows:
+
+* Syntax transformations
+  - Choose some macros to implement. This is a basic test of mixing source
+    trees from different files while preserving precise source locations.
+    (Done in `prototypes/syntax_interpolation.jl`.)
+* Formatting
+  - Re-indent a file. This tests the handling of syntax trivia.
+* Refactoring
+  - A pass to rename local variables. This tests how information from further
+    down the compilation pipeline can be attached to the syntax tree and used
+    to modify the source code.
+* Precise error reporting in lowering
+  - Syntax desugaring `[a, b] = (c, d)` should report "invalid assignment
+    location `[a, b]`". But at a precise source location.
+  - Try something several layers deeper inside lowering?
For example "macro
+    definition not allowed inside a local scope"
+* Incremental reparsing
+  - Reparse a source file, given a byte range replacement
+
+
+## Tree design
+
+### Raw syntax tree / Green tree
+
+Raw syntax tree (or ["Green tree"](https://ericlippert.com/2012/06/08/red-green-trees/)
+in the terminology from Roslyn)
+
+We want GreenNode to be
+* *structurally minimal* — For efficiency and generality
+* *immutable* — For efficiency (& thread safety)
+* *complete* — To preserve parser knowledge
+* *token agnostic* — To allow use with any source language
+
+The simplest idea possible is to have:
+* Leaf nodes are a single token
+* Children are in source order
+
+Call represents a challenge for the AST vs Green tree in terms of node
+placement / iteration for infix operators vs normal prefix function calls.
+
+- The normal problem of `a + 1` vs `+(a, 1)`
+- Or worse, `a + 1 + 2` vs `+(a, 1, 2)`
+
+Clearly in the AST's *interface* we need to abstract over this placement. For
+example with something like the normal Julia AST's iteration order.
+
+### Abstract syntax tree
+
+By pointing to green tree nodes, AST nodes become traceable back to the original
+source.
+
+Unlike most languages, designing a new AST is tricky because the existing
+`Expr` is a very public API used in every macro expansion. User-defined
+macro expansions interpose between the source text and lowering, and using
+`Expr` loses source information in many ways.
+
+There seem to be a few ways forward:
+* Maybe we can give `Expr` some new semi-hidden fields to point back to the
+  green tree nodes that the `Expr` or its `args` list came from?
+* We can use the existing `Expr` during macro expansion and try to recover
+  source information after macro expansion using heuristics. Likely the
+  presence of correct hygiene can help with this.
+* Introducing a new AST would be possible if it were opt-in for some
+  hypothetical "new-style macros" only. Fixing hygiene should go along with
+  this.
Design challenge: How do we make manipulating expressions reasonable + when literals need to carry source location? + +One option which may help bridge between locationless ASTs and something new +may be to have wrappers for the small number of literal types we need to cover. +For example: + +```julia +SourceSymbol <: AbstractSymbol +SourceInt <: Integer +SourceString <: AbstractString +``` + +Having source location attached to symbols would potentially solve most of the +hygiene problem. There's still the problem of macro helper functions which use +symbol literals; we can't very well be changing the meaning of `:x`! Perhaps +the trick there is to try capturing the current module at the location of the +interpolation syntax. Eg, if you do `:(y + $x)`, lowering expands this to +`Core._expr(:call, :+, :y, x)`, but it could expand it to something like +`Core._expr(:call, :+, :y, _add_source_symbol(_module_we_are_lowering_into, x))`? + +## Parsing + +### Error recovery + +Some disorganized musings about error recovery + +Different types of errors seem to occur... + +* Disallowed syntax (such as lack of spaces in conditional expressions) + where we can reasonably just continue parsing and emit the node with an error + flag which is otherwise fully formed. In some cases like parsing infix + expressions with a missing tail, emitting a zero width error token can lead + to a fully formed parse tree without the productions up the stack needing to + participate in recovery. +* A token which is disallowed in current context. Eg, `=` in parse_atom, or a + closing token inside an infix expression. Here we can emit a `K"error"`, but + we can't descend further into the parse tree; we must pop several recursive + frames off. Seems tricky! + +A typical structure is as follows: + +```julia +function parse_foo(ps) + mark = position(ps) + parse_bar(ps) # What if this fails? + if peek(ps) == K"some-token" + bump(ps) + parse_baz(ps) # What if this fails? 
+        emit(ps, mark, K"foo")
+    end
+end
+```
+
+Emitting plain error tokens is good in unfinished infix expressions:
+
+```julia
+begin
+    a = x +
+end
+```
+
+The "missing end" problem is tricky, as the intermediate syntax is valid; the
+problem is often not obvious until we get to EOF.
+
+Missing end
+```julia
+function f()
+    begin
+        a = 10
+end
+
+# <-- Indentation would be wrong if g() was an inner function of f.
+function g()
+end
+```
+
+It seems like ideal error recovery would need to backtrack in this case. For
+example:
+
+- Pop back to the frame which was parsing `f()`
+- Backtrack through the parse events until we find a function with indentation
+  mismatched to the nesting of the parent.
+- Reset ParseStream to a parsing checkpoint before `g()` was called
+- Emit error and exit the function parsing `f()`
+- Restart parsing
+- Somehow make sure all of this can't result in infinite recursion 😅
+
+Missing commas or closing brackets in nested structures also present the
+existing parser with a problem.
+
+```julia
+f(a,
+  g(b,
+    c # -- missing comma?
+    d),
+  e)
+```
+
+Again the local indentation might tell a story
+
+```julia
+f(a,
+  g(b,
+    c # -- missing closing `)` ?
+    d)
+```
+
+But not always!
+
+```julia
+f(a,
+  g(b,
+    c # -- missing closing `,` ?
+    d))
+```
+
+Another particularly difficult problem for diagnostics in the current system is
+broken parentheses or double quotes in string interpolations, especially when
+nested.
+
+# Fun research questions
+
+### Parser Recovery
+
+Can we learn fast and reasonably accurate recovery heuristics for when the
+parser encounters broken syntax, rather than hand-coding these? How would we
+set the parser up so that training works and injecting the model is
+nonintrusive? If the model is embedded in and works together with the parser,
+can it be made compact enough that training is fast and the model itself is
+tiny?
+ +### Formatting + +Given source and syntax tree, can we regress/learn a generative model of +indentation from the syntax tree? Source formatting involves a big pile of +heuristics to get something which "looks nice"... and ML systems have become +very good at heuristics. Also, we've got huge piles of training data — just +choose some high quality, tastefully hand-formatted libraries. diff --git a/JuliaSyntax/docs/src/howto.md b/JuliaSyntax/docs/src/howto.md new file mode 100644 index 0000000000000..c8bd0503591d5 --- /dev/null +++ b/JuliaSyntax/docs/src/howto.md @@ -0,0 +1,37 @@ +# How-To + +This section contains brief recipes for particular tasks + +## Use JuliaSyntax as the default parser + +To use JuliaSyntax as the default Julia parser for the REPL and to `include()` +files, parse code with `Meta.parse()`, etc, put the following in your +startup.jl file: + +```julia +using JuliaSyntax +JuliaSyntax.enable_in_core!() +``` + +This works well in Julia 1.9 but in Julia 1.8 will cause some startup latency. +To reduce that you can create a custom system image by running the code in +`./sysimage/compile.jl` as a Julia script (or directly using the shell, on +unix). Then use `julia -J $resulting_sysimage`. + +Using a custom sysimage has the advantage that package precompilation will also +go through the JuliaSyntax parser. + +### VSCode + +To use JuliaSyntax as the default parser for Julia within VSCode, add the +following to your `startup.jl` file: + +```julia +import JuliaSyntax +JuliaSyntax.enable_in_core!() +``` + +To reduce startup latency you can combine with a custom system as described in +the [Julia VScode docs](https://www.julia-vscode.org/docs/dev/userguide/compilesysimage/#Creating-a-sysimage-for-the-active-environment), +combined with the precompile execution file in `sysimage/precompile_exec.jl` in the source tree. +For additional detail see the discussion in [issue #128](https://github.com/JuliaLang/JuliaSyntax.jl/issues/128). 
diff --git a/JuliaSyntax/docs/src/index.md b/JuliaSyntax/docs/src/index.md new file mode 100644 index 0000000000000..79b8d83b62e6a --- /dev/null +++ b/JuliaSyntax/docs/src/index.md @@ -0,0 +1,79 @@ +# JuliaSyntax.jl + +A Julia compiler frontend, written in Julia. + +A [talk from JuliaCon 2022](https://youtu.be/CIiGng9Brrk) covered some aspects +of this package. + +## Examples + +Here's what parsing of a small piece of code currently looks like in various +forms. We'll use the `JuliaSyntax.parsestmt` function to demonstrate, there's also +`JuliaSyntax.parse!` offering more fine-grained control. + +First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means +the `call` has the infix `-i` flag): + +```julia +julia> using JuliaSyntax + +julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl") +line:col│ tree │ file_name + 1:1 │[call-i] │foo.jl + 1:1 │ [parens] + 1:2 │ [call-i] + 1:2 │ x + 1:4 │ + + 1:6 │ y + 1:8 │ * + 1:9 │ z +``` + +Internally this has a full representation of all syntax trivia (whitespace and +comments) as can be seen with the more raw ["green tree"](#raw-syntax-tree--green-tree) +representation with `GreenNode`. Here ranges on the left are byte ranges, and +`✔` flags nontrivia tokens. Note that the parentheses are trivia in the tree +representation, despite being important for parsing. 
+ +```julia +julia> text = "(x + y)*z" + greentree = parsestmt(JuliaSyntax.GreenNode, text) + 1:9 │[call] + 1:7 │ [parens] + 1:1 │ ( + 2:6 │ [call] + 2:2 │ Identifier ✔ + 3:3 │ Whitespace + 4:4 │ + ✔ + 5:5 │ Whitespace + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ * ✔ + 9:9 │ Identifier ✔ +``` + +`GreenNode` stores only byte ranges, but the token strings can be shown by +supplying the source text string: + +```julia +julia> show(stdout, MIME"text/plain"(), greentree, text) + 1:9 │[call] + 1:7 │ [parens] + 1:1 │ ( "(" + 2:6 │ [call] + 2:2 │ Identifier ✔ "x" + 3:3 │ Whitespace " " + 4:4 │ + ✔ "+" + 5:5 │ Whitespace " " + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ * ✔ "*" + 9:9 │ Identifier ✔ "z" +``` + +Julia `Expr` can also be produced: + +```julia +julia> JuliaSyntax.parsestmt(Expr, "(x + y)*z") +:((x + y) * z) +``` diff --git a/JuliaSyntax/docs/src/reference.md b/JuliaSyntax/docs/src/reference.md new file mode 100644 index 0000000000000..086bc57ad8224 --- /dev/null +++ b/JuliaSyntax/docs/src/reference.md @@ -0,0 +1,326 @@ +# Syntax Trees + +This section describes the syntax trees produced by JuliaSyntax, mainly in +terms of their similarities and differences with the `Expr` tree data +structures used since Julia 0.1. + +## JuliaSyntax trees vs `Expr` + +The tree structure of `GreenNode`/`SyntaxNode` is similar to Julia's `Expr` +data structure but there are various differences: + +### Source ordered children + +The children of our trees are strictly in source order. This has many +consequences in places where `Expr` reorders child expressions. + +* Infix and postfix operator calls have the operator name in the *second* child position. `a + b` is parsed as `(call-i a + b)` - where the infix `-i` flag indicates infix child position - rather than `Expr(:call, :+, :a, :b)`. +* Generators are represented in source order as a single node rather than multiple nested flatten and generator expressions. 
+ +### No `LineNumberNode`s + +Our syntax nodes inherently stores source position, so there's no need for the +`LineNumberNode`s used by `Expr`. + +### More consistent / less redundant `block`s + +Sometimes `Expr` needs redundant block constructs to store `LineNumberNode`s, +but we don't need these. Also in cases which do use blocks we try to use them +consistently. + +* No block is used on the right hand side of short form function syntax +* No block is used for the conditional in `elseif` +* No block is used for the body of anonymous functions after the `->` +* `let` argument lists always use a block regardless of number or form of bindings + +### Faithful representation of the source text / avoid premature lowering + +Some cases of "premature lowering" have been removed, preferring to represent +the source text more closely. + +* `K"macrocall"` - allow users to easily distinguish macrocalls with parentheses from those without them (#218) +* Grouping parentheses are represented with a node of kind `K"parens"` (#222) +* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. +* Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) +* `global const` and `const global` are not normalized by the parser. This is done in `Expr` conversion (#130) +* [`do` syntax](#Do-blocks) is nested as the last child of the call which the `do` lambda will be passed to (#98, #322) +* `@.` is not lowered to `@__dot__` inside the parser (#146) +* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) +* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) +* `return` without a value has zero children, rather than lowering to `return nothing` (#220) +* Command syntax `` `foo` `` parses into a `cmdstring` tree node wrapping the string, as `(cmdstring "foo")` (#438). 
These are lowered to a macro call later rather than by the parser. + +### Containers for string-like constructs + +String-like constructs always come within a container node, not as a single +token. These are useful for tooling which works with the tokens of the source +text. Also separating the delimiters from the text they delimit removes a whole +class of tokenization errors and lets the parser deal with them. + +* string always use `K"string"` to wrap strings, even when they only contain a single string chunk (#94) +* char literals are wrapped in the `K"char"` kind, containing the character literal string along with their delimiters (#121) +* backticks use the `K"cmdstring"` kind +* `var""` syntax uses `K"var"` as the head (#127) +* The parser splits triple quoted strings into string chunks interspersed with whitespace trivia + +### Improvements for AST inconsistencies + +* Field access syntax like `a.b` is parsed as `(. a b)` rather than `(. a (quote b))` to avoid the inconsistency between this and actual quoted syntax literals like `:(b)` and `quote b end` ([#342](https://github.com/JuliaLang/JuliaSyntax.jl/issues/324)) +* Dotted call syntax like `f.(a,b)` and `a .+ b` has been made consistent with the `K"dotcall"` head (#90) +* Standalone dotted operators are always parsed as `(. op)`. For example `.*(x,y)` is parsed as `(call (. *) x y)` (#240) +* The `K"="` kind is used for keyword syntax rather than `kw`, to avoid various inconsistencies and ambiguities (#103) +* Unadorned postfix adjoint is parsed as `call` rather than as a syntactic operator for consistency with suffixed versions like `x'ᵀ` (#124) +* The argument list in the left hand side of `->` is always a tuple. 
For example, `x->y` parses as `(-> (tuple x) y)` rather than `(-> x y)` (#522) + +### Improvements to awkward AST forms + +* `FrankenTuple`s with multiple parameter blocks like `(a=1, b=2; c=3; d=4)` are flattened into the parent tuple instead of using nested `K"parameters"` nodes (#133) +* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) +* The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) +* We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) +* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` are represented with a nested `(iteration (in a as) (in b bs))` rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. +* Short form functions like `f(x) = x + 1` are represented with the `function` head rather than the `=` head. In this case the `SHORT_FORM_FUNCTION_FLAG` flag is set to allow the surface syntactic form to be easily distinguished from long form functions. +* All kinds of updating assignment operators like `+=` are represented with a single `K"op="` head, with the operator itself in infix position. For example, `x += 1` is `(op= x + 1)`, where the plus token is of kind `K"Identifier"`. 
This greatly reduces the number of distinct forms here from a rather big list (`$=` `%=` `&=` `*=` `+=` `-=` `//=` `/=` `<<=` `>>=` `>>>=` `\=` `^=` `|=` `÷=` `⊻=`) and makes the operator itself appear in the AST as kind `K"Identifier"`, as it should. It also makes it possible to add further unicode updating operators while keeping the AST stable.
+
+## More detail on tree differences
+
+### Generators
+
+Flattened generators are uniquely problematic because the Julia AST doesn't
+respect a key rule we normally expect: that the children of an AST node are a
+*contiguous* range in the source text. For example, the `for`s in
+`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to
+mean
+
+```
+for x in xs
+    for y in ys
+        push!(collection, xy)
+    end
+end
+```
+
+so the `xy` prefix is in the *body* of the innermost for loop. Following this,
+the standard Julia AST is like so:
+
+```
+(flatten
+  (generator
+    (generator
+      xy
+      (= y ys))
+    (= x xs)))
+```
+
+however, note that if this tree were flattened, the order would be
+`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the
+source order.
+
+However, our green tree is strictly source-ordered, so we must deviate from the
+Julia AST. We deal with this by grouping cartesian products of iterators
+(separated by commas) within `iteration` blocks as in `for` loops, and
+use the length of the `iteration` block rather than the `flatten` head to
+distinguish flattened iterators. The nested flattens and generators of `Expr`
+forms are reconstructed later. In this form the tree structure resembles the
+source much more closely.
For example, `(xy for x in xs for y in ys)` is parsed as + +``` +(generator + xy + (iteration (in x xs)) + (iteration (in y ys))) +``` + +And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as + +``` +(generator + xy + (iteration (in x xs) (in y ys))) +``` + +### Whitespace trivia inside strings + +For triple quoted strings, the indentation isn't part of the string data so +should also be excluded from the string content within the green tree. That is, +it should be treated as separate whitespace trivia tokens. With this separation +things like formatting should be much easier. The same reasoning goes for +escaping newlines and following whitespace with backslashes in normal strings. + +Detecting string trivia during parsing means that string content is split over +several tokens. Here we wrap these in the K"string" kind (as is already used +for interpolations). The individual chunks can then be reassembled during Expr +construction. (A possible alternative might be to reuse the K"String" and +K"CmdString" kinds for groups of string chunks (without interpolation).) + +Take as an example the following Julia fragment. 
+ +```julia +x = """ + $a + b""" +``` + +Here this is parsed as `(= x (string-s a "\n" "b"))` (the `-s` flag in +`string-s` means "triple quoted string") + +Looking at the green tree, we see the indentation before the `$a` and `b` are +marked as trivia: + +``` +julia> text = "x = \"\"\"\n \$a\n b\"\"\"" + show(stdout, MIME"text/plain"(), parseall(GreenNode, text, rule=:statement), text) + 1:23 │[=] + 1:1 │ Identifier ✔ "x" + 2:2 │ Whitespace " " + 3:3 │ = "=" + 4:4 │ Whitespace " " + 5:23 │ [string] + 5:7 │ """ "\"\"\"" + 8:8 │ String "\n" + 9:12 │ Whitespace " " + 13:13 │ $ "\$" + 14:14 │ Identifier ✔ "a" + 15:15 │ String ✔ "\n" + 16:19 │ Whitespace " " + 20:20 │ String ✔ "b" + 21:23 │ """ "\"\"\"" +``` + +### String nodes always wrapped in `K"string"` or `K"cmdstring"` + +All strings are surrounded by a node of kind `K"string"`, even non-interpolated +literals, so `"x"` parses as `(string "x")`. This makes string handling simpler +and more systematic because interpolations and triple strings with embedded +trivia don't need to be treated differently. It also gives a container in which +to attach the delimiting quotes. + +The same goes for command strings which are always wrapped in `K"cmdstring"` +regardless of whether they have multiple pieces (due to triple-quoted +dedenting) or otherwise. + +### Do blocks + +`do` syntax is represented in the `Expr` AST with the `do` outside the call. +This makes some sense syntactically (do appears as "an operator" after the +function call). + +However semantically this nesting is awkward because the lambda represented by +the do block is passed to the call. This same problem occurs for the macro form +`@f(x) do \n body end` where the macro expander needs a special rule to expand +nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the +expression which are passed to this macro call rather than passing the +expressions up the tree. 
+
+The implied closure is also lowered to a nested `Expr(:->)` expression, though
+it is somewhat premature to do this during parsing.
+
+To resolve these problems we parse
+
+    @f(x, y) do a, b\n body\n end
+    f(x, y) do a, b\n body\n end
+
+by tacking the `do` onto the end of the call argument list:
+
+    (macrocall @f x y (do (tuple a b) body))
+    (call f x y (do (tuple a b) body))
+
+This achieves the following desirable properties
+1. Content of `do` is nested inside the call which improves the match between AST and semantics
+2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro
+3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax
+4. `do` head is used uniformly for both call and macrocall
+5. We preserve the source ordering properties we need for the green tree.
+
+## Tree structure reference
+
+This section may eventually contain a full description of the Julia AST. For
+now, we describe a few of the more subtle features.
+
+### Concatenation syntax
+
+Concatenation syntax comes in two syntax forms:
+* The traditional `hcat`/`vcat`/`row` which deal with concatenation or matrix
+  construction along dimensions one and two.
+* The new `ncat`/`nrow` syntax which deals with concatenation or array
+  construction along arbitrary dimensions.
+
+We write `ncat-3` for concatenation along the third dimension. (The `3` is
+stored in the head flags for `SyntaxNode` trees, and in the first `arg` for
+`Expr` trees.)
Semantically the new syntax can work like the old: +* `ncat-1` is the same as `vcat` +* `ncat-2` is the same as `hcat` +* `row` is the same as `nrow-2` + +#### Vertical concatenation (dimension 1) + +Vertical concatenation along dimension 1 can be done with semicolons or newlines + +```julia-repl +julia> print_tree(:([a + b])) +Expr(:vcat) +├─ :a +└─ :b + +julia> print_tree(:([a ; b])) +Expr(:vcat) +├─ :a +└─ :b +``` + +#### Horizontal concatenation (dimension 2) + +For horizontal concatenation along dimension 2, use spaces or double semicolons + +```julia-repl +julia> print_tree(:([a b])) +Expr(:hcat) +├─ :a +└─ :b + +julia> print_tree(:([a ;; b])) +Expr(:ncat) +├─ 2 +├─ :a +└─ :b +``` + +#### Mixed concatenation + +Concatenation along dimensions 1 and 2 can be done with spaces and single +semicolons or newlines, producing a mixture of `vcat` and `row` expressions: + +```julia-repl +julia> print_tree(:([a b + c d])) +# OR +julia> print_tree(:([a b ; c d])) +Expr(:vcat) +├─ Expr(:row) +│ ├─ :a +│ └─ :b +└─ Expr(:row) + ├─ :c + └─ :d +``` + +General n-dimensional concatenation results in nested `ncat` and `nrow`, for +example + +```julia-repl +julia> print_tree(:([a ; b ;; c ; d ;;; x])) +Expr(:ncat) +├─ 3 +├─ Expr(:nrow) +│ ├─ 2 +│ ├─ Expr(:nrow) +│ │ ├─ 1 +│ │ ├─ :a +│ │ └─ :b +│ └─ Expr(:nrow) +│ ├─ 1 +│ ├─ :c +│ └─ :d +└─ :x +``` diff --git a/JuliaSyntax/prototypes/simple_parser.jl b/JuliaSyntax/prototypes/simple_parser.jl new file mode 100644 index 0000000000000..06a408a26860a --- /dev/null +++ b/JuliaSyntax/prototypes/simple_parser.jl @@ -0,0 +1,174 @@ +# Example parser for a very basic Julia-like language of expressions, calls and +# function definitions. 
+ +using JuliaSyntax: @K_str, is_literal, is_keyword, is_operator + +function parse_toplevel(st) + mark = position(st) + while true + bump_trivia(st, skip_newlines=true) + if peek(st) == K"EndMarker" + break + end + parse_statement(st) + end + emit(st, mark, K"toplevel") +end + +function parse_statement(st) + mark = position(st) + if peek(st) == K"function" + parse_function_def(st) + else + parse_assignment(st) + end +end + +function parse_function_def(st) + mark = position(st) + @assert peek(st) == K"function" + bump(st, TRIVIA_FLAG) + parse_call(st) + parse_block(st, K"end") + emit(st, mark, K"function") +end + +function parse_block(st, closing_kind, mark=position(st)) + while true + bump_trivia(st, skip_newlines=true) + if peek(st) == closing_kind + bump(st, TRIVIA_FLAG) + break + elseif peek(st) == K"EndMarker" + emit_diagnostic(st, error="Unexpected end of input") + break + end + parse_assignment(st) + end + emit(st, mark, K"block") +end + +function parse_assignment(st) + mark = position(st) + parse_expression(st) + if peek(st) == K"=" + bump(st, TRIVIA_FLAG) + parse_expression(st) + emit(st, mark, K"=") + end +end + +function parse_expression(st) + mark = position(st) + parse_term(st) + while peek(st) in (K"+", K"-") + bump(st) + parse_term(st) + emit(st, mark, K"call", INFIX_FLAG) + end +end + +function parse_term(st) + mark = position(st) + parse_call(st) + while peek(st) in (K"*", K"/") + bump(st) + parse_call(st) + emit(st, mark, K"call", INFIX_FLAG) + end +end + +function parse_call(st) + mark = position(st) + parse_atom(st) + if peek(st) == K"(" + bump(st, TRIVIA_FLAG) + need_comma = false + while true + k = peek(st) + if need_comma && k == K"," + bump(st, TRIVIA_FLAG) + k = peek(st) + need_comma = false + end + if k == K")" + bump(st, TRIVIA_FLAG) + break + elseif k == K"EndMarker" + emit_diagnostic(st, error="Unexpected end of input") + break + elseif need_comma + bump_invisible(st, K"error", TRIVIA_TOKEN, error="Expected a `,`") + end + 
parse_expression(st) + need_comma = true + end + emit(st, mark, K"call") + end +end + +function parse_atom(st) + bump_trivia(st, skip_newlines=true) + mark = position(st) + k = peek(st) + if k == K"Identifier" || is_literal(k) + bump(st) + elseif k in (K"-", K"+") + bump(st) + parse_atom(st) + emit(st, mark, K"call") + elseif k == K"(" + bump(st, TRIVIA_FLAG) + parse_expression(st) + if peek(st) == K")" + bump(st, TRIVIA_FLAG) + # emit(st, mark, K"(") + else + bump_invisible(st, K"error", TRIVIA_FLAG, + error="Expected `)` following expression") + end + elseif k == K"begin" + bump(st, TRIVIA_FLAG) + parse_block(st, K"end", mark) + else + bump(st) + emit(st, mark, K"error", + error="Expected literal, identifier or opening parenthesis") + end +end + +function parse_and_show(production::Function, code) + st = ParseStream(code) + production(st) + t = JuliaSyntax.build_tree(GreenNode, st) + show(stdout, MIME"text/plain"(), t, code, show_trivia=true) + if !isempty(st.diagnostics) + println() + for d in st.diagnostics + JuliaSyntax.show_diagnostic(stdout, d, code) + end + end + t +end + +println() +println("Example good parse:") +parse_and_show(parse_toplevel, + """ + function f(x, y) + z = x - y + begin + a + b + end + z * z + end + + f(1,2) + """) + +println() +println("Example diagnostics:") +parse_and_show(parse_expression, "(x + a*y) * (b") + +nothing diff --git a/JuliaSyntax/prototypes/syntax_interpolation.jl b/JuliaSyntax/prototypes/syntax_interpolation.jl new file mode 100644 index 0000000000000..eddf6748bd423 --- /dev/null +++ b/JuliaSyntax/prototypes/syntax_interpolation.jl @@ -0,0 +1,56 @@ +# # Macros and expression interpolation + +using JuliaSyntax: SourceFile, SyntaxNode, parseall, child, setchild! + +# The following shows that SyntaxNode works nicely for simple macros which +# just interpolate expressions into one another. In particular it shows how +# precise source information from multiple files can coexist within the same +# syntax tree. 
+ +# First, here's the functionality that we're going to implement as a normal +# Julia macro. It's similar to the standard @show macro. +macro show2(ex) + name = sprint(Base.show_unquoted, ex) + quote + value = $(esc(ex)) + println($name, " = ", value) + value + end +end + +# Now, let's implement the same expression interpolation but using SyntaxNode +# (and with a normal Julia function which we need to use, absent any deeper +# integration with the Julia runtime) +function at_show2(ex::SyntaxNode) + name = sprint(show, MIME"text/x.sexpression"(), ex) + quote + value = $(esc(ex)) + println($name, " = ", value) + value + end + # The following emulates the expression interpolation lowering which is + # usually done by the compiler. + # 1. Extract the expression literal as `block` + tree = parseall(SyntaxNode, String(read(@__FILE__)), filename=@__FILE__) + block = child(tree, 3, 2, 2, 1) + # 2. Interpolate local variables into the block at positions of $'s + # Interpolating a SyntaxNode `ex` is simple: + setchild!(block, (1, 2), ex) + # The interpolation of a Julia *value* should inherit the source location + # of the $ interpolation expression. This is different to when substituting + # in a SyntaxNode which should just be inserted as-is. + setchild!(block, (2, 2), + JuliaSyntax.interpolate_literal(child(block, 2, 2), name)) + block +end + +# Usage of at_show2() + +# Let's have some simple expression to pass to at_show2. This will be +# attributed to a different file foo.jl +s2 = parseall(SyntaxNode, "foo +\n42", filename="foo.jl", rule=:statement) + +# Calling at_show2, we see that the precise source information is preserved for +# both the surrounding expression and the interpolated fragments. 
+println("\nInterpolation example") +s3 = at_show2(s2) diff --git a/JuliaSyntax/src/JuliaSyntax.jl b/JuliaSyntax/src/JuliaSyntax.jl new file mode 100644 index 0000000000000..da5861c0d5b62 --- /dev/null +++ b/JuliaSyntax/src/JuliaSyntax.jl @@ -0,0 +1,108 @@ +module JuliaSyntax + +macro _public(syms) + if VERSION >= v"1.11" + names = syms isa Symbol ? [syms] : syms.args + esc(Expr(:public, names...)) + else + nothing + end +end + +# Public API, in the order of docs/src/api.md + +# Parsing. +export parsestmt, + parseall, + parseatom + +@_public parse!, + ParseStream, + build_tree + +# Tokenization +export tokenize, + Token, + untokenize + +# Source file handling +@_public sourcefile, + byte_range, + char_range, + first_byte, + last_byte, + filename, + source_line, + source_location, + sourcetext, + highlight + +export SourceFile +@_public source_line_range + +# Expression predicates, kinds and flags +export @K_str, kind +@_public Kind + +@_public flags, + SyntaxHead, + head, + is_trivia, + is_prefix_call, + is_infix_op_call, + is_prefix_op_call, + is_postfix_op_call, + is_dotted, + is_suffixed, + is_decorated, + numeric_flags, + has_flags, + TRIPLE_STRING_FLAG, + RAW_STRING_FLAG, + PARENS_FLAG, + COLON_QUOTE, + TOPLEVEL_SEMICOLONS_FLAG, + MUTABLE_FLAG, + BARE_MODULE_FLAG, + SHORT_FORM_FUNCTION_FLAG + +# Syntax trees +@_public is_leaf, + numchildren, + children + +export SyntaxNode + +@_public GreenNode, RedTreeCursor, GreenTreeCursor, + span + +# Helper utilities +include("utils.jl") + +include("julia/kinds.jl") + +# Lexing uses a significantly modified version of Tokenize.jl +include("julia/tokenize.jl") + +# Source and diagnostics +include("core/source_files.jl") +include("core/diagnostics.jl") + +# Parsing +include("core/parse_stream.jl") +include("core/tree_cursors.jl") +include("julia/julia_parse_stream.jl") +include("julia/parser.jl") +include("julia/parser_api.jl") +include("julia/literal_parsing.jl") + +# Tree data structures 
+include("porcelain/green_node.jl") +include("porcelain/syntax_tree.jl") +include("integration/expr.jl") + +# Hooks to integrate the parser with Base +include("integration/hooks.jl") +include("precompile.jl") + +end diff --git a/JuliaSyntax/src/core/diagnostics.jl b/JuliaSyntax/src/core/diagnostics.jl new file mode 100644 index 0000000000000..39fa473fed2f9 --- /dev/null +++ b/JuliaSyntax/src/core/diagnostics.jl @@ -0,0 +1,110 @@ +""" + Diagnostic(first_byte, last_byte; [error="msg" | warning="msg"]) + +A diagnostic message, referring to the source code byte range +first_byte:last_byte, with a `warning` or `error` message. + +Messages should be concise, matter-of-fact and not include decorations: + +* Concise: "Show don't tell". Where possible, let's show the user what's wrong + by annotating their original source code via the byte range. +* Matter-of-fact: Admonishing the user isn't helpful. Let's gently show them + what's wrong instead, using a neutral tone. +* Decorations: Capitalization, punctuation and diagnostic class ("error" / + "warning") should be omitted. These decorations will be added by the + formatting code. + +TODO: At some point we should enhance Diagnostic to allow multiple sub-ranges +for better annotation. Let's follow the excellent precedent set by Rust's +[rustc_errors::Diagnostic](https://doc.rust-lang.org/stable/nightly-rustc/rustc_errors/struct.Diagnostic.html). + +TODO: We should cater for extended descriptions containing multiple sentences +via a diagnostic code which can be used to look up detailed information. Again, +Rust does this well. +""" +struct Diagnostic + first_byte::Int + last_byte::Int + level::Symbol + message::String +end + +function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing) + message = !isnothing(error) ? error : + !isnothing(warning) ? warning : + Base.error("No message in diagnostic") + level = !isnothing(error) ? 
:error : :warning + Diagnostic(first_byte, last_byte, level, message) +end + +byte_range(d::Diagnostic) = d.first_byte:d.last_byte +is_error(d::Diagnostic) = d.level === :error + +# Make relative path into a file URL +function _file_url(filename) + try + @static if Sys.iswindows() + # TODO: Test this with windows terminal + path = replace(abspath(filename), '\\'=>'/') + else + path = abspath(filename) + end + return "file://$(path)" + catch exc + # abspath may fail if working directory doesn't exist + # TODO: It seems rather non-ideal to have the behavior here depend on + # the state of the local filesystem. And yet links in diagnostics seem + # useful. + # + # Ideally it'd be up to the caller to provide some notion of the + # "absolute location" of the source code resource when SourceFile is + # constructed. This is often not related to the local filesystem - it + # could be in memory, a fragment embedded in another file, etc etc. + return nothing + end +end + +function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile) + color,prefix = diagnostic.level === :error ? (:light_red, "Error") : + diagnostic.level === :warning ? (:light_yellow, "Warning") : + diagnostic.level === :note ? 
(:light_blue, "Note") : + (:normal, "Info") + line, col = source_location(source, first_byte(diagnostic)) + linecol = "$line:$col" + fname = filename(source) + file_href = nothing + if !isempty(fname) + locstr = "$fname:$linecol" + if !startswith(fname, "REPL[") && get(io, :color, false) + url = _file_url(fname) + if !isnothing(url) + file_href = url*"#$linecol" + end + end + else + locstr = "line $linecol" + end + _printstyled(io, "# $prefix @ ", fgcolor=:light_black) + _printstyled(io, "$locstr", fgcolor=:light_black, href=file_href) + print(io, "\n") + highlight(io, source, byte_range(diagnostic), + note=diagnostic.message, notecolor=color, + context_lines_before=1, context_lines_after=0) +end + +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, source::SourceFile) + first = true + for d in diagnostics + first || println(io) + first = false + show_diagnostic(io, d, source) + end +end + +function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text::AbstractString) + show_diagnostics(io, diagnostics, SourceFile(text)) +end + +function any_error(diagnostics::AbstractVector{Diagnostic}) + any(is_error(d) for d in diagnostics) +end diff --git a/JuliaSyntax/src/core/parse_stream.jl b/JuliaSyntax/src/core/parse_stream.jl new file mode 100644 index 0000000000000..393e23c86c075 --- /dev/null +++ b/JuliaSyntax/src/core/parse_stream.jl @@ -0,0 +1,993 @@ +#------------------------------------------------------------------------------- +# Flags hold auxiliary information about tokens/nonterminals which the Kind +# doesn't capture in a nice way. +# +# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias? +const RawFlags = UInt16 +const EMPTY_FLAGS = RawFlags(0) + +# Set for tokens or ranges which are syntax trivia after parsing +const TRIVIA_FLAG = RawFlags(1<<0) + +""" +Set for nodes that are non-terminals +""" +const NON_TERMINAL_FLAG = RawFlags(1<<7) + +function remove_flags(n::RawFlags, fs...) 
+ RawFlags(n & ~(RawFlags((|)(fs...)))) +end + +""" + has_flags(x, test_flags) + +Return true if any of `test_flags` are set. +""" +has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0 + +#------------------------------------------------------------------------------- +""" + SyntaxHead(kind, flags) + +A `SyntaxHead` combines the [`Kind`](@ref) of a syntactic construct with a set +of flags. The kind defines the broad "type" of the syntactic construct, while +the flag bits compactly store more detailed information about the construct. +""" +struct SyntaxHead + kind::Kind + flags::RawFlags +end + +kind(head::SyntaxHead) = head.kind + +""" + flags(x) + +Return the flag bits of a syntactic construct. Prefer to query these with the +predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`, +`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`, +`is_decorated`. + +Or extract numeric portion of the flags with `numeric_flags`. +""" +flags(head::SyntaxHead) = head.flags + +function Base.summary(head::SyntaxHead) + untokenize(head, unique=false, include_flag_suff=false) +end + +#------------------------------------------------------------------------------- +# Generic interface for types `T` which have kind and flags. Either: +# 1. Define kind(::T) and flags(::T), or +# 2. Define head(::T) to return a type like `SyntaxKind` for which `kind` and +# `flags` are defined +kind(x) = kind(head(x)) +flags(x) = flags(head(x)) + +# Predicates based on flags() +has_flags(x, test_flags) = has_flags(flags(x), test_flags) +call_type_flags(x) = call_type_flags(flags(x)) + +""" + is_trivia(x) + +Return true for "syntax trivia": tokens in the tree which are either largely +invisible to the parser (eg, whitespace) or implied by the structure of the AST +(eg, reserved words). 
+""" +is_trivia(x) = has_flags(x, TRIVIA_FLAG) + +#------------------------------------------------------------------------------- +""" +`SyntaxToken` is a token covering a contiguous byte range in the input text. + +We record only the `next_byte` here (the index of the next byte *after* the +token) to avoid duplication of data between neighbouring tokens. This is more +useful than recording the first byte, as it allows an initial fixed sentinel +token to be used for recording the first byte of the first real token. +""" +struct SyntaxToken + head::SyntaxHead + orig_kind::Kind + preceding_whitespace::Bool + next_byte::UInt32 +end + +function Base.show(io::IO, tok::SyntaxToken) + print(io, rpad(untokenize(tok.head, unique=false), 15), " |", tok.next_byte) +end + +head(tok::SyntaxToken) = tok.head +preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace + + +#------------------------------------------------------------------------------- + +""" + RawGreenNode(head::SyntaxHead, byte_span::UInt32, orig_kind::Kind) # Terminal + RawGreenNode(head::SyntaxHead, byte_span::UInt32, nchildren::UInt32) # Non-terminal + +A "green tree" is a lossless syntax tree which overlays all the source text. +The most basic properties of a green tree are that: + +* Nodes cover a contiguous span of bytes in the text +* Sibling nodes are ordered in the same order as the text + +As implementation choices, we choose that: + +* Nodes are immutable and don't know their parents or absolute position, so can + be cached and reused +* Nodes are homogeneously typed at the language level so they can be stored + concretely, with the `head` defining the node type. Normally this would + include a "syntax kind" enumeration, but it can also include flags and record + information the parser knew about the layout of the child nodes. +* For simplicity and uniformity, leaf nodes cover a single token in the source. 
+ This is like rust-analyzer, but different from Roslyn where leaves can + include syntax trivia. +* The parser produces a single buffer of `RawGreenNode` which encodes the tree. + There are higher level accessors, which make working with this tree easier. +""" +struct RawGreenNode + head::SyntaxHead # Kind,flags + byte_span::UInt32 # Number of bytes covered by this range + # If NON_TERMINAL_FLAG is set, this is the total number of child nodes + # Otherwise this is a terminal node (i.e. a token) and this is orig_kind + node_span_or_orig_kind::UInt32 + + # Constructor for terminal nodes (tokens) + function RawGreenNode(head::SyntaxHead, byte_span::Integer, orig_kind::Kind) + @assert (flags(head) & NON_TERMINAL_FLAG) == 0 + new(head, UInt32(byte_span), UInt32(reinterpret(UInt16, orig_kind))) + end + + # Constructor for non-terminal nodes - automatically sets NON_TERMINAL_FLAG + function RawGreenNode(head::SyntaxHead, byte_span::Integer, node_span::Integer) + h = SyntaxHead(kind(head), flags(head) | NON_TERMINAL_FLAG) + new(h, UInt32(byte_span), UInt32(node_span)) + end + + global reset_node + function reset_node(node::RawGreenNode, kind, flags) + new(_reset_node_head(node, kind, flags), + getfield(node, :byte_span), + getfield(node, :node_span_or_orig_kind)) + end +end + +function _reset_node_head(node, k, f) + if !isnothing(f) + f = RawFlags(f) + @assert (f & NON_TERMINAL_FLAG) == 0 + f |= flags(node) & NON_TERMINAL_FLAG + else + f = flags(node) + end + h = SyntaxHead(isnothing(k) ? 
kind(node) : k, f) +end + +Base.summary(node::RawGreenNode) = summary(node.head) +function Base.show(io::IO, node::RawGreenNode) + print(io, summary(node), " (", node.byte_span, " bytes,") + if is_terminal(node) + print(io, " orig_kind=", node.orig_kind, ")") + else + print(io, " ", node.node_span, " children)") + end +end + +function Base.getproperty(rgn::RawGreenNode, name::Symbol) + if name === :node_span + has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children + return getfield(rgn, :node_span_or_orig_kind) + elseif name === :orig_kind + has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node") + return Kind(getfield(rgn, :node_span_or_orig_kind)) + end + getfield(rgn, name) +end + +head(range::RawGreenNode) = range.head + +# Helper functions for unified output +is_terminal(node::RawGreenNode) = !has_flags(node.head, NON_TERMINAL_FLAG) +is_non_terminal(node::RawGreenNode) = has_flags(node.head, NON_TERMINAL_FLAG) + +#------------------------------------------------------------------------------- +struct ParseStreamPosition + """ + The current position in the byte stream, i.e. the byte at `byte_index` is + the first byte of the next token to be parsed. + """ + byte_index::UInt32 + """ + The total number of nodes (terminal + non-terminal) in the output so far. + """ + node_index::UInt32 +end + +const NO_POSITION = ParseStreamPosition(0, 0) + +#------------------------------------------------------------------------------- +""" + ParseStream(text::AbstractString, index::Integer=1; version=VERSION) + ParseStream(text::IO; version=VERSION) + ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) + ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) + +Construct a `ParseStream` from input which may come in various forms: +* An string (zero copy for `String` and `SubString`) +* An `IO` object (zero copy for `IOBuffer`). 
The `IO` object must be seekable. +* A buffer of bytes (zero copy). The caller is responsible for preserving + buffers passed as `(ptr,len)`. + +A byte `index` may be provided as the position to start parsing. + +ParseStream provides an IO interface for the parser which provides lexing of +the source text input into tokens, manages insignificant whitespace tokens on +behalf of the parser, and stores output tokens and tree nodes in a pair of +output arrays. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. +""" +mutable struct ParseStream + # `textbuf` is a buffer of UTF-8 encoded text of the source code. This is a + # natural representation as we desire random access and zero-copy parsing + # of UTF-8 text from various containers, and unsafe_wrap(Vector{UInt8}, + # ...) allows us to use a Vector here. + # + # We want `ParseStream` to be concrete so that all `parse_*` functions only + # need to be compiled once. Thus `textbuf` must not be parameterized here. + textbuf::Vector{UInt8} + # GC root for the object which owns the memory in `textbuf`. 
`nothing` if + # the `textbuf` owner was unknown (eg, ptr,length was passed) + text_root::Any + # Lexer, transforming the input bytes into a token stream + lexer::Tokenize.Lexer{IOBuffer} + # Lookahead buffer for already lexed tokens + lookahead::Vector{SyntaxToken} + lookahead_index::Int + # Pool of stream positions for use as working space in parsing + position_pool::Vector{Vector{ParseStreamPosition}} + output::Vector{RawGreenNode} + # Current byte position in the output (the next byte to be written) + next_byte::Int + # Parsing diagnostics (errors/warnings etc) + diagnostics::Vector{Diagnostic} + # Counter for number of peek()s we've done without making progress via a bump() + peek_count::Int + # (major,minor) version of Julia we're parsing this code for. + # May be different from VERSION! + version::Tuple{Int,Int} + + function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer, + version::VersionNumber) + io = IOBuffer(text_buf) + seek(io, next_byte-1) + lexer = Tokenize.Lexer(io) + # To avoid keeping track of the exact Julia development version where new + # features were added or comparing prerelease strings, we treat prereleases + # or dev versions as the release version using only major and minor version + # numbers. This means we're inexact for old dev versions but that seems + # like an acceptable tradeoff. + ver = (version.major, version.minor) + # Initial sentinel node (covering all ignored bytes before the first token) + sentinel = RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), next_byte-1, K"TOMBSTONE") + new(text_buf, + text_root, + lexer, + Vector{SyntaxToken}(), + 1, + Vector{Vector{ParseStreamPosition}}(), + RawGreenNode[sentinel], + next_byte, # Initialize next_byte from the parameter + Vector{Diagnostic}(), + 0, + ver) + end +end + +function ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) + ParseStream(text, text, index, version) +end + +# Buffer with unknown owner. 
Not exactly recommended, but good for C interop +function ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) + ParseStream(unsafe_wrap(Vector{UInt8}, ptr, len), nothing, index, version) +end + +# Buffers originating from strings +function ParseStream(text::String, index::Integer=1; version=VERSION) + ParseStream(unsafe_wrap(Vector{UInt8}, text), + text, index, version) +end +function ParseStream(text::SubString{String}, index::Integer=1; version=VERSION) + # See also IOBuffer(SubString("x")) + ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)), + text, index, version) +end +function ParseStream(text::AbstractString, index::Integer=1; version=VERSION) + ParseStream(String(text), index; version=version) +end + +# IO-based cases +# TODO: switch ParseStream to use a Memory internally on newer versions of Julia +VERSION < v"1.11.0-DEV.753" && function ParseStream(io::IOBuffer; version=VERSION) + ParseStream(io.data, io, position(io)+1, version) +end +function ParseStream(io::Base.GenericIOBuffer; version=VERSION) + textbuf = unsafe_wrap(Vector{UInt8}, pointer(io.data), length(io.data)) + ParseStream(textbuf, io, position(io)+1, version) +end +function ParseStream(io::IO; version=VERSION) + textbuf = read(io) + ParseStream(textbuf, textbuf, 1, version) +end + +function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) + println(io, "ParseStream at position $(stream.next_byte)") +end + +function show_diagnostics(io::IO, stream::ParseStream) + show_diagnostics(io, stream.diagnostics, SourceFile(stream)) +end + +# We manage a pool of stream positions as parser working space +function acquire_positions(stream) + if isempty(stream.position_pool) + return Vector{ParseStreamPosition}() + end + pop!(stream.position_pool) +end + +function release_positions(stream, positions) + empty!(positions) + push!(stream.position_pool, positions) +end + +#------------------------------------------------------------------------------- 
+# Return true when a terminal (token) was emitted last at stream position `pos` +function token_is_last(stream, pos) + # In the unified structure, check if the node at pos is a terminal + return pos.node_index > 0 && pos.node_index <= length(stream.output) && + is_terminal(stream.output[pos.node_index]) +end + +function lookahead_token_first_byte(stream, i) + i == 1 ? _next_byte(stream) : stream.lookahead[i-1].next_byte +end + +function lookahead_token_last_byte(stream, i) + stream.lookahead[i].next_byte - 1 +end + +#------------------------------------------------------------------------------- +# Stream input interface - the peek_* family of functions + +# Buffer several tokens ahead +function _buffer_lookahead_tokens(lexer, lookahead) + had_whitespace = false + token_count = 0 + while true + raw = Tokenize.next_token(lexer) + k = kind(raw) + was_whitespace = is_whitespace(k) + had_whitespace |= was_whitespace + f = EMPTY_FLAGS + raw.suffix && (f |= SUFFIXED_FLAG) + push!(lookahead, SyntaxToken(SyntaxHead(k, f), k, + had_whitespace, raw.endbyte + 2)) + token_count += 1 + if k == K"EndMarker" + break + end + if !was_whitespace + # Buffer tokens in batches for lookahead. Generally we want a + # moderate-size buffer to make sure we hit the fast path of peek(), + # but not too large to avoid (a) polluting the processor cache and + # (b) doing unnecessary work when not parsing the whole input. + had_whitespace = false + if token_count > 100 + break + end + end + end +end + +# Return the index of the next byte of the input +function _next_byte(stream) + stream.next_byte +end + +# Find the index of the next nontrivia token +@inline function _lookahead_index(stream::ParseStream, n::Integer, skip_newlines::Bool) + # Much of the time we'll be peeking ahead a single token and have one or + # zero whitespace tokens before the next token. The following code is an + # unrolled optimized version for that fast path. 
Empirically it seems we + # only hit the slow path about 5% of the time here. + i = stream.lookahead_index + @inbounds if n == 1 && i+2 <= length(stream.lookahead) + if skip_newlines + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") + return i + end + i += 1 + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment" || k == K"NewlineWs") + return i + end + else + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment") + return i + end + i += 1 + k = kind(stream.lookahead[i]) + if !(k == K"Whitespace" || k == K"Comment") + return i + end + end + end + # Fall through to the general case + return __lookahead_index(stream, n, skip_newlines) +end + +@noinline function __lookahead_index(stream, n, skip_newlines) + i = stream.lookahead_index + while true + if i+1 > length(stream.lookahead) + n_to_delete = stream.lookahead_index-1 + if n_to_delete > 0.9*length(stream.lookahead) + Base._deletebeg!(stream.lookahead, n_to_delete) + i -= n_to_delete + stream.lookahead_index = 1 + end + _buffer_lookahead_tokens(stream.lexer, stream.lookahead) + continue + end + k = @inbounds kind(stream.lookahead[i]) + if !((k == K"Whitespace" || k == K"Comment") || + (k == K"NewlineWs" && skip_newlines)) + if n == 1 + return i + end + n -= 1 + end + i += 1 + end +end + +@noinline function _parser_stuck_error(stream) + # Optimization: emit unlikely errors in a separate function + error("The parser seems stuck at byte $(stream.next_byte)") +end + +""" + peek(stream::ParseStream [, n=1]; skip_newlines=false) + +Look ahead in the stream `n` tokens, returning the token kind. Comments and +non-newline whitespace are skipped automatically. Whitespace containing a +single newline is returned as kind `K"NewlineWs"` unless `skip_newlines` is +true. 
+""" +function Base.peek(stream::ParseStream, n::Integer=1; + skip_newlines::Bool=false, skip_whitespace=true) + kind(peek_token(stream, n; skip_newlines=skip_newlines, skip_whitespace=skip_whitespace)) +end + +""" + peek_token(stream [, n=1]) + +Like `peek`, but return the full token information rather than just the kind. +""" +function peek_token(stream::ParseStream, n::Integer=1; + skip_newlines=false, skip_whitespace=true) + stream.peek_count += 1 + if stream.peek_count > 100_000 + _parser_stuck_error(stream) + end + i = _lookahead_index(stream, n, skip_newlines) + if !skip_whitespace + i = stream.lookahead_index + end + return @inbounds stream.lookahead[i] +end + + +struct FullToken + head::SyntaxHead + first_byte::UInt32 + last_byte::UInt32 +end + +head(t::FullToken) = t.head +byte_range(t::FullToken) = t.first_byte:t.last_byte +span(t::FullToken) = 1 + last_byte(t) - first_byte(t) + +function peek_full_token(stream::ParseStream, n::Integer=1; + skip_newlines=false, skip_whitespace=true) + stream.peek_count += 1 + if stream.peek_count > 100_000 + _parser_stuck_error(stream) + end + i = _lookahead_index(stream, n, skip_newlines) + if !skip_whitespace + i = stream.lookahead_index + end + t = stream.lookahead[i] + + FullToken(head(t), lookahead_token_first_byte(stream, i), + lookahead_token_last_byte(stream, i)) +end + +""" + peek_behind(ps; skip_trivia=true, skip_parens=true) + peek_behind(ps, pos::ParseStreamPosition) + +Return information about a span which was previously inserted into the output, +defaulting to the most previous nontrivia node when `skip_trivia` is true, or +at the provided position `pos`. + +Retroactively inspecting or modifying the parser's output can be confusing, so +using this function should be avoided where possible. 
+""" +function peek_behind(stream::ParseStream, pos::ParseStreamPosition) + if pos.node_index > 0 && pos.node_index <= length(stream.output) + node = stream.output[pos.node_index] + if is_terminal(node) + return (kind=kind(node), + flags=flags(node), + orig_kind=node.orig_kind, + is_leaf=true) + else + return (kind=kind(node), + flags=flags(node), + orig_kind=K"None", + is_leaf=false) + end + else + return (kind=K"None", + flags=EMPTY_FLAGS, + orig_kind=K"None", + is_leaf=true) + end +end + +""" + first_child_position(stream::ParseStream, pos::ParseStreamPosition) + +Find the first non-trivia child of this node (in the GreenTree/RedTree sense) and return +its position. +""" +function first_child_position(stream::ParseStream, pos::ParseStreamPosition) + output = stream.output + @assert pos.node_index > 0 + cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-UInt32(1)) + candidate = nothing + for child in reverse(cursor) + is_trivia(child) && continue + candidate = child + end + + candidate !== nothing && return ParseStreamPosition(candidate.byte_end+UInt32(1), candidate.green.position) + + # No children found - return the first non-trivia *token* (even if it + # is the child of a non-terminal trivia node (e.g. an error)). + byte_end = pos.byte_index + for i in pos.node_index-1:-1:(pos.node_index - treesize(cursor)) + node = output[i] + if is_terminal(node) + if !is_trivia(node) + return ParseStreamPosition(byte_end, i) + end + byte_end -= node.byte_span + end + end + + # Still none found. Return a sentinel value + return ParseStreamPosition(0, 0) +end + +""" + first_child_position(stream::ParseStream, pos::ParseStreamPosition) + + Find the last non-trivia child of this node (in the GreenTree/RedTree sense) and + return its position (i.e. the position as if that child had been the last thing parsed). 
+""" +function last_child_position(stream::ParseStream, pos::ParseStreamPosition) + output = stream.output + @assert pos.node_index > 0 + cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-1) + candidate = nothing + for child in reverse(cursor) + is_trivia(child) && continue + return ParseStreamPosition(child.byte_end+UInt32(1), child.green.position) + end + return ParseStreamPosition(0, 0) +end + +# Get last position in stream "of interest", skipping +# * parens nodes +# * deleted tokens (TOMBSTONE) +# * whitespace (if skip_trivia=true) +function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true, + skip_parens::Bool=true) + # Work backwards through the output + node_idx = length(stream.output) + byte_idx = stream.next_byte + + # Skip parens nodes if requested + if skip_parens + while node_idx > 0 + node = stream.output[node_idx] + if is_non_terminal(node) && kind(node) == K"parens" + node_idx -= 1 + else + break + end + end + end + + # Skip trivia if requested + while node_idx > 0 + node = stream.output[node_idx] + if kind(node) == K"TOMBSTONE" || (skip_trivia && is_trivia(node)) + node_idx -= 1 + byte_idx -= node.byte_span + else + break + end + end + + return ParseStreamPosition(byte_idx, node_idx) +end + +function peek_behind(stream::ParseStream; kws...) 
+ peek_behind(stream, peek_behind_pos(stream; kws...)) +end + +#------------------------------------------------------------------------------- +# Stream output interface - the `bump_*` and `emit_*` family of functions +# +# Though note bump() really does both input and output + +# Bump up until the `n`th token +# flags and remap_kind are applied to any non-trivia tokens +function _bump_until_n(stream::ParseStream, n::Integer, new_flags, remap_kind=K"None") + if n < stream.lookahead_index + return + end + for i in stream.lookahead_index:n + tok = stream.lookahead[i] + k = kind(tok) + if k == K"EndMarker" + break + end + f = new_flags | flags(tok) + is_trivia = is_whitespace(k) + is_trivia && (f |= TRIVIA_FLAG) + outk = (is_trivia || remap_kind == K"None") ? k : remap_kind + h = SyntaxHead(outk, f) + + # Calculate byte span for this token + if i == stream.lookahead_index + # First token in this batch - calculate span from current stream position + prev_byte = stream.next_byte + else + # Subsequent tokens - use previous token's next_byte + prev_byte = stream.lookahead[i-1].next_byte + end + byte_span = Int(tok.next_byte) - Int(prev_byte) + + # Create terminal RawGreenNode + node = RawGreenNode(h, byte_span, kind(tok)) + push!(stream.output, node) + + # Update next_byte + stream.next_byte += byte_span + end + stream.lookahead_index = n + 1 + # Defuse the time bomb + stream.peek_count = 0 +end + +""" + bump(stream [, flags=EMPTY_FLAGS]; + skip_newlines=false, error, remap_kind) + +Copy the current token from the input stream to the output. Adds the given +flags to the output token (normally this would be the default `EMPTY_FLAGS` or +`TRIVIA_FLAG`). + +Keyword arguments: +* `skip_newlines` - if `true`, newlines are treated as whitespace. +* `error` - if set, emit an error for this token +* `remap_kind` - the kind of the token in the output token stream if it needs + to be modified. 
+""" +function bump(stream::ParseStream, flags=EMPTY_FLAGS; skip_newlines=false, + error=nothing, remap_kind::Kind=K"None") + emark = position(stream) + _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines), flags, remap_kind) + if !isnothing(error) + emit(stream, emark, K"error", flags, error=error) + end + # Return last token location in output if needed for reset_node! + return position(stream) +end + +""" +Bump comments and whitespace tokens preceding the next token + +**Skips newlines** by default. Set skip_newlines=false to avoid that. +""" +function bump_trivia(stream::ParseStream, flags=EMPTY_FLAGS; + skip_newlines=true, error=nothing) + emark = position(stream) + _bump_until_n(stream, _lookahead_index(stream, 1, skip_newlines) - 1, EMPTY_FLAGS) + if !isnothing(error) + emit(stream, emark, K"error", flags, error=error) + end + return position(stream) +end + +""" +Bump an invisible zero-width token into the output + +This is useful when surrounding syntax implies the presence of a token. For +example, `2x` means `2*x` via the juxtaposition rules. +""" +function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS; + error=nothing) + b = stream.next_byte + h = SyntaxHead(kind, flags) + # Zero-width token + node = RawGreenNode(h, 0, kind) + push!(stream.output, node) + # No need to update next_byte for zero-width token + if !isnothing(error) + emit_diagnostic(stream, b:b-1, error=error) + end + stream.peek_count = 0 + return position(stream) +end + +""" +Bump several tokens, gluing them together into a single token + +This is for use in special circumstances where the parser needs to resolve +lexing ambiguities. There's no special whitespace handling — bump any +whitespace if necessary with bump_trivia. 
+""" +function bump_glue(stream::ParseStream, kind, flags) + i = stream.lookahead_index + h = SyntaxHead(kind, flags) + # Calculate byte span for glued tokens + start_byte = stream.next_byte + end_byte = stream.lookahead[i+1].next_byte + byte_span = end_byte - start_byte + + node = RawGreenNode(h, byte_span, kind) + push!(stream.output, node) + stream.next_byte += byte_span + stream.lookahead_index += 2 + stream.peek_count = 0 + return position(stream) +end + +""" +Reset kind or flags of an existing node in the output stream + +This is a hack, but in some limited occasions the trailing syntax may change +the kind or flags of a token in a way which would require unbounded lookahead +in a recursive descent parser. Modifying the output with reset_node! is useful +in those cases. +""" +function reset_node!(stream::ParseStream, pos::ParseStreamPosition; + kind=nothing, flags=nothing) + node = stream.output[pos.node_index] + stream.output[pos.node_index] = reset_node(node, kind, flags) +end + +""" +Move `numbytes` from the range at output position `pos+1` to the output +position `pos`. If the donor range becomes empty, mark it dead with +K"TOMBSTONE" and return `true`, otherwise return `false`. + +Hack alert! This is used only for managing the complicated rules related to +dedenting triple quoted strings. +""" +function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes) + i = pos.node_index + t1 = stream.output[i] + t2 = stream.output[i+1] + @assert is_terminal(t1) && is_terminal(t2) + + stream.output[i] = RawGreenNode(t1.head, t1.byte_span + numbytes, + t1.orig_kind) + + t2_is_empty = t2.byte_span == numbytes + head2 = t2_is_empty ? 
SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head + stream.output[i+1] = RawGreenNode(head2, t2.byte_span - numbytes, + t2.orig_kind) + return t2_is_empty +end + +# Get position of last item emitted into the output stream +function Base.position(stream::ParseStream) + byte_idx = stream.next_byte + node_idx = length(stream.output) + + ParseStreamPosition(byte_idx, node_idx) +end + +""" + emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing) + +Emit a new non-terminal node into the output which covers source bytes from `mark` to +the end of the most recent token which was `bump()`'ed. The starting `mark` +should be a previous return value of `position()`. The emitted node will have +its `node_span` set to the number of nodes emitted since `mark`. +""" +function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind, + flags::RawFlags = EMPTY_FLAGS; error=nothing) + # Calculate byte span from mark position to current + mark_byte = mark.byte_index + current_byte = stream.next_byte + byte_span = current_byte - mark_byte + + # Calculate node span (number of children, exclusive of the node itself) + node_span = length(stream.output) - mark.node_index + + # Create non-terminal RawGreenNode + node = RawGreenNode(SyntaxHead(kind, flags), byte_span, node_span) + + if !isnothing(error) + emit_diagnostic(stream, mark_byte:current_byte-1, error=error) + end + + push!(stream.output, node) + # Note: emit() for non-terminals doesn't advance next_byte + # because it's a range over already-emitted tokens + return position(stream) +end + +function emit_diagnostic(stream::ParseStream, byterange::AbstractUnitRange; kws...) + emit_diagnostic(stream.diagnostics, byterange; kws...) + return nothing +end + +""" +Emit a diagnostic at the position of the next token + +If `whitespace` is true, the diagnostic is positioned on the whitespace before +the next token. Otherwise it's positioned at the next token as returned by `peek()`. 
+""" +function emit_diagnostic(stream::ParseStream; whitespace=false, kws...) + i = _lookahead_index(stream, 1, true) + begin_tok_i = i + end_tok_i = i + if whitespace + # It's the whitespace which is the error. Find the range of the current + # whitespace. + begin_tok_i = stream.lookahead_index + end_tok_i = is_whitespace(stream.lookahead[i]) ? + i : max(stream.lookahead_index, i - 1) + end + fbyte = lookahead_token_first_byte(stream, begin_tok_i) + lbyte = lookahead_token_last_byte(stream, end_tok_i) + emit_diagnostic(stream, fbyte:lbyte; kws...) + return nothing +end + +function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...) + # Find the byte range from mark to current position + start_byte = mark.byte_index + end_byte = stream.next_byte - 1 + + if trim_whitespace + # TODO: Implement whitespace trimming for unified output + # This would require scanning the output array + end + + emit_diagnostic(stream, start_byte:end_byte; kws...) +end + +function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition, + end_mark::ParseStreamPosition; kws...) + emit_diagnostic(stream, mark.byte_index:end_mark.byte_index-1; kws...) +end + +function emit_diagnostic(diagnostics::AbstractVector{Diagnostic}, + byterange::AbstractUnitRange; kws...) + push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...)) +end + +# Tree construction from the list of text ranges held by ParseStream + +# API for extracting results from ParseStream + +function sourcetext(stream::ParseStream; steal_textbuf=false) + Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext) + root = stream.text_root + # The following kinda works but makes the return type of this method type + # unstable. (Also codeunit(root) == UInt8 doesn't imply UTF-8 encoding?) 
+ # if root isa AbstractString && codeunit(root) == UInt8 + # return root + str = if root isa String || root isa SubString + root + elseif steal_textbuf + String(stream.textbuf) + else + # Safe default for other cases is to copy the buffer. Technically this + # could possibly be avoided in some situations, but might have side + # effects such as mutating stream.text_root or stealing the storage of + # stream.textbuf + String(copy(stream.textbuf)) + end + SubString(str, first_byte(stream), thisind(str, last_byte(stream))) +end + +function SourceFile(stream::ParseStream; kws...) + fbyte = first_byte(stream) + lbyte = last_byte(stream) + if !isempty(stream.diagnostics) + lbyte = max(lbyte, maximum(last_byte(d) for d in stream.diagnostics)) + end + # See also sourcetext() + srcroot = stream.text_root + str = if srcroot isa String + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + elseif srcroot isa SubString{String} + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + else + SubString(String(stream.textbuf[fbyte:lbyte])) + end + return SourceFile(str; first_index=first_byte(stream), kws...) +end + +""" + unsafe_textbuf(stream) + +Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`. + +!!! 
warning + The caller must hold a reference to `stream` while using textbuf +""" +unsafe_textbuf(stream) = stream.textbuf + +first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel +last_byte(stream::ParseStream) = stream.next_byte - 1 +any_error(stream::ParseStream) = any_error(stream.diagnostics) + +# Return last non-whitespace byte which was parsed +function last_non_whitespace_byte(stream::ParseStream) + byte_pos = stream.next_byte + for i = length(stream.output):-1:1 + node = stream.output[i] + if is_terminal(node) + if kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment" || kind(node) == K"error" && node.byte_span == 0 + byte_pos -= node.byte_span + else + return byte_pos - 1 + end + end + end + return first_byte(stream) - 1 +end + +function Base.empty!(stream::ParseStream) + # Keep only the sentinel + if !isempty(stream.output) && kind(stream.output[1]) == K"TOMBSTONE" + resize!(stream.output, 1) + else + empty!(stream.output) + # Restore sentinel node + push!(stream.output, RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), 0, K"TOMBSTONE")) + end + # Reset next_byte to initial position + stream.next_byte = 1 +end diff --git a/JuliaSyntax/src/core/source_files.jl b/JuliaSyntax/src/core/source_files.jl new file mode 100644 index 0000000000000..87019aa81e594 --- /dev/null +++ b/JuliaSyntax/src/core/source_files.jl @@ -0,0 +1,392 @@ +#------------------------------------------------------------------------------- +# Generic functions for source text, source location computation and formatting +# functions + +""" + sourcefile(x) + +Get the source file object (usually `SourceFile`) for a given syntax object +`x`. The source file along with a byte range may be used to compute +`source_line()`, `source_location()`, `filename()`, etc. +""" +function sourcefile +end + +""" + byte_range(x) + +Return the range of bytes which `x` covers in the source text. See also +[`char_range`](@ref). 
+""" +function byte_range +end + +""" + char_range(x) + +Compute the range in *character indices* over the source text for syntax object +`x`. If you want to index the source string you need this, rather than +[`byte_range`](@ref). +""" +function char_range(x) + br = byte_range(x) + first(br):thisind(sourcefile(x), last(br)) +end + +""" + first_byte(x) + +Return the first byte of `x` in the source text. +""" +first_byte(x) = first(byte_range(x)) + +""" + last_byte(x) + +Return the last byte of `x` in the source text. +""" +last_byte(x) = last(byte_range(x)) + +""" + filename(x) + +Get file name associated with `source`, or an empty string if one didn't exist. + +For objects `x` such as syntax trees, defers to `filename(sourcefile(x))` by +default. +""" +function filename(x) + source = sourcefile(x) + isnothing(source) ? "" : filename(source) +end + +""" + source_line(x) + source_line(source::SourceFile, byte_index::Integer) + +Get the line number of the first line on which object `x` appears. In the +second form, get the line number at the given `byte_index` within `source`. +""" +source_line(x) = source_line(sourcefile(x), first_byte(x)) + +""" + source_location(x) + source_location(source::SourceFile, byte_index::Integer) + + source_location(LineNumberNode, x) + source_location(LineNumberNode, source, byte_index) + +Get `(line,column)` of the first byte where object `x` appears in the source. +The second form allows one to be more precise with the `byte_index`, given the +source file. + +Providing `LineNumberNode` as the first argument will return the line and file +name in a line number node object. 
+"""
+source_location(x) = source_location(sourcefile(x), first_byte(x))
+
+"""
+    sourcetext(x)
+
+Get the full source text of syntax object `x`
+"""
+function sourcetext(x)
+    view(sourcefile(x), byte_range(x))
+end
+
+"""
+    highlight(io, x; color, note, notecolor,
+              context_lines_before, context_lines_inner, context_lines_after)
+
+    highlight(io::IO, source::SourceFile, range::UnitRange; kws...)
+
+Print the lines of source code surrounding `x` which is highlighted with
+background `color` and underlined with markers in the text. A `note` in
+`notecolor` may be provided as annotation. By default, `x` should be an object
+with `sourcefile(x)` and `byte_range(x)` implemented.
+
+The context arguments `context_lines_before`, etc, refer to the number of
+lines of code which will be printed as context before and after, with `inner`
+referring to context lines inside a multiline region.
+
+The second form shares the keywords of the first but allows an explicit source
+file and byte range to be supplied.
+"""
+function highlight(io::IO, x; kws...)
+    highlight(io, sourcefile(x), byte_range(x); kws...)
+end
+
+
+#-------------------------------------------------------------------------------
+"""
+    SourceFile(code [; filename=nothing, first_line=1, first_index=1])
+
+UTF-8 source text with associated file name and line number, storing the
+character indices of the start of each line. `first_line` and `first_index`
+can be used to specify the line number and index of the first character of
+`code` within a larger piece of source text.
+
+`SourceFile` may be indexed via `getindex` or `view` to get a string. Line
+information for a byte offset can be looked up via the `source_line`,
+`source_location` and `source_line_range` functions.
+"""
+struct SourceFile
+    # TODO: Rename SourceFile -> SourceText / SourceChunk / SourceIndex / SourceLineIndex ?
+ # See https://github.com/JuliaLang/JuliaSyntax.jl/issues/190 + code::SubString{String} + # Offset of `code` within a larger chunk of source text + byte_offset::Int + filename::Union{Nothing,String} + # first_column::Int ?? + first_line::Int + # String index of start of every line + line_starts::Vector{Int} +end + +Base.hash(s::SourceFile, h::UInt) = + hash(s.code, hash(s.byte_offset, hash(s.filename, hash(s.first_line, hash(s.line_starts, h))))) +function Base.:(==)(a::SourceFile, b::SourceFile) + a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename && + a.first_line == b.first_line && a.line_starts == b.line_starts +end + +function SourceFile(code::AbstractString; filename=nothing, first_line=1, + first_index=1) + line_starts = Int[1] + for i in eachindex(code) + # The line is considered to start after the `\n` + code[i] == '\n' && push!(line_starts, i+1) + end + SourceFile(code, first_index-1, filename, first_line, line_starts) +end + +function SourceFile(; filename, kwargs...) + SourceFile(read(filename, String); filename=filename, kwargs...) +end + +# Get line number of the given byte within the code +function _source_line_index(source::SourceFile, byte_index) + searchsortedlast(source.line_starts, byte_index - source.byte_offset) +end +_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1 + +function source_location(::Type{LineNumberNode}, x) + source_location(LineNumberNode, sourcefile(x), first_byte(x)) +end + +source_line(source::SourceFile, byte_index::Integer) = + _source_line(source, _source_line_index(source, byte_index)) + +function filename(source::SourceFile) + f = source.filename + !isnothing(f) ? 
f : "" +end + +function source_location(source::SourceFile, byte_index::Integer) + lineidx = _source_line_index(source, byte_index) + i = source.line_starts[lineidx] + column = 1 + while i < byte_index - source.byte_offset + i = nextind(source.code, i) + column += 1 + end + _source_line(source, lineidx), column +end + +""" +Get byte range of the source line at byte_index, buffered by +`context_lines_before` and `context_lines_after` before and after. +""" +function source_line_range(source::SourceFile, byte_index::Integer; + context_lines_before=0, context_lines_after=0) + lineidx = _source_line_index(source, byte_index) + fbyte = source.line_starts[max(lineidx-context_lines_before, 1)] + lline = lineidx + context_lines_after + lbyte = lline >= lastindex(source.line_starts) ? + ncodeunits(source.code) : source.line_starts[lline + 1] - 1 + + return (fbyte + source.byte_offset, + lbyte + source.byte_offset) +end + +function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer) + fn = filename(source) + LineNumberNode(source_line(source, byte_index), isempty(fn) ? nothing : Symbol(fn)) +end + +function Base.show(io::IO, ::MIME"text/plain", source::SourceFile) + fn = filename(source) + header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##" + print(io, header, "\n") + heightlim = displaysize(io)[1] ÷ 2 + if !get(io, :limit, false) || length(source.line_starts) <= heightlim + print(io, source.code) + else + r1 = source_line_range(source, 1, context_lines_after=heightlim-3) + print(io, view(source, r1[1]:r1[2])) + println(io, "⋮") + end +end + +function Base.getindex(source::SourceFile, rng::AbstractUnitRange) + i = first(rng) - source.byte_offset + # Convert byte range into unicode String character range. + # Assumes valid unicode! (SubString doesn't give us a reliable way to opt + # out of the valid unicode check. The SubString{String} inner constructor + # has some @boundscheck, but using @inbounds depends on inlining choices.) 
+ j = prevind(source.code, last(rng) + 1 - source.byte_offset) + source.code[i:j] +end + +# TODO: Change view() here to `sourcetext` ? +function Base.view(source::SourceFile, rng::AbstractUnitRange) + i = first(rng) - source.byte_offset + j = prevind(source.code, last(rng) + 1 - source.byte_offset) + SubString(source.code, i, j) +end + +function Base.getindex(source::SourceFile, i::Integer) + source.code[i - source.byte_offset] +end + +function Base.thisind(source::SourceFile, i::Integer) + thisind(source.code, i - source.byte_offset) + source.byte_offset +end + +function Base.nextind(source::SourceFile, i::Integer) + nextind(source.code, i - source.byte_offset) + source.byte_offset +end + +Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset +Base.lastindex(source::SourceFile) = lastindex(source.code) + source.byte_offset + +""" + sourcetext(source::SourceFile) + +Get the full source text of a `SourceFile` as a string. +""" +function sourcetext(source::SourceFile) + return source.code +end + + +#------------------------------------------------------------------------------- +# Tools for highlighting source ranges +function _print_marker_line(io, prefix_str, str, underline, singleline, color, + note, notecolor) + # Whitespace equivalent in length to `prefix_str` + # Getting exactly the same width of whitespace as `str` is tricky. + # Especially for mixtures of tabs and spaces. + # tabs are zero width according to textwidth + indent = join(isspace(c) ? c : repeat(' ', textwidth(c)) for c in prefix_str) + + # Assume tabs are 4 wide rather than 0. (fixme: implement tab alignment?) + w = textwidth(str) + 4*count(c->c=='\t', str) + if !isempty(indent) + indent = "#" * (first(indent) == '\t' ? indent : indent[nextind(indent,1):end]) + end + + midchar = '─' + startstr, endstr, singlestart = underline ? ("└","┘","╙") : ("┌","┐","╓") + + markline = + if singleline + w == 0 ? string(indent, startstr) : + w == 1 ? 
string(indent, singlestart) : + string(indent, startstr, repeat('─', w-2), endstr) + else + if underline && isempty(indent) && w > 1 + string('#', repeat('─', w-2), endstr) + else + s,e = underline ? ("", endstr) : (startstr, "") + w == 0 ? string(indent, s, e) : + string(indent, s, repeat('─', w-1), e) + end + end + if note isa AbstractString + markline *= " ── " + end + _printstyled(io, markline; fgcolor=color) + if !isnothing(note) + if note isa AbstractString + _printstyled(io, note, fgcolor=notecolor) + else + note(io, indent, w) + end + end +end + +function highlight(io::IO, source::SourceFile, range::UnitRange; + color=(120,70,70), context_lines_before=2, + context_lines_inner=1, context_lines_after=2, + note=nothing, notecolor=nothing) + p = first(range) + q = last(range) + + x,y = source_line_range(source, p; + context_lines_before=context_lines_before, + context_lines_after=context_lines_inner) + a,b = source_line_range(source, p) + q1 = max(q, p) # Ignore q for empty ranges + c,d = source_line_range(source, q1) + z,w = source_line_range(source, q1; + context_lines_before=context_lines_inner, + context_lines_after=context_lines_after) + + p_line = source_line(source, p) + q_line = source_line(source, q) + + marker_line_color = :light_black + + if p_line >= q_line + # x----------------- + # a---p-------q----b + # # └───────┘ ── note + # -----------------w + + hitext = source[p:q] + print(io, source[x:p-1]) + _printstyled(io, hitext; bgcolor=color) + #print(io, source[q+1:d]) + print(io, source[nextind(source,q):d]) + if d >= firstindex(source) && source[thisind(source, d)] != '\n' + print(io, "\n") + end + _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor) + else + # x -------------- + # # ┌───── + # a---p----b + # --------------y + # --------------- + # z-------------- + # c --------q----d + # #───────────┘ ── note + # -----------------w + + prefix1 = source[a:p-1] + print(io, source[x:a-1]) + 
_print_marker_line(io, prefix1, source[p:b], false, false, marker_line_color, nothing, notecolor)
+        print(io, '\n')
+        print(io, prefix1)
+        if q_line - p_line - 1 <= 2*context_lines_inner
+            # The diagnostic range is compact and we show the whole thing
+            _printstyled(io, source[p:q]; bgcolor=color)
+        else
+            # Or large and we truncate the code to show only the region around the
+            # start and end of the error.
+            _printstyled(io, source[p:y]; bgcolor=color)
+            print(io, "⋮\n")
+            _printstyled(io, source[z:q]; bgcolor=color)
+        end
+        print(io, source[nextind(source, q):d])
+        source[thisind(source, d)] == '\n' || print(io, "\n")
+        qline = source[c:q]
+        _print_marker_line(io, "", qline, true, false, marker_line_color, note, notecolor)
+    end
+    if context_lines_after > 0 && d+1 <= lastindex(source)
+        print(io, '\n')
+        w1 = source[thisind(source, w)] == '\n' ? w - 1 : w
+        print(io, source[d+1:w1])
+    end
+end
diff --git a/JuliaSyntax/src/core/tree_cursors.jl b/JuliaSyntax/src/core/tree_cursors.jl
new file mode 100644
index 0000000000000..75a5c0e44008f
--- /dev/null
+++ b/JuliaSyntax/src/core/tree_cursors.jl
@@ -0,0 +1,175 @@
+using Base.Iterators: Reverse
+
+"""
+    prev_sibling_assumed(cursor::GreenTreeCursor)::Union{Nothing, GreenTreeCursor}
+    prev_sibling_assumed(cursor::RedTreeCursor)::Union{Nothing, RedTreeCursor}
+
+Gives the previous sibling of the current node, but makes the assumption that
+there is one or that we are at the top level.
+Without knowing the parent, we cannot otherwise know which the last sibling is,
+unless we are at the top level in which case `nothing` is returned.
+"""
+function prev_sibling_assumed end
+
+"""
+    GreenTreeCursor
+
+Represents a cursor into a ParseStream output buffer that makes it easy to
+""" +struct GreenTreeCursor + parser_output::Vector{RawGreenNode} + position::UInt32 +end +GreenTreeCursor(stream::ParseStream) = GreenTreeCursor(stream.output, length(stream.output)) +this(node::GreenTreeCursor) = node.parser_output[node.position] + +const SENTINEL_INDEX = UInt32(1) +function prev_sibling_assumed(cursor::GreenTreeCursor) + next_idx = cursor.position - this(cursor).node_span - UInt32(1) + next_idx == SENTINEL_INDEX && return nothing + GreenTreeCursor(cursor.parser_output, next_idx) +end + +function Base.in(child::GreenTreeCursor, parent::GreenTreeCursor) + @assert child.parser_output === parent.parser_output + child.position < parent.position || return false + return child.position >= parent.position - this(parent).node_span +end + +# Debug printing +function Base.show(io::IO, node::GreenTreeCursor) + print(io, Base.summary(this(node)), " @", node.position) +end + +# Reverse iterator interface +Base.reverse(node::GreenTreeCursor) = Base.Iterators.Reverse(node) +Base.IteratorSize(::Type{Reverse{GreenTreeCursor}}) = Base.SizeUnknown() +@inline function Base.iterate(node::Reverse{GreenTreeCursor}, + (next_idx, final)::NTuple{2, UInt32} = + (node.itr.position-UInt32(1), node.itr.position - this(node.itr).node_span - UInt32(1)))::Union{Nothing, Tuple{GreenTreeCursor, NTuple{2, UInt32}}} + node = node.itr + while true + next_idx == final && return nothing + next_node = GreenTreeCursor(node.parser_output, next_idx) + nrgn = this(next_node) + if getfield(nrgn, :head).kind == K"TOMBSTONE" + # TOMBSTONED nodes are counted as part of the size of the tree, but + # do not contribute either byte ranges or children. 
+ next_idx -= UInt32(1) + continue + end + # Inlined prev_sibling_assumed + new_next_idx = next_idx - nrgn.node_span - UInt32(1) + return (next_node, (new_next_idx, final)) + end +end + +# Accessors / predicates +is_leaf(node::GreenTreeCursor) = !is_non_terminal(this(node)) +head(node::GreenTreeCursor) = this(node).head +treesize(node::GreenTreeCursor) = this(node).node_span +is_non_terminal(node::GreenTreeCursor) = is_non_terminal(this(node)) + +""" + span(node) + +Get the number of bytes this node covers in the source text. +""" +span(node::GreenTreeCursor) = this(node).byte_span + +""" + RedTreeCursor + +Wraps a `GreenTreeCursor` to keep track of the absolute position of the node +in the original source text. +""" +struct RedTreeCursor + green::GreenTreeCursor + # The last byte that is still part of the node + byte_end::UInt32 +end +RedTreeCursor(stream::ParseStream) = RedTreeCursor( + GreenTreeCursor(stream), stream.next_byte - UInt32(1)) + +function prev_sibling_assumed(cursor::RedTreeCursor) + prevgreen = prev_sibling_assumed(cursor.green) + if prevgreen === nothing + return nothing + end + return RedTreeCursor(prevgreen, cursor.byte_end - span(cursor)) +end + + +Base.reverse(node::RedTreeCursor) = Base.Iterators.Reverse(node) +Base.IteratorSize(::Type{Reverse{RedTreeCursor}}) = Base.SizeUnknown() +@inline function Base.iterate(node::Reverse{RedTreeCursor})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}} + r = iterate(Reverse(node.itr.green)) + return _iterate_red_cursor(r, node.itr.byte_end) +end + +@inline function Base.iterate(node::Reverse{RedTreeCursor}, state::NTuple{3, UInt32})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}} + r = iterate(Reverse(node.itr.green), Base.tail(state)) + return _iterate_red_cursor(r, first(state)) +end + +@inline function _iterate_red_cursor(r, byte_end) + r === nothing && return nothing + next_node, next_idx = r + return RedTreeCursor(next_node, byte_end), + (byte_end - span(next_node), next_idx...) 
+end + +is_leaf(node::RedTreeCursor) = is_leaf(node.green) +head(node::RedTreeCursor) = head(node.green) +span(node::RedTreeCursor) = span(node.green) +byte_range(node::RedTreeCursor) = (node.byte_end - span(node.green) + UInt32(1)):node.byte_end +treesize(node::RedTreeCursor) = treesize(node.green) +is_non_terminal(node::RedTreeCursor) = is_non_terminal(node.green) + +function Base.show(io::IO, node::RedTreeCursor) + print(io, node.green, " [", byte_range(node), "]") +end + +has_toplevel_siblings(cursor::GreenTreeCursor) = + treesize(cursor)+1 != length(cursor.parser_output)-1 +has_toplevel_siblings(cursor::RedTreeCursor) = + has_toplevel_siblings(cursor.green) +struct TopLevelSiblingIterator{C} + cursor::C +end + +function reverse_toplevel_siblings(cursor::RedTreeCursor) + @assert cursor.green.position == length(cursor.green.parser_output) + TopLevelSiblingIterator(cursor) +end + +function reverse_toplevel_siblings(cursor::GreenTreeCursor) + @assert cursor.position == length(cursor.parser_output) + TopLevelSiblingIterator(cursor) +end + +function Base.iterate(tsi::TopLevelSiblingIterator) + return (tsi.cursor, tsi.cursor) +end +function Base.iterate(cursor::TopLevelSiblingIterator{C}, last::C) where {C} + this = prev_sibling_assumed(last) + this === nothing && return nothing + return (this, this) +end + +# HACK: Force inlining of `filter` for our cursors to avoid significant perf +# degradation. +@inline function Base.iterate(f::Iterators.Filter{<:Any, Iterators.Reverse{T}}, state...) where {T<:Union{RedTreeCursor, GreenTreeCursor}} + y = iterate(f.itr, state...) 
+ while y !== nothing + if f.flt(y[1]) + return y + end + y = iterate(f.itr, y[2]) + end + nothing +end + +Base.in(child::GreenTreeCursor, parent::RedTreeCursor) = + in(child, parent.green) diff --git a/JuliaSyntax/src/integration/expr.jl b/JuliaSyntax/src/integration/expr.jl new file mode 100644 index 0000000000000..53de5f55f0ee4 --- /dev/null +++ b/JuliaSyntax/src/integration/expr.jl @@ -0,0 +1,674 @@ +#------------------------------------------------------------------------------- +# Conversion to Base.Expr + +""" + @isexpr(ex, head) + @isexpr(ex, head, nargs) + +Type inference friendly replacement for `Meta.isexpr`. + +When using the pattern +```julia +if @isexpr(ex, headsym) + body +end +``` +Julia's type inference knows `ex isa Expr` inside `body`. But `Meta.isexpr` +hides this information from the compiler, for whatever reason. +""" +macro isexpr(ex, head) + ex isa Symbol || error("First argument to `@isexpr` must be a variable name") + :($(esc(ex)) isa Expr && $(esc(ex)).head == $(esc(head))) +end + +macro isexpr(ex, head, nargs) + ex isa Symbol || error("First argument to `@isexpr` must be a variable name") + :($(esc(ex)) isa Expr && + $(esc(ex)).head == $(esc(head)) && + length($(esc(ex)).args) == $(esc(nargs))) +end + +function _reorder_parameters!(args::Vector{Any}, params_pos::Int) + p = 0 + for i = length(args):-1:1 + ai = args[i] + if !@isexpr(ai, :parameters) + break + end + p = i + end + if p == 0 + return + end + # nest frankentuples parameters sections + for i = length(args)-1:-1:p + pushfirst!((args[i]::Expr).args, pop!(args)) + end + # Move parameters to args[params_pos] + insert!(args, params_pos, pop!(args)) +end + +function _strip_parens(ex::Expr) + while true + if @isexpr(ex, :parens) + if length(ex.args) == 1 + ex = ex.args[1] + else + # Only for error cases + return Expr(:block, ex.args...) 
+            end
+        else
+            return ex
+        end
+    end
+end
+
+
+reverse_nontrivia_children(cursor::RedTreeCursor) = Iterators.filter(should_include_node, Iterators.reverse(cursor))
+reverse_nontrivia_children(cursor) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor)))
+
+# Julia string literals in a `K"string"` node may be split into several chunks
+# interspersed with trivia in two situations:
+# 1. Triple quoted string indentation is trivia
+# 2. An \ before newline removes the newline and any following indentation
+#
+# This function concatenates adjacent string chunks together, as done in the
+# reference parser.
+function _string_to_Expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32)
+    ret = Expr(:string)
+    args2 = Any[]
+    i = 1
+    it = reverse_nontrivia_children(cursor)
+    r = iterate(it)
+    while r !== nothing
+        (child, state) = r
+        ex = node_to_expr(child, source, txtbuf, txtbuf_offset)
+        if isa(ex, String)
+            # This branch combines consequent string chunks together.
+            # It's unrolled once to avoid unnecessary allocations.
+ r = iterate(it, state) + if r === nothing + pushfirst!(ret.args, ex) + continue + end + (child, state) = r + ex2 = node_to_expr(child, source, txtbuf, txtbuf_offset) + if !isa(ex2, String) + pushfirst!(ret.args, ex) + ex = ex2 + # Fall through to process `ex` (!::String) + else + strings = String[ex2, ex] # Note: reversed order since we're iterating backwards + r = iterate(it, state) + while r !== nothing + (child, state) = r + ex = node_to_expr(child, source, txtbuf, txtbuf_offset) + isa(ex, String) || break + pushfirst!(strings, ex) + r = iterate(it, state) + end + buf = IOBuffer() + for s in strings + write(buf, s) + end + pushfirst!(ret.args, String(take!(buf))) + r === nothing && break + # Fall through to process `ex` (!::String) + end + end + # ex not a string + if @isexpr(ex, :parens, 1) + ex = _strip_parens(ex) + if ex isa String + # Wrap interpolated literal strings in (string) so we can + # distinguish them from the surrounding text (issue #38501) + # Ie, "$("str")" vs "str" + # https://github.com/JuliaLang/julia/pull/38692 + ex = Expr(:string, ex) + end + end + @assert ex !== nothing + pushfirst!(ret.args, ex) + r = iterate(it, state) + end + + if length(ret.args) == 1 && ret.args[1] isa String + # If there's a single string remaining after joining, we unwrap + # to give a string literal. + # """\n a\n b""" ==> "a\nb" + return only(ret.args) + else + # This only happens when the kind is K"string" or when an error has occurred. + return ret + end +end + +# Shared fixups for Expr children in cases where the type of the parent node +# affects the child layout. 
+function fixup_Expr_child(head::SyntaxHead, @nospecialize(arg), first::Bool)
+    isa(arg, Expr) || return arg
+    k = kind(head)
+    eq_to_kw_in_call = ((k == K"call" || k == K"dotcall") &&
+                        is_prefix_call(head)) || k == K"ref"
+    eq_to_kw_in_params = k != K"vect" && k != K"curly" &&
+                         k != K"braces" && k != K"ref"
+    coalesce_dot = k in KSet"call dotcall curly" ||
+                   (k == K"quote" && has_flags(head, COLON_QUOTE))
+    was_parens = @isexpr(arg, :parens)
+    arg = _strip_parens(arg)
+    if @isexpr(arg, :(=)) && eq_to_kw_in_call && !first
+        arg = Expr(:kw, arg.args...)
+    elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple
+        # This undoes the "Hack" below
+        h, a = arg.args[1]::Tuple{SyntaxHead,Any}
+        arg = ((!was_parens && coalesce_dot && first) ||
+               is_syntactic_operator(h)) ?
+            Symbol(".", a) : Expr(:., a)
+    elseif @isexpr(arg, :parameters) && eq_to_kw_in_params
+        pargs = arg.args
+        for j = 1:length(pargs)
+            pj = pargs[j]
+            if @isexpr(pj, :(=))
+                pargs[j] = Expr(:kw, pj.args...)
+            end
+        end
+    end
+    return arg
+end
+
+# Remove the `do` block from the final position in a function/macro call arg list
+function _extract_do_lambda!(args::Vector{Any})
+    if length(args) > 1 && Meta.isexpr(args[end], :do_lambda)
+        do_ex = pop!(args)::Expr
+        return Expr(:->, do_ex.args...)
+ else + return nothing + end +end + +function _append_iterspec!(args::Vector{Any}, @nospecialize(ex)) + if @isexpr(ex, :iteration) + for iter in ex.args::Vector{Any} + push!(args, Expr(:(=), iter.args...)) + end + else + push!(args, ex) + end + return args +end + +function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) + args = retexpr.args + firstchildhead = head(cursor) + firstchildrange::UnitRange{UInt32} = byte_range(cursor) + itr = reverse_nontrivia_children(cursor) + r = iterate(itr) + while r !== nothing + (child, state) = r + r = iterate(itr, state) + expr = node_to_expr(child, source, txtbuf, txtbuf_offset) + @assert expr !== nothing + firstchildhead = head(child) + firstchildrange = byte_range(child) + pushfirst!(args, fixup_Expr_child(head(cursor), expr, r === nothing)) + end + return (firstchildhead, firstchildrange) +end + +_expr_leaf_val(node::SyntaxNode, _...) = node.val +_expr_leaf_val(cursor::RedTreeCursor, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32) = + parse_julia_literal(txtbuf, head(cursor), byte_range(cursor) .+ txtbuf_offset) +# Extended in JuliaLowering to support `node_to_expr(::SyntaxTree, ...)` + +# Convert `cursor` (SyntaxNode or RedTreeCursor) to an Expr +# `source` is a SourceFile, or if node was an Expr originally, a LineNumberNode +function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0)) + if !should_include_node(cursor) + return nothing + end + + nodehead = head(cursor) + k = kind(cursor) + srcrange::UnitRange{UInt32} = byte_range(cursor) + if is_leaf(cursor) + if is_error(k) + return k == K"error" ? + Expr(:error) : + Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`") + else + scoped_val = _expr_leaf_val(cursor, txtbuf, txtbuf_offset) + val = @isexpr(scoped_val, :scope_layer) ? 
scoped_val.args[1] : scoped_val + if val isa Union{Int128,UInt128,BigInt} + # Ignore the values of large integers and convert them back to + # symbolic/textural form for compatibility with the Expr + # representation of these. + str = replace(source[srcrange], '_'=>"") + macname = val isa Int128 ? Symbol("@int128_str") : + val isa UInt128 ? Symbol("@uint128_str") : + Symbol("@big_str") + return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + elseif is_identifier(k) + val2 = lower_identifier_name(val, k) + return @isexpr(scoped_val, :scope_layer) ? + Expr(:scope_layer, val2, scoped_val.args[2]) : val2 + else + return scoped_val + end + end + end + + if k == K"string" + return _string_to_Expr(cursor, source, txtbuf, txtbuf_offset) + end + + loc = source_location(LineNumberNode, source, first(srcrange)) + + if k == K"cmdstring" + return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, + _string_to_Expr(cursor, source, txtbuf, txtbuf_offset)) + end + + headstr = untokenize(nodehead, include_flag_suff=false) + headsym = !isnothing(headstr) ? 
+ Symbol(headstr) : + error("Can't untokenize head of kind $(k)") + retexpr = Expr(headsym) + + # Block gets special handling for extra line number nodes + if k == K"block" || (k == K"toplevel" && !has_flags(nodehead, TOPLEVEL_SEMICOLONS_FLAG)) + args = retexpr.args + for child in reverse_nontrivia_children(cursor) + expr = node_to_expr(child, source, txtbuf, txtbuf_offset) + @assert expr !== nothing + # K"block" does not have special first-child handling, so we do not need to keep track of that here + pushfirst!(args, fixup_Expr_child(head(cursor), expr, false)) + pushfirst!(args, source_location(LineNumberNode, source, first(byte_range(child)))) + end + isempty(args) && push!(args, loc) + if k == K"block" && has_flags(nodehead, PARENS_FLAG) + popfirst!(args) + end + return retexpr + end + + # Now recurse to parse all arguments + (firstchildhead, firstchildrange) = parseargs!(retexpr, loc, cursor, source, txtbuf, txtbuf_offset) + + return _node_to_expr(retexpr, loc, srcrange, + firstchildhead, firstchildrange, + nodehead, source) +end + +function adjust_macro_name!(retexpr::Union{Expr, Symbol}) + if retexpr isa Symbol + return lower_identifier_name(retexpr, K"macro_name") + else + retexpr::Expr + if length(retexpr.args) == 2 && retexpr.head == :(.) + arg2 = retexpr.args[2] + if isa(arg2, QuoteNode) && arg2.value isa Symbol + retexpr.args[2] = QuoteNode(lower_identifier_name(arg2.value, K"macro_name")) + end + end + return retexpr + end +end + +# Split out from `node_to_expr` for codesize reasons, to avoid specialization on multiple +# tree types. 
# Kind-specific fixups transforming a generically-converted `retexpr` into the
# exact `Expr` shape the flisp parser would have produced. `loc`/`endloc` are
# the source locations of the node's start/end; `firstchildhead`/
# `firstchildrange` describe the first (source-order) child; `nodehead` is the
# node's own head. Returns the fixed-up expression (not always `retexpr`).
@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode,
                                 srcrange::UnitRange{UInt32},
                                 firstchildhead::SyntaxHead,
                                 firstchildrange::UnitRange{UInt32},
                                 nodehead::SyntaxHead,
                                 source)
    args = retexpr.args
    k = kind(nodehead)
    endloc = source_location(LineNumberNode, source, last(srcrange))
    if (k == K"var" || k == K"char") && length(retexpr.args) == 1
        # `var` and `char` nodes have a single argument which is the value.
        # However, errors can add additional errors tokens which we represent
        # as e.g. `Expr(:var, ..., Expr(:error))`.
        return retexpr.args[1]
    elseif k == K"macro_name"
        return adjust_macro_name!(retexpr.args[1])
    elseif k == K"?"
        # Ternary `a ? b : c` is `Expr(:if, ...)` in Expr form
        retexpr.head = :if
    elseif k == K"op=" && length(args) == 3
        # `a op= b` becomes `Expr(Symbol("op="), a, b)`
        lhs = args[1]
        op  = args[2]
        rhs = args[3]
        headstr = string(args[2], '=')
        retexpr.head = Symbol(headstr)
        retexpr.args = Any[lhs, rhs]
    elseif k == K".op=" && length(args) == 3
        # Broadcasted update `a .op= b` becomes `Expr(Symbol(".op="), a, b)`
        lhs = args[1]
        op  = args[2]
        rhs = args[3]
        headstr = '.' * string(args[2], '=')
        retexpr.head = Symbol(headstr)
        retexpr.args = Any[lhs, rhs]
    elseif k == K"macrocall"
        if length(args) >= 2
            a2 = args[2]
            if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
                # Fix up for custom cmd macros like foo`x`
                args[2] = a2.args[3]
            end
        end
        do_lambda = _extract_do_lambda!(args)
        _reorder_parameters!(args, 2)
        insert!(args, 2, loc)
        if do_lambda isa Expr
            return Expr(:do, retexpr, do_lambda)
        end
    elseif k == K"doc"
        retexpr.head = :macrocall
        retexpr.args = [GlobalRef(Core, Symbol("@doc")), loc, args...]
    elseif k == K"dotcall" || k == K"call"
        # Julia's standard `Expr` ASTs have children stored in a canonical
        # order which is often not always source order. We permute the children
        # here as necessary to get the canonical order.
        if is_infix_op_call(nodehead) || is_postfix_op_call(nodehead)
            args[2], args[1] = args[1], args[2]
        end
        # Lower (call x ') to special ' head
        if is_postfix_op_call(nodehead) && args[1] == Symbol("'")
            popfirst!(args)
            retexpr.head = Symbol("'")
        end
        do_lambda = _extract_do_lambda!(args)
        # Move parameters blocks to args[2]
        _reorder_parameters!(args, 2)
        if retexpr.head === :dotcall
            funcname = args[1]
            if is_prefix_call(nodehead)
                retexpr.head = :.
                retexpr.args = Any[funcname, Expr(:tuple, args[2:end]...)]
            else
                # operator calls
                retexpr.head = :call
                if funcname isa Symbol
                    args[1] = Symbol(:., funcname)
                end # else funcname could be an Expr(:error), just propagate it
            end
        end
        if do_lambda isa Expr
            return Expr(:do, retexpr, do_lambda)
        end
    elseif k == K"."
        if length(args) == 2
            a2 = args[2]
            if !@isexpr(a2, :quote) && !(a2 isa QuoteNode)
                args[2] = QuoteNode(a2)
            end
        elseif length(args) == 1
            # Hack: Here we preserve the head of the operator to determine whether
            # we need to coalesce it with the dot into a single symbol later on
            # (undone by `fixup_Expr_child`).
            args[1] = (firstchildhead, args[1])
        end
    elseif k == K"ref" || k == K"curly"
        # Move parameters blocks to args[2]
        _reorder_parameters!(args, 2)
    elseif k == K"for"
        iters = _append_iterspec!([], args[1])
        args[1] = length(iters) == 1 ? only(iters) : Expr(:block, iters...)
        # Add extra line number node for the `end` of the block. This may seem
        # useless but it affects code coverage.
        push!(args[2].args, endloc)
    elseif k == K"while"
        # Line number node for the `end` of the block as in `for` loops.
        push!(args[2].args, endloc)
    elseif k in KSet"tuple vect braces"
        # Move parameters blocks to args[1]
        _reorder_parameters!(args, 1)
    elseif k == K"where"
        if length(args) == 2
            a2 = args[2]
            if @isexpr(a2, :braces)
                # `T where {A, B}` flattens the braces into the `where` args
                a2a = a2.args
                _reorder_parameters!(a2a, 2)
                retexpr.args = Any[args[1], a2a...]
            end
        end
    elseif k == K"catch"
        if kind(firstchildhead) == K"Placeholder"
            # `catch` with no variable uses `false` as the variable slot
            args[1] = false
        end
    elseif k == K"try"
        # Try children in source order:
        #   try_block catch_var catch_block else_block finally_block
        # Expr ordering:
        #   try_block catch_var catch_block [finally_block] [else_block]
        try_ = args[1]
        catch_var = false
        catch_ = false
        else_ = false
        finally_ = false
        for i in 2:length(args)
            a = args[i]
            if @isexpr(a, :catch)
                catch_var = a.args[1]
                catch_ = a.args[2]
            elseif @isexpr(a, :else)
                else_ = only(a.args)
            elseif @isexpr(a, :finally)
                finally_ = only(a.args)
            elseif @isexpr(a, :error)
                finally_ = Expr(:block, a) # Unclear where to put this but here will do?
            else
                @assert false "Illegal $a subclause in `try`"
            end
        end
        empty!(args)
        push!(args, try_, catch_var, catch_)
        if finally_ !== false || else_ !== false
            push!(args, finally_)
            if else_ !== false
                push!(args, else_)
            end
        end
    elseif k == K"generator"
        # Reconstruct the nested Expr form for generator from our flatter
        # source-ordered `generator` format.
        gen = args[1]
        for j = length(args):-1:2
            gen = Expr(:generator, gen)
            _append_iterspec!(gen.args, args[j])
            if j < length(args)
                # Additional `for`s flatten the inner generator
                gen = Expr(:flatten, gen)
            end
        end
        return gen
    elseif k == K"filter"
        @assert length(args) == 2
        retexpr.args = _append_iterspec!(Any[args[2]], args[1])
    elseif k == K"nrow" || k == K"ncat"
        # For lack of a better place, the dimension argument to nrow/ncat
        # is stored in the flags
        pushfirst!(args, numeric_flags(flags(nodehead)))
    elseif k == K"typed_ncat"
        insert!(args, 2, numeric_flags(flags(nodehead)))
    elseif k == K"elseif"
        # Block for conditional's source location
        args[1] = Expr(:block, loc, args[1])
    elseif k == K"->"
        a1 = args[1]
        if @isexpr(a1, :tuple)
            # TODO: This makes the Expr form objectively worse for the sake of
            # compatibility. We should consider deleting this special case in
            # the future as a minor change.
            if length(a1.args) == 1 &&
                    (!has_flags(firstchildhead, PARENS_FLAG) ||
                     !has_flags(firstchildhead, TRAILING_COMMA_FLAG)) &&
                    !Meta.isexpr(a1.args[1], :parameters)
                # `(a) -> c` is parsed without tuple on lhs in Expr form
                args[1] = a1.args[1]
            elseif length(a1.args) == 2 && (a11 = a1.args[1]; @isexpr(a11, :parameters) &&
                    length(a11.args) <= 1 && !Meta.isexpr(a1.args[2], :(...)))
                # `(a; b=1) -> c` parses args as `block` in Expr form :-(
                if length(a11.args) == 0
                    args[1] = Expr(:block, a1.args[2])
                else
                    a111 = only(a11.args)
                    assgn = @isexpr(a111, :kw) ? Expr(:(=), a111.args...) : a111
                    argloc = source_location(LineNumberNode, source, last(firstchildrange))
                    args[1] = Expr(:block, a1.args[2], argloc, assgn)
                end
            end
        end
        a2 = args[2]
        # Add function source location to rhs; add block if necessary
        if @isexpr(a2, :block)
            pushfirst!(a2.args, loc)
        else
            args[2] = Expr(:block, loc, args[2])
        end
    elseif k == K"function"
        if length(args) > 1
            if has_flags(nodehead, SHORT_FORM_FUNCTION_FLAG)
                a2 = args[2]
                if !@isexpr(a2, :block)
                    args[2] = Expr(:block, a2)
                end
                retexpr.head = :(=)
            else
                a1 = args[1]
                if @isexpr(a1, :tuple)
                    # Convert to weird Expr forms for long-form anonymous functions.
                    #
                    # (function (tuple (... xs)) body) ==> (function (... xs) body)
                    if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...))
                        # function (xs...) \n body end
                        args[1] = a11
                    end
                end
            end
            arg2 = args[2]
            # Only push if this is an Expr - could be an ErrorVal
            isa(arg2, Expr) && pushfirst!(arg2.args, loc)
        end
    elseif k == K"macro"
        if length(args) > 1
            pushfirst!((args[2]::Expr).args, loc)
        end
    elseif k == K"module"
        pushfirst!(args, !has_flags(nodehead, BARE_MODULE_FLAG))
        pushfirst!((args[3]::Expr).args, loc)
    elseif k == K"quote"
        if length(args) == 1
            a1 = only(args)
            if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool)
                # Flisp parser does an optimization here: simple values are stored
                # as inert QuoteNode rather than in `Expr(:quote)` quasiquote
                return QuoteNode(a1)
            end
        end
    elseif k == K"do"
        # Temporary head which is picked up by _extract_do_lambda
        retexpr.head = :do_lambda
    elseif k == K"let"
        a1 = args[1]
        if @isexpr(a1, :block)
            a1a = (args[1]::Expr).args
            filter!(a -> !(a isa LineNumberNode), a1a)
            # Ugly logic to strip the Expr(:block) in certain cases for compatibility
            if length(a1a) == 1
                a = a1a[1]
                if a isa Symbol || @isexpr(a, :(=)) || @isexpr(a, :(::))
                    args[1] = a
                end
            end
        end
    elseif k == K"local" || k === K"global"
        if length(args) == 1
            a1 = args[1]
            if @isexpr(a1, :const)
                # Normalize `local const` to `const local`
                args[1] = Expr(retexpr.head, (a1::Expr).args...)
                retexpr.head = :const
            elseif @isexpr(a1, :tuple)
                # Normalize `global (x, y)` to `global x, y`
                retexpr.args = a1.args
            end
        end
    elseif k == K"return" && isempty(args)
        push!(args, nothing)
    elseif k == K"juxtapose"
        # Juxtaposition `2x` is `Expr(:call, :*, 2, :x)` in Expr form
        retexpr.head = :call
        pushfirst!(args, :*)
    elseif k == K"struct"
        @assert args[2].head == :block
        orig_fields = args[2].args
        fields = Expr(:block)
        for field in orig_fields
            if @isexpr(field, :macrocall) && field.args[1] == GlobalRef(Core, Symbol("@doc"))
                # @doc macro calls don't occur within structs, in Expr form.
                push!(fields.args, field.args[3])
                push!(fields.args, field.args[4])
            else
                push!(fields.args, field)
            end
        end
        args[2] = fields
        pushfirst!(args, has_flags(nodehead, MUTABLE_FLAG))
    elseif k == K"importpath"
        retexpr.head = :.
        for i = 1:length(args)
            ai = args[i]
            if ai isa QuoteNode
                # Permit nonsense additional quoting such as
                # import A.(:b).:c
                args[i] = ai.value
            end
        end
    elseif k == K"wrapper"
        # This should only happen for errors wrapped next to what should have
        # been single statements or atoms - represent these as blocks.
        retexpr.head = :block
    elseif k == K"comparison"
        for i = 2:2:length(args)
            arg = args[i]
            if @isexpr(arg, :., 1)
                # Coalesce dotted comparison operators into single symbols
                args[i] = Symbol(".", arg.args[1])
            end
        end
    end

    return retexpr
end

# Build an `Expr` tree from a finished `ParseStream`, constructing the
# `SourceFile` on demand. `keep_parens` is accepted (and ignored) so the
# generic `_parse` driver can pass it.
function build_tree(::Type{Expr}, stream::ParseStream;
                    filename=nothing, first_line=1,
                    # unused, but required since `_parse` is written generic
                    keep_parens=false)
    source = SourceFile(stream, filename=filename, first_line=first_line)
    return build_tree(Expr, stream, source)
end

# Build an `Expr` tree from `stream` against a pre-constructed `source`.
# Multiple top-level siblings are wrapped in an `Expr(:block)` unless there is
# exactly one, in which case it is returned bare.
function build_tree(::Type{Expr}, stream::ParseStream, source::SourceFile)
    txtbuf = unsafe_textbuf(stream)
    cursor = RedTreeCursor(stream)
    wrapper_head = SyntaxHead(K"wrapper", EMPTY_FLAGS)
    if has_toplevel_siblings(cursor)
        entry = Expr(:block)
        for child in
                Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor))
            pushfirst!(entry.args, fixup_Expr_child(wrapper_head, node_to_expr(child, source, txtbuf), false))
        end
        length(entry.args) == 1 && (entry = only(entry.args))
    else
        entry = fixup_Expr_child(wrapper_head, node_to_expr(cursor, source, txtbuf), false)
    end
    return entry
end

# Convert a syntax tree node to `Expr` using its attached source file.
function to_expr(node)
    source = sourcefile(node)
    txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(source))
    wrapper_head = SyntaxHead(K"wrapper", EMPTY_FLAGS)
    return fixup_Expr_child(wrapper_head, node_to_expr(node, source, txtbuf, UInt32(txtbuf_offset)), false)
end

Base.Expr(node::SyntaxNode)
= to_expr(node)

# ---- JuliaSyntax/src/integration/hooks.jl ----
# This file provides an adaptor to match the API expected by the Julia runtime
# code in the binding Core._parse

const _has_v1_6_hooks = VERSION >= v"1.6"
const _has_v1_10_hooks = isdefined(Core, :_setparser!)

# Location of the first error found in a parsed tree: the error node itself,
# its index within the parent's non-trivia children, and the parent's kind.
struct ErrorSpec
    child_idx::Int
    node::RedTreeCursor
    parent_kind::Kind
end

# Return a GreenTreeCursor for the first error node in the stream's output,
# or `nothing` implicitly when no error is present (index 1 is the sentinel).
function first_error_cursor(stream::ParseStream)
    output = stream.output
    for i = 2:length(output)
        is_error(output[i]) && return GreenTreeCursor(output, i)
    end
end

# Find the first error in a SyntaxNode tree, returning the index of the error
# within its parent and the node itself.
function first_tree_error(c::RedTreeCursor, error_cursor::GreenTreeCursor)
    @assert !is_leaf(c) && !is_error(c)
    first_child = first_error = nothing
    it = reverse_nontrivia_children(c)
    r = iterate(it)
    local child
    while r !== nothing
        (child, state) = r
        r = iterate(it, state)
        (error_cursor in child || error_cursor == child.green) || continue
        is_error(child) && break
        # Error is inside a non-error child; recurse to narrow it down
        return first_tree_error(child, error_cursor)
    end
    i = 1 # count node index
    # Children were iterated in reverse, so the error child's source-order
    # index is 1 + the number of remaining (earlier) children.
    while r !== nothing
        i += 1
        (_, state) = r
        r = iterate(it, state)
    end
    return ErrorSpec(i, child, kind(c))
end

function first_tree_error(stream::ParseStream)
    c = RedTreeCursor(stream)
    err = first_error_cursor(stream)
    for c in reverse_toplevel_siblings(c)
        is_error(c) && return ErrorSpec(0, c, K"wrapper")
        is_leaf(c) && continue
        return first_tree_error(c, err)
    end
end

# Classify an incomplete expression, returning a Symbol compatible with
# Base.incomplete_tag().
#
# Roughly, the intention here is to classify which expression head is expected
# next if the incomplete stream was to continue. (Though this is just rough. In
# practice several categories are combined for the purposes of the REPL -
# perhaps we can/should do something more precise in the future.)
function _incomplete_tag(theerror::ErrorSpec, codelen)
    i = theerror.child_idx
    c = theerror.node
    kp = theerror.parent_kind
    # NOTE(review): `isnothing(c)` is checked but `kind(c)` is then called
    # unconditionally below — this would throw if `c` were ever `nothing`.
    # As `ErrorSpec.node` is typed `RedTreeCursor`, the branch appears dead;
    # confirm before relying on it.
    if isnothing(c) || last_byte(c) < codelen || codelen == 0
        if kind(c) == K"ErrorEofMultiComment"
            # This is the one weird case where the token itself is an
            # incomplete error
            return :comment
        else
            return :none
        end
    elseif first_byte(c) <= codelen && kind(c) != K"ErrorInvalidEscapeSequence"
        # "ErrorInvalidEscapeSequence" may be incomplete, so we don't include it
        # here as a hard error.
        return :none
    end
    if kind(c) == K"error" && is_non_terminal(c)
        for cc in reverse_nontrivia_children(c)
            if kind(cc) == K"error"
                return :other
            end
        end
    end
    if kp == K"wrapper"
        return :other
    elseif kp == K"string" || kp == K"var"
        return :string
    elseif kp == K"cmdstring"
        return :cmd
    elseif kp == K"char"
        return :char
    elseif kp in KSet"block quote let try"
        return :block
    elseif kp in KSet"for while function if"
        return i == 1 ? :other : :block
    elseif kp in KSet"module struct"
        return i == 1 ? :other : :block
    elseif kp == K"do"
        return i < 3 ? :other : :block
    else
        return :other
    end
end

#-------------------------------------------------------------------------------
# Install `parser` as the runtime's Core parser, working around incremental
# compilation restrictions on Julia versions without Core._setparser!.
function _set_core_parse_hook(parser)
    @static if _has_v1_10_hooks
        Core._setparser!(parser)
    else
        # HACK! Fool the runtime into allowing us to set Core._parse, even during
        # incremental compilation. (Ideally we'd just arrange for Core._parse to be
        # set to the JuliaSyntax parser. But how do we signal that to the dumping
        # code outside of the initial creation of Core?)
        i = Base.fieldindex(Base.JLOptions, :incremental)
        ptr = convert(Ptr{fieldtype(Base.JLOptions, i)},
                      cglobal(:jl_options, Base.JLOptions) + fieldoffset(Base.JLOptions, i))
        incremental = unsafe_load(ptr)
        if incremental != 0
            unsafe_store!(ptr, 0)
        end

        Base.eval(Core, :(_parse = $parser))

        if incremental != 0
            unsafe_store!(ptr, incremental)
        end
    end
end


# Wrap the function `f` so that it's always invoked in the given `world_age`
#
# NB: We need an inference barrier of one type or another here to prevent
# invalidations. The invokes provide this currently.
function fix_world_age(f, world_age::UInt)
    if world_age == typemax(UInt)
        function invoke_latestworld(args...; kws...)
            Base.invokelatest(f, args...; kws...)
        end
    else
        function invoke_fixedworld(args...; kws...)
            Base.invoke_in_world(world_age, f, args...; kws...)
        end
    end
end

# Recursively detect an `Expr(:error)` anywhere inside `ex` (including inside
# QuoteNodes).
function _has_nested_error(ex)
    if ex isa Expr
        if ex.head == :error
            return true
        else
            return any(_has_nested_error(e) for e in ex.args)
        end
    elseif ex isa QuoteNode
        return _has_nested_error(ex.value)
    else
        return false
    end
end

# Debug log file for dumping parsed code
const _debug_log = Ref{Union{Nothing,IO}}(nothing)

# The Core._parse entry point: parse `code` starting at byte `offset` with
# rule `options` (:all/:statement/:atom), returning `Core.svec(ex, offset′)`.
# On internal failure, falls back to the flisp reference parser.
function core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol)
    try
        # TODO: Check that we do all this input wrangling without copying the
        # code buffer
        if code isa Core.SimpleVector
            # The C entry points will pass us this form.
            (ptr, len) = code
            code = String(unsafe_wrap(Array, ptr, len))
        elseif !(code isa String || code isa SubString || code isa Vector{UInt8})
            # For non-Base string types, convert to UTF-8 encoding, using an
            # invokelatest to avoid world age issues.
            code = Base.invokelatest(String, code)
        end
        if !isnothing(_debug_log[])
            print(_debug_log[], """
                  #-#-#-------------------------------
                  # ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options"
                  #-#-#-------------------------------
                  """)
            write(_debug_log[], code)
        end

        stream = ParseStream(code, offset+1)
        if options === :statement || options === :atom
            # To copy the flisp parser driver:
            # * Parsing atoms consumes leading trivia
            # * Parsing statements consumes leading+trailing trivia
            bump_trivia(stream)
            if peek(stream) == K"EndMarker"
                # If we're at the end of stream after skipping whitespace, just
                # return `nothing` to indicate this rather than attempting to
                # parse a statement or atom and failing.
                return Core.svec(nothing, last_byte(stream))
            end
        end
        parse!(stream; rule=options)
        if options === :statement
            bump_trivia(stream; skip_newlines=false)
            if peek(stream) == K"NewlineWs"
                bump(stream)
            end
        end

        if any_error(stream)
            pos_before_comments = last_non_whitespace_byte(stream)
            errspec = first_tree_error(stream)
            tag = _incomplete_tag(errspec, pos_before_comments)
            if _has_v1_10_hooks
                exc = ParseError(stream, filename=filename, first_line=lineno,
                                 incomplete_tag=tag)
                msg = sprint(showerror, exc)
                error_ex = Expr(tag === :none ? :error : :incomplete,
                                Meta.ParseError(msg, exc))
            elseif tag !== :none
                # Hack: For older Julia versions, replicate the messages which
                # Base.incomplete_tag() will match
                msg =
                    tag === :string  ? "incomplete: invalid string syntax" :
                    tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" :
                    tag === :block   ? "incomplete: construct requires end" :
                    tag === :cmd     ? "incomplete: invalid \"`\" syntax" :
                    tag === :char    ? "incomplete: invalid character literal" :
                    "incomplete: premature end of input"
                error_ex = Expr(:incomplete, msg)
            else
                # In the flisp parser errors are normally `Expr(:error, msg)` where
                # `msg` is a String. By using a JuliaSyntax.ParseError for msg
                # we can do fancy error reporting instead.
                error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno))
            end
            ex = if options === :all
                # When encountering a toplevel error, the reference parser
                # * truncates the top level expression arg list before that error
                # * includes the last line number
                # * appends the error message
                source = SourceFile(stream, filename=filename, first_line=lineno)
                topex = build_tree(Expr, stream, source)
                @assert topex.head == :toplevel
                i = findfirst(_has_nested_error, topex.args)
                if i > 1 && topex.args[i-1] isa LineNumberNode
                    i -= 1
                end
                resize!(topex.args, i-1)
                push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename))
                push!(topex.args, error_ex)
                topex
            else
                error_ex
            end
        else
            # TODO: Figure out a way to show warnings. Meta.parse() has no API
            # to communicate this, and we also can't show them to stdout as
            # this is too side-effectful and can result in double-reporting in
            # the REPL.
            #
            # show_diagnostics(stdout, stream.diagnostics, code)
            #
            ex = build_tree(Expr, stream; filename=filename, first_line=lineno)
        end

        # Note the next byte in 1-based indexing is `last_byte(stream) + 1` but
        # the Core hook must return an offset (ie, it's 0-based) so the factors
        # of one cancel here.
        last_offset = last_byte(stream)

        if !isnothing(_debug_log[])
            println(_debug_log[], """
                    #-#-#-
                    # EXIT last_offset=$last_offset
                    #-#-#-
                    """)
            flush(_debug_log[])
        end

        # Rewrap result in an svec for use by the C code
        return Core.svec(ex, last_offset)
    catch exc
        if !isnothing(_debug_log[])
            println(_debug_log[], """
                    #-#-#-
                    # ERROR EXIT
                    # $exc
                    #-#-#-
                    """)
            flush(_debug_log[])
        end
        @error("""JuliaSyntax parser failed — falling back to flisp!
               This is not your fault. Please submit a bug report to https://github.com/JuliaLang/JuliaSyntax.jl/issues""",
               exception=(exc,catch_backtrace()),
               offset=offset,
               code=code)

        _fl_parse_hook(code, filename, lineno, offset, options)
    end
end

# Core._parse gained a `lineno` argument in
# https://github.com/JuliaLang/julia/pull/43876
# Prior to this, the following signature was needed:
function core_parser_hook(code, filename, offset, options)
    core_parser_hook(code, filename, 1, offset, options)
end

if _has_v1_10_hooks
    Base.incomplete_tag(e::JuliaSyntax.ParseError) = e.incomplete_tag
else
    # Hack: Meta.parse() attempts to construct a ParseError from a string if it
    # receives `Expr(:error)`. Add an override to the ParseError constructor to
    # prevent this.
    Base.Meta.ParseError(e::JuliaSyntax.ParseError) = e
end

# The parser that was installed before we hooked in, used to restore state in
# `enable_in_core!(false)`.
_default_system_parser = _has_v1_6_hooks ? Core._parse : nothing

# hook into InteractiveUtils.@activate
activate!(enable=true) = enable_in_core!(enable)

"""
    enable_in_core!([enable=true; freeze_world_age=true, debug_filename=nothing])

Connect the JuliaSyntax parser to the Julia runtime so that it replaces the
flisp parser for all parsing work. That is, JuliaSyntax will be used for
`include()`, `Meta.parse()`, the REPL, etc. To reset to the reference parser,
use `enable_in_core!(false)`.

Keyword arguments:
* `freeze_world_age` - Use a fixed world age for the parser to prevent
  recompilation of the parser due to any user-defined methods (default `true`).
* `debug_filename` - File name of parser debug log (defaults to `nothing` or
  the value of `ENV["JULIA_SYNTAX_DEBUG_FILE"]`).
"""
function enable_in_core!(enable=true; freeze_world_age = true,
                         debug_filename = get(ENV, "JULIA_SYNTAX_DEBUG_FILE", nothing))
    if !_has_v1_6_hooks
        error("Cannot use JuliaSyntax as the main Julia parser in Julia version $VERSION < 1.6")
    end
    if enable && !isnothing(debug_filename)
        _debug_log[] = open(debug_filename, "w")
    elseif !enable && !isnothing(_debug_log[])
        close(_debug_log[])
        _debug_log[] = nothing
    end
    if enable
        world_age = freeze_world_age ? Base.get_world_counter() : typemax(UInt)
        _set_core_parse_hook(fix_world_age(core_parser_hook, world_age))
    else
        @assert !isnothing(_default_system_parser)
        _set_core_parse_hook(_default_system_parser)
    end
    nothing
end


#-------------------------------------------------------------------------------
# Tools to call the reference flisp parser
#
# Call the flisp parser
function _fl_parse_hook(code, filename, lineno, offset, options)
    @static if VERSION >= v"1.8.0-DEV.1370" # https://github.com/JuliaLang/julia/pull/43876
        return Core.Compiler.fl_parse(code, filename, lineno, offset, options)
    elseif _has_v1_6_hooks
        return Core.Compiler.fl_parse(code, filename, offset, options)
    else
        if options === :all
            ex = Base.parse_input_line(String(code), filename=filename, depwarn=false)
            if !@isexpr(ex, :toplevel)
                ex = Expr(:toplevel, ex)
            end
            return ex, sizeof(code)
        elseif options === :statement || options === :atom
            ex, pos = Meta.parse(code, offset+1, greedy=options==:statement, raise=false)
            return ex, pos-1
        else
            error("Unknown parse options $options")
        end
    end
end

#------------------------------------------------
# Copy of the Meta.parse() API, but ensuring that we call the flisp parser
# rather than using Meta.parse() which may be using the JuliaSyntax parser.

"""
Like Meta.parse() but always call the flisp reference parser.
"""
function fl_parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true)
    ex, pos = fl_parse(str, 1, greedy=true, raise=raise, depwarn=depwarn)
    if isa(ex,Expr) && ex.head === :error
        return ex
    end
    if pos <= ncodeunits(str)
        raise && throw(Meta.ParseError("extra token after end of expression"))
        return Expr(:error, "extra token after end of expression")
    end
    return ex
end

function fl_parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true,
                  depwarn::Bool=true)
    ex, pos = _fl_parse_string(str, "none", 1, pos, greedy ? :statement : :atom)
    if raise && isa(ex,Expr) && ex.head === :error
        throw(Meta.ParseError(ex.args[1]))
    end
    return ex, pos
end

"""
Like Meta.parseall() but always call the flisp reference parser.
"""
function fl_parseall(text::AbstractString; filename="none", lineno=1)
    ex,_ = _fl_parse_string(text, String(filename), lineno, 1, :all)
    return ex
end

# Shared driver for fl_parse/fl_parseall: bounds-check `index` and translate
# between 1-based indices and the hook's 0-based offsets.
function _fl_parse_string(text::AbstractString, filename::AbstractString,
                          lineno::Integer, index::Integer, options)
    if index < 1 || index > ncodeunits(text) + 1
        throw(BoundsError(text, index))
    end
    ex, offset::Int = _fl_parse_hook(text, filename, lineno, index-1, options)
    ex, offset+1
end

# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases.
fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...)
fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...)
# ---- JuliaSyntax/src/julia/julia_parse_stream.jl ----
# Token flags - may be set for operator kinded tokens
# Operator has a suffix
const SUFFIXED_FLAG = RawFlags(1<<2)

# Set for K"call", K"dotcall" or any syntactic operator heads
# Distinguish various syntaxes which are mapped to K"call"
# (two bits at positions 3-4, decoded by `call_type_flags`)
const PREFIX_CALL_FLAG = RawFlags(0<<3)
const INFIX_FLAG = RawFlags(1<<3)
const PREFIX_OP_FLAG = RawFlags(2<<3)
const POSTFIX_OP_FLAG = RawFlags(3<<3)

# The following flags are quite head-specific and may overlap with numeric flags

"""
Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
"""
const TRIPLE_STRING_FLAG = RawFlags(1<<8)

"""
Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
"""
const RAW_STRING_FLAG = RawFlags(1<<9)

"""
Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
"""
const PARENS_FLAG = RawFlags(1<<8)

"""
Set for various delimited constructs when they contains a trailing comma. For
example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
"""
const TRAILING_COMMA_FLAG = RawFlags(1<<9)

"""
Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
"""
const COLON_QUOTE = RawFlags(1<<8)

"""
Set for K"toplevel" whose statements are separated by semicolons (e.g. the
parenthesized form `(a; b)` at top level) rather than newlines
"""
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8)

"""
Set for K"function" in short form definitions such as `f() = 1`
"""
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8)

"""
Set for K"struct" when mutable
"""
const MUTABLE_FLAG = RawFlags(1<<8)

"""
Set for K"module" when it's not bare (`module`, not `baremodule`)
"""
const BARE_MODULE_FLAG = RawFlags(1<<8)

# Flags holding the dimension of an nrow or other UInt8 not held in the source
# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)

# Encode the integer `n` into the numeric flag bits (8-15), erroring when `n`
# does not fit into the available 8 bits.
function set_numeric_flags(n::Integer)
    f = RawFlags((n << 8) & NUMERIC_FLAGS)
    if numeric_flags(f) != n
        error("Numeric flags unable to hold large integer $n")
    end
    f
end

# Extract the two call-type bits (compare against PREFIX_CALL_FLAG,
# INFIX_FLAG, PREFIX_OP_FLAG, POSTFIX_OP_FLAG).
function call_type_flags(f::RawFlags)
    f & 0b11000
end

# Decode the integer stored by `set_numeric_flags`.
function numeric_flags(f::RawFlags)
    Int((f >> 8) % UInt8)
end

# Token-level flags never carry numeric payload, so mask those bits out.
flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS)

"""
    is_prefix_call(x)

Return true for normal prefix function call syntax such as the `f` call node
parsed from `f(x)`.
"""
is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG

"""
    is_infix_op_call(x)

Return true for infix operator calls such as the `+` call node parsed from
`x + y`.
"""
is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG

"""
    is_prefix_op_call(x)

Return true for prefix operator calls such as the `+` call node parsed from `+x`.
"""
is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG

"""
    is_postfix_op_call(x)

Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`.
"""
is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG


"""
    is_suffixed(x)

Return true for operators which have suffixes, such as `+₁`
"""
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)


"""
    numeric_flags(x)

Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"`
and `K"ncat"`, for now.
"""
numeric_flags(x) = numeric_flags(flags(x))

# Render `head` as a string, optionally appending a suffix per flag (e.g.
# `-t` trivia, `-i` infix, `-p` parens) for debugging / test output.
function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
    str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) :
           untokenize(kind(head); unique=unique))::String
    if include_flag_suff
        is_trivia(head)          && (str = str*"-t")
        is_infix_op_call(head)   && (str = str*"-i")
        is_prefix_op_call(head)  && (str = str*"-pre")
        is_postfix_op_call(head) && (str = str*"-post")

        k = kind(head)
        # Handle numeric flags for nrow/ncat nodes
        if k in KSet"nrow ncat typed_ncat"
            n = numeric_flags(head)
            n != 0 && (str = str*"-"*string(n))
        else
            # Handle head-specific flags that overlap with numeric flags
            if k in KSet"string cmdstring Identifier"
                has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
                has_flags(head, RAW_STRING_FLAG)    && (str = str*"-r")
            elseif k in KSet"tuple block macrocall"
                has_flags(head, PARENS_FLAG) && (str = str*"-p")
            elseif k == K"quote"
                has_flags(head, COLON_QUOTE) && (str = str*"-:")
            elseif k == K"toplevel"
                has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
            elseif k == K"function"
                has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
            elseif k == K"struct"
                has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
            elseif k == K"module"
                has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
            end
            if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
                    has_flags(head, TRAILING_COMMA_FLAG)
                str *= "-,"
            end
        end
        is_suffixed(head) && (str = str*"-suf")
    end
    str
end


#-------------------------------------------------------------------------------
# ParseStream post-processing
Post-processing + +function validate_tokens(stream::ParseStream) + txtbuf = unsafe_textbuf(stream) + charbuf = IOBuffer() + + # Process terminal nodes in the output + fbyte = stream.output[1].byte_span+1 # Start after sentinel + for i = 2:length(stream.output) + node = stream.output[i] + if !is_terminal(node) || kind(node) == K"TOMBSTONE" + continue + end + + k = kind(node) + nbyte = fbyte + node.byte_span + tokrange = fbyte:nbyte-1 + error_kind = K"None" + + if k in KSet"Integer BinInt OctInt HexInt" + # The following shouldn't be able to error... + # parse_int_literal + # parse_uint_literal + elseif k == K"Float" || k == K"Float32" + underflow0 = false + if k == K"Float" + x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte) + # jl_strtod_c can return "underflow" even for valid cases such + # as `5e-324` where the source is an exact representation of + # `x`. So only warn when underflowing to zero. + underflow0 = code === :underflow && x == 0 + else + x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte) + underflow0 = code === :underflow && x == 0 + end + if code === :ok + # pass + elseif code === :overflow + emit_diagnostic(stream, tokrange, + error="overflow in floating point literal") + error_kind = K"ErrorNumericOverflow" + elseif underflow0 + emit_diagnostic(stream, tokrange, + warning="underflow to zero in floating point literal") + end + elseif k == K"Char" + @assert fbyte < nbyte # Already handled in the parser + truncate(charbuf, 0) + had_error = unescape_julia_string(charbuf, txtbuf, fbyte, + nbyte, stream.diagnostics) + if had_error + error_kind = K"ErrorInvalidEscapeSequence" + else + seek(charbuf,0) + read(charbuf, Char) + if !eof(charbuf) + error_kind = K"ErrorOverLongCharacter" + emit_diagnostic(stream, tokrange, + error="character literal contains multiple characters") + end + end + elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG) + had_error = unescape_julia_string(devnull, txtbuf, fbyte, + nbyte, stream.diagnostics) + 
if had_error + error_kind = K"ErrorInvalidEscapeSequence" + end + elseif is_error(k) && k != K"error" + # Emit messages for non-generic token errors + tokstr = String(txtbuf[tokrange]) + msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart" + "$(_token_error_descriptions[k]) $(repr(tokstr[1]))" + elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting" + "$(_token_error_descriptions[k]) $(repr(tokstr))" + else + _token_error_descriptions[k] + end + emit_diagnostic(stream, tokrange, error=msg) + end + + if error_kind != K"None" + # Update the node with new error kind + stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS), + node.byte_span, node.orig_kind) + end + + fbyte = nbyte + end + sort!(stream.diagnostics, by=first_byte) +end + +""" + bump_split(stream, token_spec1, [token_spec2 ...]) + +Bump the next token, splitting it into several pieces + +Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. +If all `nbyte` are positive, the sum must equal the token length. If one +`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of +all `nbyte` must equal zero. + +This is a hack which helps resolves the occasional lexing ambiguity. For +example +* Whether .+ should be a single token or the composite (. +) which is used for + standalone operators. +* Whether ... is splatting (most of the time) or three . tokens in import paths + +TODO: Are these the only cases? Can we replace this general utility with a +simpler one which only splits preceding dots? +""" +function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} + tok = stream.lookahead[stream.lookahead_index] + stream.lookahead_index += 1 + start_b = _next_byte(stream) + toklen = tok.next_byte - start_b + prev_b = start_b + for (i, (nbyte, k, f)) in enumerate(split_spec) + h = SyntaxHead(k, f) + actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte + orig_k = k == K"." ? K"." 
: kind(tok)
+        node = RawGreenNode(h, actual_nbyte, orig_k)
+        push!(stream.output, node)
+        prev_b += actual_nbyte
+        stream.next_byte += actual_nbyte
+    end
+    @assert tok.next_byte == prev_b
+    stream.peek_count = 0
+    return position(stream)
+end
+
+function peek_dotted_op_token(ps, allow_whitespace=false)
+    # Peek the next token, but if it is a dot, peek the next one as well
+    t = peek_token(ps)
+    isdotted = kind(t) == K"."
+    if isdotted
+        t2 = peek_token(ps, 2)
+        if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
+            isdotted = false
+        else
+            t = t2
+        end
+    end
+    return (isdotted, t)
+end
+
+function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
+    if isdot
+        if emit_dot_node
+            dotmark = position(ps)
+            bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+        else
+            bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
+        end
+    end
+    pos = bump(ps, flags, remap_kind=remap_kind)
+    isdot && emit_dot_node && (pos = emit(ps, dotmark, K"."))
+    return pos
+end
diff --git a/JuliaSyntax/src/julia/kinds.jl b/JuliaSyntax/src/julia/kinds.jl
new file mode 100644
index 0000000000000..dd25663b14ef3
--- /dev/null
+++ b/JuliaSyntax/src/julia/kinds.jl
@@ -0,0 +1,1230 @@
+# Definition of Kind type - mapping from token string identifiers to
+# enumeration values as used in @K_str
+
+"""
+    K"name"
+    Kind(namestr)
+
+`Kind` is a type tag for specifying the type of tokens and interior nodes of
+a syntax tree. Abstractly, this tag is used to define our own *sum types* for
+syntax tree nodes. We do this explicitly outside the Julia type system because
+(a) Julia doesn't have sum types and (b) we want concrete data structures which
+are unityped from the Julia compiler's point of view, for efficiency.
+
+Naming rules:
+* Kinds which correspond to exactly one textual form are represented with that
+  text. This includes keywords like K"for" and operators like K"*".
+* Kinds which represent many textual forms have UpperCamelCase names. 
This + includes kinds like K"Identifier" and K"Comment". +* Kinds which exist merely as delimiters are all uppercase +""" +primitive type Kind 16 end + +# The implementation of Kind here is basically similar to @enum. However we use +# the K_str macro to self-name these kinds with their literal representation, +# rather than needing to invent a new name for each. + +const _kind_str_to_int = Dict{String,UInt16}() +const _kind_int_to_str = Dict{UInt16,String}() +const _kind_modules = Dict{Int,Union{Symbol,Module}}( + 0=>nameof(@__MODULE__), + 1=>:JuliaLowering, + 2=>:JuliaSyntaxFormatter +) +# Number of bits reserved for kind id's belonging to a single module +const _kind_nbits = 10 +const _kind_module_id_max = typemax(UInt16) >> _kind_nbits + +function Kind(x::Integer) + if x < 0 || x > typemax(UInt16) + throw(ArgumentError("Kind out of range: $x")) + end + return Base.bitcast(Kind, convert(UInt16, x)) +end + +function Kind(s::AbstractString) + i = get(_kind_str_to_int, s) do + error("unknown Kind name $(repr(s))") + end + Kind(i) +end + +Base.string(x::Kind) = get(_kind_int_to_str, reinterpret(UInt16, x), "") +Base.print(io::IO, x::Kind) = print(io, string(x)) + +Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y) + +function Base.show(io::IO, k::Kind) + print(io, "K\"", k, "\"") +end + +# Save the string representation rather than the bit pattern so that kinds +# can be serialized and deserialized across different JuliaSyntax versions. 
+function Base.write(io::IO, k::Kind) + str = string(k) + write(io, UInt8(sizeof(str))) + write(io, str) +end +function Base.read(io::IO, ::Type{Kind}) + len = read(io, UInt8) + str = String(read(io, len)) + Kind(str) +end + +function Base.parentmodule(k::Kind) + mod_id = reinterpret(UInt16, k) >> _kind_nbits + _kind_modules[mod_id]::Module +end + +function _register_kinds!(kind_modules, int_to_kindstr, kind_str_to_int, mod, module_id, names) + if module_id > _kind_module_id_max + error("Kind module id $module_id is out of range") + elseif length(names) >= 1 << _kind_nbits + error("Too many kind names") + elseif !haskey(kind_modules, module_id) + kind_modules[module_id] = mod + else + m = kind_modules[module_id] + if m == nameof(mod) + # Ok: known kind module, but not loaded until now + kind_modules[module_id] = mod + elseif m == mod + existing_kinds = Union{Nothing, Kind}[(i = get(kind_str_to_int, n, nothing); + isnothing(i) ? nothing : Kind(i)) for n in names] + if any(isnothing, existing_kinds) || + !issorted(existing_kinds) || + any(k->parentmodule(k) != mod, existing_kinds) + error("Error registering kinds for module $mod (register_kinds() called more than once inconsistently, or conflict with existing module kinds?)") + else + # Assume we're re-registering kinds as in top level vs `__init__` + return + end + else + error("Kind module ID $module_id already claimed by module $m") + end + end + _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names) +end + +# This function is separated from `_register_kinds!` to prevent sharing of the variable `i` +# here and in the closure in `_register_kinds!`, which causes boxing and bad inference. +function _register_kinds_names!(int_to_kindstr, kind_str_to_int, module_id, names) + # Process names to conflate category BEGIN/END markers with the first/last + # in the category. 
+ i = 0 + for name in names + normal_kind = false + if startswith(name, "BEGIN_") + j = i + elseif startswith(name, "END_") + j = i - 1 + else + normal_kind = true + j = i + i += 1 + end + kind_int = (module_id << _kind_nbits) | j + push!(kind_str_to_int, name=>kind_int) + if normal_kind + push!(int_to_kindstr, kind_int=>name) + end + end +end + +""" + register_kinds!(mod, module_id, names) + +Register custom `Kind`s with the given `names`, belonging to a module `mod`. +`names` is an array of arbitrary strings. + +In order for kinds to be represented by a small number of bits, some nontrivial +cooperation is required between modules using custom kinds: +* The integer `module_id` is globally unique for each `mod` which will be used + together, and not larger than $_kind_module_id_max. +* No two modules register the same `name`. The semantics of a given `kind` name + should be defined by the module which owns it. + +To allow ranges of kinds to be delimited and quickly tested for, some special +names are allowed: `BEGIN_section` and `END_section` pairs are detected, and +alias the next and previous kind id's respectively so that kinds in `section` +can be tested with `BEGIN_section <= k <= END_section`. +""" +function register_kinds!(mod, module_id, names) + _register_kinds!(_kind_modules, _kind_int_to_str, _kind_str_to_int, mod, module_id, names) +end + +#------------------------------------------------------------------------------- + +""" + K"s" + +The kind of a token or AST internal node with string "s". + +For example +* K")" is the kind of the right parenthesis token +* K"block" is the kind of a block of code (eg, statements within a begin-end). +""" +macro K_str(s) + Kind(s) +end + +""" +A set of kinds which can be used with the `in` operator. For example + + k in KSet"+ - *" +""" +macro KSet_str(str) + kinds = [Kind(s) for s in split(str)] + + quote + ($(kinds...),) + end +end + +""" + kind(x) + +Return the `Kind` of `x`. 
+""" +kind(k::Kind) = k + + +#------------------------------------------------------------------------------- +# Kinds used by JuliaSyntax +register_kinds!(JuliaSyntax, 0, [ + # Whitespace + "Comment" + "Whitespace" + "NewlineWs" # newline-containing whitespace + + # Identifiers + "BEGIN_IDENTIFIERS" + "Identifier" + "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering + # String and command macro names are modeled as a special kind of + # identifier as they need to be mangled before lookup. + "StrMacroName" + "CmdMacroName" + "END_IDENTIFIERS" + + "BEGIN_KEYWORDS" + "baremodule" + "begin" + "break" + "const" + "continue" + "do" + "export" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "BEGIN_BLOCK_CONTINUATION_KEYWORDS" + "catch" + "finally" + "else" + "elseif" + "end" + "END_BLOCK_CONTINUATION_KEYWORDS" + "BEGIN_CONTEXTUAL_KEYWORDS" + # contextual keywords + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "public" + "type" + "var" + "END_CONTEXTUAL_KEYWORDS" + "END_KEYWORDS" + + "BEGIN_LITERAL" + "BEGIN_NUMBERS" + "Bool" + "Integer" + "BinInt" + "HexInt" + "OctInt" + "Float" + "Float32" + "END_NUMBERS" + "String" + "Char" + "CmdString" + "END_LITERAL" + + "BEGIN_DELIMITERS" + # Punctuation + "@" + "," + ";" + + # Paired delimiters + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + "END_DELIMITERS" + + "BEGIN_OPS" + "ErrorInvalidOperator" + "Error**" + + "..." + + # Level 1 + "BEGIN_ASSIGNMENTS" + "BEGIN_SYNTACTIC_ASSIGNMENTS" + "=" + ".=" + "op=" # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= ) + ".op=" + ":=" + "END_SYNTACTIC_ASSIGNMENTS" + "~" + "≔" + "⩴" + "≕" + "END_ASSIGNMENTS" + + "BEGIN_PAIRARROW" + "=>" + "END_PAIRARROW" + + # Level 2 + "BEGIN_CONDITIONAL" + "?" 
+ "END_CONDITIONAL" + + # Level 3 + "BEGIN_ARROW" + "-->" + "<--" + "<-->" + "←" + "→" + "↔" + "↚" + "↛" + "↞" + "↠" + "↢" + "↣" + "↤" + "↦" + "↮" + "⇎" + "⇍" + "⇏" + "⇐" + "⇒" + "⇔" + "⇴" + "⇶" + "⇷" + "⇸" + "⇹" + "⇺" + "⇻" + "⇼" + "⇽" + "⇾" + "⇿" + "⟵" + "⟶" + "⟷" + "⟹" + "⟺" + "⟻" + "⟼" + "⟽" + "⟾" + "⟿" + "⤀" + "⤁" + "⤂" + "⤃" + "⤄" + "⤅" + "⤆" + "⤇" + "⤌" + "⤍" + "⤎" + "⤏" + "⤐" + "⤑" + "⤔" + "⤕" + "⤖" + "⤗" + "⤘" + "⤝" + "⤞" + "⤟" + "⤠" + "⥄" + "⥅" + "⥆" + "⥇" + "⥈" + "⥊" + "⥋" + "⥎" + "⥐" + "⥒" + "⥓" + "⥖" + "⥗" + "⥚" + "⥛" + "⥞" + "⥟" + "⥢" + "⥤" + "⥦" + "⥧" + "⥨" + "⥩" + "⥪" + "⥫" + "⥬" + "⥭" + "⥰" + "⧴" + "⬱" + "⬰" + "⬲" + "⬳" + "⬴" + "⬵" + "⬶" + "⬷" + "⬸" + "⬹" + "⬺" + "⬻" + "⬼" + "⬽" + "⬾" + "⬿" + "⭀" + "⭁" + "⭂" + "⭃" + "⥷" + "⭄" + "⥺" + "⭇" + "⭈" + "⭉" + "⭊" + "⭋" + "⭌" + "←" + "→" + "⇜" + "⇝" + "↜" + "↝" + "↩" + "↪" + "↫" + "↬" + "↼" + "↽" + "⇀" + "⇁" + "⇄" + "⇆" + "⇇" + "⇉" + "⇋" + "⇌" + "⇚" + "⇛" + "⇠" + "⇢" + "↷" + "↶" + "↺" + "↻" + "🢲" + "END_ARROW" + + # Level 4 + "BEGIN_LAZYOR" + "||" + ".||" + "END_LAZYOR" + + # Level 5 + "BEGIN_LAZYAND" + "&&" + ".&&" + "END_LAZYAND" + + # Level 6 + "BEGIN_COMPARISON" + "<:" + ">:" + ">" + "<" + ">=" + "≥" + "<=" + "≤" + "==" + "===" + "≡" + "!=" + "≠" + "!==" + "≢" + "∈" + "in" + "isa" + "∉" + "∋" + "∌" + "⊆" + "⊈" + "⊂" + "⊄" + "⊊" + "∝" + "∊" + "∍" + "∥" + "∦" + "∷" + "∺" + "∻" + "∽" + "∾" + "≁" + "≃" + "≂" + "≄" + "≅" + "≆" + "≇" + "≈" + "≉" + "≊" + "≋" + "≌" + "≍" + "≎" + "≐" + "≑" + "≒" + "≓" + "≖" + "≗" + "≘" + "≙" + "≚" + "≛" + "≜" + "≝" + "≞" + "≟" + "≣" + "≦" + "≧" + "≨" + "≩" + "≪" + "≫" + "≬" + "≭" + "≮" + "≯" + "≰" + "≱" + "≲" + "≳" + "≴" + "≵" + "≶" + "≷" + "≸" + "≹" + "≺" + "≻" + "≼" + "≽" + "≾" + "≿" + "⊀" + "⊁" + "⊃" + "⊅" + "⊇" + "⊉" + "⊋" + "⊏" + "⊐" + "⊑" + "⊒" + "⊜" + "⊩" + "⊬" + "⊮" + "⊰" + "⊱" + "⊲" + "⊳" + "⊴" + "⊵" + "⊶" + "⊷" + "⋍" + "⋐" + "⋑" + "⋕" + "⋖" + "⋗" + "⋘" + "⋙" + "⋚" + "⋛" + "⋜" + "⋝" + "⋞" + "⋟" + "⋠" + "⋡" + "⋢" + "⋣" + "⋤" + "⋥" + "⋦" + "⋧" + "⋨" + "⋩" + "⋪" + "⋫" + 
"⋬" + "⋭" + "⋲" + "⋳" + "⋴" + "⋵" + "⋶" + "⋷" + "⋸" + "⋹" + "⋺" + "⋻" + "⋼" + "⋽" + "⋾" + "⋿" + "⟈" + "⟉" + "⟒" + "⦷" + "⧀" + "⧁" + "⧡" + "⧣" + "⧤" + "⧥" + "⩦" + "⩧" + "⩪" + "⩫" + "⩬" + "⩭" + "⩮" + "⩯" + "⩰" + "⩱" + "⩲" + "⩳" + "⩵" + "⩶" + "⩷" + "⩸" + "⩹" + "⩺" + "⩻" + "⩼" + "⩽" + "⩾" + "⩿" + "⪀" + "⪁" + "⪂" + "⪃" + "⪄" + "⪅" + "⪆" + "⪇" + "⪈" + "⪉" + "⪊" + "⪋" + "⪌" + "⪍" + "⪎" + "⪏" + "⪐" + "⪑" + "⪒" + "⪓" + "⪔" + "⪕" + "⪖" + "⪗" + "⪘" + "⪙" + "⪚" + "⪛" + "⪜" + "⪝" + "⪞" + "⪟" + "⪠" + "⪡" + "⪢" + "⪣" + "⪤" + "⪥" + "⪦" + "⪧" + "⪨" + "⪩" + "⪪" + "⪫" + "⪬" + "⪭" + "⪮" + "⪯" + "⪰" + "⪱" + "⪲" + "⪳" + "⪴" + "⪵" + "⪶" + "⪷" + "⪸" + "⪹" + "⪺" + "⪻" + "⪼" + "⪽" + "⪾" + "⪿" + "⫀" + "⫁" + "⫂" + "⫃" + "⫄" + "⫅" + "⫆" + "⫇" + "⫈" + "⫉" + "⫊" + "⫋" + "⫌" + "⫍" + "⫎" + "⫏" + "⫐" + "⫑" + "⫒" + "⫓" + "⫔" + "⫕" + "⫖" + "⫗" + "⫘" + "⫙" + "⫷" + "⫸" + "⫹" + "⫺" + "⊢" + "⊣" + "⟂" + # ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 + "⫪" + "⫫" + "END_COMPARISON" + + # Level 7 + "BEGIN_PIPE" + "<|" + "|>" + "END_PIPE" + + # Level 8 + "BEGIN_COLON" + ":" + ".." 
+ "…" + "⁝" + "⋮" + "⋱" + "⋰" + "⋯" + "END_COLON" + + # Level 9 + "BEGIN_PLUS" + "\$" + "+" + "-" # also used for "−" + "++" + "⊕" + "⊖" + "⊞" + "⊟" + "|" + "∪" + "∨" + "⊔" + "±" + "∓" + "∔" + "∸" + "≏" + "⊎" + "⊻" + "⊽" + "⋎" + "⋓" + "⟇" + "⧺" + "⧻" + "⨈" + "⨢" + "⨣" + "⨤" + "⨥" + "⨦" + "⨧" + "⨨" + "⨩" + "⨪" + "⨫" + "⨬" + "⨭" + "⨮" + "⨹" + "⨺" + "⩁" + "⩂" + "⩅" + "⩊" + "⩌" + "⩏" + "⩐" + "⩒" + "⩔" + "⩖" + "⩗" + "⩛" + "⩝" + "⩡" + "⩢" + "⩣" + "¦" + "END_PLUS" + + # Level 10 + "BEGIN_TIMES" + "*" + "/" + "÷" + "%" + "⋅" # also used for lookalikes "·" and "·" + "∘" + "×" + "\\" + "&" + "∩" + "∧" + "⊗" + "⊘" + "⊙" + "⊚" + "⊛" + "⊠" + "⊡" + "⊓" + "∗" + "∙" + "∤" + "⅋" + "≀" + "⊼" + "⋄" + "⋆" + "⋇" + "⋉" + "⋊" + "⋋" + "⋌" + "⋏" + "⋒" + "⟑" + "⦸" + "⦼" + "⦾" + "⦿" + "⧶" + "⧷" + "⨇" + "⨰" + "⨱" + "⨲" + "⨳" + "⨴" + "⨵" + "⨶" + "⨷" + "⨸" + "⨻" + "⨼" + "⨽" + "⩀" + "⩃" + "⩄" + "⩋" + "⩍" + "⩎" + "⩑" + "⩓" + "⩕" + "⩘" + "⩚" + "⩜" + "⩞" + "⩟" + "⩠" + "⫛" + "⊍" + "▷" + "⨝" + "⟕" + "⟖" + "⟗" + "⌿" + "⨟" + "END_TIMES" + + # Level 11 + "BEGIN_RATIONAL" + "//" + "END_RATIONAL" + + # Level 12 + "BEGIN_BITSHIFTS" + "<<" + ">>" + ">>>" + "END_BITSHIFTS" + + # Level 13 + "BEGIN_POWER" + "^" + "↑" + "↓" + "⇵" + "⟰" + "⟱" + "⤈" + "⤉" + "⤊" + "⤋" + "⤒" + "⤓" + "⥉" + "⥌" + "⥍" + "⥏" + "⥑" + "⥔" + "⥕" + "⥘" + "⥙" + "⥜" + "⥝" + "⥠" + "⥡" + "⥣" + "⥥" + "⥮" + "⥯" + "↑" + "↓" + "END_POWER" + + # Level 14 + "BEGIN_DECL" + "::" + "END_DECL" + + # Level 15 + "BEGIN_WHERE" + "where" + "END_WHERE" + + # Level 16 + "BEGIN_DOT" + "." + "END_DOT" + + "!" + "'" + ".'" + "->" + + "BEGIN_UNICODE_OPS" + "¬" + "√" + "∛" + "∜" + "END_UNICODE_OPS" + "END_OPS" + + # 2. Nonterminals which are exposed in the AST, but where the surface + # syntax doesn't have a token corresponding to the node type. 
+ "BEGIN_SYNTAX_KINDS" + "block" + "call" + "dotcall" + "comparison" + "curly" + "juxtapose" # Numeric juxtaposition like 2x + "string" # A string interior node (possibly containing interpolations) + "cmdstring" # A cmd string node (containing delimiters plus string) + "char" # A char string node (containing delims + char data) + "macrocall" + "parameters" # the list after ; in f(; a=1) + "toplevel" + "tuple" + "ref" + "vect" + "parens" + "importpath" + # Concatenation syntax + "braces" + "bracescat" + "hcat" + "vcat" + "ncat" + "typed_hcat" + "typed_vcat" + "typed_ncat" + "row" + "nrow" + # Comprehensions + "generator" + "filter" + "iteration" + "comprehension" + "typed_comprehension" + "macro_name" + # Container for a single statement/atom plus any trivia and errors + "wrapper" + "END_SYNTAX_KINDS" + + # Special tokens + "TOMBSTONE" # Empty placeholder for kind to be filled later + "None" # Never emitted by lexer/parser + "EndMarker" # EOF + + "BEGIN_ERRORS" + # Tokenization errors + "ErrorEofMultiComment" + "ErrorInvalidNumericConstant" + "ErrorHexFloatMustContainP" + "ErrorAmbiguousNumericConstant" + "ErrorAmbiguousNumericDotMultiply" + "ErrorInvalidInterpolationTerminator" + "ErrorNumericOverflow" + "ErrorInvalidEscapeSequence" + "ErrorOverLongCharacter" + "ErrorInvalidUTF8" + "ErrorInvisibleChar" + "ErrorIdentifierStart" + "ErrorUnknownCharacter" + "ErrorBidiFormatting" + # Generic error + "error" + "END_ERRORS" +]) + +#------------------------------------------------------------------------------- +const _nonunique_kind_names = Set([ + K"Comment" + K"Whitespace" + K"NewlineWs" + K"Identifier" + K"Placeholder" + + K"ErrorEofMultiComment" + K"ErrorInvalidNumericConstant" + K"ErrorHexFloatMustContainP" + K"ErrorAmbiguousNumericConstant" + K"ErrorAmbiguousNumericDotMultiply" + K"ErrorInvalidInterpolationTerminator" + K"ErrorNumericOverflow" + K"ErrorInvalidEscapeSequence" + K"ErrorOverLongCharacter" + K"ErrorInvalidUTF8" + K"ErrorInvisibleChar" + 
K"ErrorUnknownCharacter" + K"ErrorBidiFormatting" + K"ErrorInvalidOperator" + + K"Bool" + K"Integer" + K"BinInt" + K"HexInt" + K"OctInt" + K"Float" + K"Float32" + K"String" + K"Char" + K"CmdString" + + K"StrMacroName" + K"CmdMacroName" +]) + +""" +Return the string representation of a token kind, or `nothing` if the kind +represents a class of tokens like K"Identifier". + +When `unique=true` only return a string when the kind uniquely defines the +corresponding input token, otherwise return `nothing`. When `unique=false`, +return the name of the kind. + +TODO: Replace `untokenize()` with `Base.string()`? +""" +function untokenize(k::Kind; unique=true) + if unique && k in _nonunique_kind_names + return nothing + else + return string(k) + end +end + +# Error kind => description +const _token_error_descriptions = Dict{Kind, String}( + K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#", + K"ErrorInvalidNumericConstant" => "invalid numeric constant", + K"ErrorHexFloatMustContainP" => "hex float literal must contain `p` or `P`", + K"ErrorAmbiguousNumericConstant" => "ambiguous `.` syntax; add whitespace to clarify (eg `1.+2` might be `1.0+2` or `1 .+ 2`)", + K"ErrorAmbiguousNumericDotMultiply" => "numeric constant cannot be implicitly multiplied because it ends with `.`", + K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead", + K"ErrorNumericOverflow"=>"overflow in numeric literal", + K"ErrorInvalidEscapeSequence"=>"invalid string escape sequence", + K"ErrorOverLongCharacter"=>"character literal contains multiple characters", + K"ErrorInvalidUTF8"=>"invalid UTF-8 sequence", + K"ErrorInvisibleChar"=>"invisible character", + K"ErrorIdentifierStart" => "identifier cannot begin with character", + K"ErrorUnknownCharacter"=>"unknown unicode character", + K"ErrorBidiFormatting"=>"unbalanced bidirectional unicode formatting", + K"ErrorInvalidOperator" => "invalid operator", + K"Error**" => "use 
`x^y` instead of `x**y` for exponentiation, and `x...` instead of `**x` for splatting", + K"error" => "unknown error token", +) + +#------------------------------------------------------------------------------- +# Predicates +is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS" +is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS" +is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**" +is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS" +is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" <= k <= K"END_BLOCK_CONTINUATION_KEYWORDS" +is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL" +is_number(k::Kind) = K"BEGIN_NUMBERS" <= k <= K"END_NUMBERS" +is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS" +is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") + +is_identifier(x) = is_identifier(kind(x)) +is_contextual_keyword(x) = is_contextual_keyword(kind(x)) +is_error(x) = is_error(kind(x)) +is_keyword(x) = is_keyword(kind(x)) +is_literal(x) = is_literal(kind(x)) +is_number(x) = is_number(kind(x)) +is_operator(x) = is_operator(kind(x)) +is_word_operator(x) = is_word_operator(kind(x)) + +# Predicates for operator precedence +# FIXME: Review how precedence depends on dottedness, eg +# https://github.com/JuliaLang/julia/pull/36725 +is_prec_assignment(x) = K"BEGIN_ASSIGNMENTS" <= kind(x) <= K"END_ASSIGNMENTS" +is_prec_pair(x) = K"BEGIN_PAIRARROW" <= kind(x) <= K"END_PAIRARROW" +is_prec_conditional(x) = K"BEGIN_CONDITIONAL" <= kind(x) <= K"END_CONDITIONAL" +is_prec_arrow(x) = K"BEGIN_ARROW" <= kind(x) <= K"END_ARROW" +is_prec_lazy_or(x) = K"BEGIN_LAZYOR" <= kind(x) <= K"END_LAZYOR" +is_prec_lazy_and(x) = K"BEGIN_LAZYAND" <= kind(x) <= K"END_LAZYAND" +is_prec_comparison(x) = K"BEGIN_COMPARISON" <= kind(x) <= K"END_COMPARISON" +is_prec_pipe(x) = K"BEGIN_PIPE" <= kind(x) <= K"END_PIPE" 
+is_prec_colon(x) = K"BEGIN_COLON" <= kind(x) <= K"END_COLON" +is_prec_plus(x) = K"BEGIN_PLUS" <= kind(x) <= K"END_PLUS" +is_prec_bitshift(x) = K"BEGIN_BITSHIFTS" <= kind(x) <= K"END_BITSHIFTS" +is_prec_times(x) = K"BEGIN_TIMES" <= kind(x) <= K"END_TIMES" +is_prec_rational(x) = K"BEGIN_RATIONAL" <= kind(x) <= K"END_RATIONAL" +is_prec_power(x) = K"BEGIN_POWER" <= kind(x) <= K"END_POWER" +is_prec_decl(x) = K"BEGIN_DECL" <= kind(x) <= K"END_DECL" +is_prec_where(x) = K"BEGIN_WHERE" <= kind(x) <= K"END_WHERE" +is_prec_dot(x) = K"BEGIN_DOT" <= kind(x) <= K"END_DOT" +is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS" +is_prec_pipe_lt(x) = kind(x) == K"<|" +is_prec_pipe_gt(x) = kind(x) == K"|>" +is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" +is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS" + +function is_string_delim(x) + kind(x) in (K"\"", K"\"\"\"") +end + +function is_radical_op(x) + kind(x) in (K"√", K"∛", K"∜") +end + +""" +Return true if `x` has whitespace or comment kind +""" +function is_whitespace(x) + k = kind(x) + return k == K"Whitespace" || k == K"NewlineWs" || k == K"Comment" +end + +function is_syntactic_operator(x) + k = kind(x) + # TODO: Do we need to disallow dotted and suffixed forms when this is used + # in the parser? The lexer itself usually disallows such tokens, so it's + # not clear whether we need to handle them. (Though note `.->` is a + # token...) + return k in KSet"&& || . ... ->" || is_syntactic_assignment(k) +end diff --git a/JuliaSyntax/src/julia/literal_parsing.jl b/JuliaSyntax/src/julia/literal_parsing.jl new file mode 100644 index 0000000000000..5a087eac6d54e --- /dev/null +++ b/JuliaSyntax/src/julia/literal_parsing.jl @@ -0,0 +1,473 @@ +""" +Nontrivia tokens (leaf nodes / literals) which are malformed are parsed into +ErrorVal when `ignore_errors=true` during parsing. 
+""" +struct ErrorVal +end + +Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red) + +#------------------------------------------------------------------------------- +# This file contains utility functions for converting undecorated source +# strings into Julia values. For example, string->number, string unescaping, etc. + +function parse_int_literal(str::AbstractString) + # TODO: A specialized code path here can be a lot faster and also + # allocation free + str = replace(replace(str, '_'=>""), '−'=>'-') + x = Base.tryparse(Int, str) + if Int === Int32 && isnothing(x) + x = Base.tryparse(Int64, str) + end + if isnothing(x) + x = Base.tryparse(Int128, str) + if x === nothing + x = Base.parse(BigInt, str) + end + end + return x +end + +function parse_uint_literal(str::AbstractString, k) + str = replace(str, '_'=>"") + if startswith(str, '+') + str = str[2:end] + end + ndigits = length(str)-2 + if k == K"HexInt" + return ndigits <= 2 ? Base.parse(UInt8, str) : + ndigits <= 4 ? Base.parse(UInt16, str) : + ndigits <= 8 ? Base.parse(UInt32, str) : + ndigits <= 16 ? Base.parse(UInt64, str) : + ndigits <= 32 ? Base.parse(UInt128, str) : + Base.parse(BigInt, str) + elseif k == K"BinInt" + ndigits = length(str)-2 + return ndigits <= 8 ? Base.parse(UInt8, str) : + ndigits <= 16 ? Base.parse(UInt16, str) : + ndigits <= 32 ? Base.parse(UInt32, str) : + ndigits <= 64 ? Base.parse(UInt64, str) : + ndigits <= 128 ? Base.parse(UInt128, str) : + Base.parse(BigInt, str) + elseif k == K"OctInt" + x = Base.tryparse(UInt64, str) + if isnothing(x) + x = Base.tryparse(UInt128, str) + if isnothing(x) + x = Base.parse(BigInt, str) + elseif ndigits > 43 + x = BigInt(x) + end + else + x = ndigits <= 3 && x <= typemax(UInt8) ? UInt8(x) : + ndigits <= 6 && x <= typemax(UInt16) ? UInt16(x) : + ndigits <= 11 && x <= typemax(UInt32) ? UInt32(x) : + ndigits <= 22 ? x : + ndigits <= 43 ? 
UInt128(x) : + BigInt(x) + end + return x + end +end + +#------------------------------------------------------------------------------- +""" +Like `Base.parse(Union{Float64,Float32}, str)`, but permits float underflow + +Parse a Float64. str[firstind:lastind] must be a valid floating point literal +string. If the value is outside Float64 range. +""" +function parse_float_literal(::Type{T}, str::Union{String,SubString,Vector{UInt8}}, + firstind::Integer, endind::Integer) where {T} # force specialize with where {T} + strsize = endind - firstind + bufsz = 50 + if strsize < bufsz + buf = Ref{NTuple{bufsz, UInt8}}() + ptr = Base.unsafe_convert(Ptr{UInt8}, pointer_from_objref(buf)) + GC.@preserve str buf begin + n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize) + _unsafe_parse_float(T, ptr, n) + end + else + # Slower path with allocation. + buf = Vector{UInt8}(undef, strsize+1) + ptr = pointer(buf) + GC.@preserve str buf begin + n = _copy_normalize_number!(ptr, pointer(str, firstind), strsize) + _unsafe_parse_float(T, ptr, n) + end + end +end + +# Like replace(replace(str, '_'=>""), '−'=>'-') +# dest must be of size at least srcsize+1 +function _copy_normalize_number!(dest, src, srcsize) + i = 0 + j = 0 + while i < srcsize + b = unsafe_load(src + i) + if b == UInt8('_') + i += 1 + continue + elseif b == 0xe2 && i+2 < srcsize && + unsafe_load(src + i + 1) == 0x88 && + unsafe_load(src + i + 2) == 0x92 + # src at i,i+1,i+2 is UTF-8 code for unicode minus sign '−' + b = UInt8('-') + i += 2 + end + unsafe_store!(dest+j, b) + i += 1 + j += 1 + end + unsafe_store!(dest+j, 0x00) + return j +end + +# Internals of parse_float_literal, split into a separate function to avoid some +# apparent codegen issues https://github.com/JuliaLang/julia/issues/46509 +# (perhaps we don't want the `buf` in `GC.@preserve buf` to be stack allocated +# on one branch and heap allocated in another?) 
+@inline function _unsafe_parse_float(::Type{Float64}, ptr, strsize) + Libc.errno(0) + endptr = Ref{Ptr{UInt8}}(C_NULL) + x = ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr) + @check endptr[] == ptr + strsize + status = :ok + if Libc.errno() == Libc.ERANGE + # strtod man page: + # * If the correct value would cause overflow, plus or + # minus HUGE_VAL, HUGE_VALF, or HUGE_VALL is returned and + # ERANGE is stored in errno. + # * If the correct value would cause underflow, a value with + # magnitude no larger than DBL_MIN, FLT_MIN, or LDBL_MIN is + # returned and ERANGE is stored in errno. + status = abs(x) < 1 ? :underflow : :overflow + end + return (x, status) +end + +@inline function _unsafe_parse_float(::Type{Float32}, ptr, strsize) + # Convert float exponent 'f' to 'e' for strtof, eg, 1.0f0 => 1.0e0 + # Presumes we can modify the data in ptr! + for p in ptr+strsize-1:-1:ptr + if unsafe_load(p) == UInt8('f') + unsafe_store!(p, UInt8('e')) + break + end + end + Libc.errno(0) + endptr = Ref{Ptr{UInt8}}(C_NULL) + status = :ok + @static if Sys.iswindows() + # Call strtod here and convert to Float32 on the Julia side because + # strtof seems buggy on windows and doesn't set ERANGE correctly on + # overflow. See also + # https://github.com/JuliaLang/julia/issues/46544 + x = Float32(ccall(:jl_strtod_c, Cdouble, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr)) + if isinf(x) + status = :overflow + # Underflow not detected, but that will only be a warning elsewhere. + end + else + x = ccall(:jl_strtof_c, Cfloat, (Ptr{UInt8}, Ptr{Ptr{UInt8}}), ptr, endptr) + end + @check endptr[] == ptr + strsize + if Libc.errno() == Libc.ERANGE + status = abs(x) < 1 ? 
:underflow : :overflow + end + return (x, status) +end + + +#------------------------------------------------------------------------------- +""" +Process Julia source code escape sequences for raw strings +""" +function unescape_raw_string(io::IO, txtbuf::Vector{UInt8}, + firstind, endind, is_cmd::Bool) + delim = is_cmd ? u8"`" : u8"\"" + i = firstind + while i < endind + c = txtbuf[i] + if c != u8"\\" + if c == u8"\r" + # convert literal \r and \r\n in strings to \n (issue #11988) + if i+1 < endind && txtbuf[i+1] == u8"\n" + i += 1 + end + c = u8"\n" + end + write(io, c) + i += 1 + continue + end + # Process \ escape sequences + j = i + while j < endind && txtbuf[j] == u8"\\" + j += 1 + end + nbackslash = j - i + if (j < endind && txtbuf[j] == delim) || j >= endind + # Backslashes before a delimiter must also be escaped + nbackslash = div(nbackslash,2) + end + for k = 1:nbackslash + write(io, u8"\\") + end + i = j + if i < endind + write(io, txtbuf[i]) + i += 1 + end + end +end + +""" +Process Julia source code escape sequences for non-raw strings. +`txtbuf` should be passed without delimiting quotes. +""" +function unescape_julia_string(io::IO, txtbuf::Vector{UInt8}, + firstind, endind, diagnostics) + had_error = false + i = firstind + while i < endind + c = txtbuf[i] + if c != u8"\\" + if c == u8"\r" + # convert literal \r and \r\n in strings to \n (issue #11988) + if i+1 < endind && txtbuf[i+1] == u8"\n" + i += 1 + end + c = u8"\n" + end + write(io, c) + i = nextind(txtbuf, i) + continue + end + # Process \ escape sequences. See also Base.unescape_string which some + # of this code derives from (but which disallows \` \' \$) + escstart = i + i += 1 + if i >= endind + emit_diagnostic(diagnostics, escstart:endind-1, + error="invalid escape sequence") + had_error = true + break + end + c = txtbuf[i] + if c == u8"x" || c == u8"u" || c == u8"U" + n = k = 0 + m = c == u8"x" ? 2 : + c == u8"u" ? 
4 : 8 + while (k += 1) <= m && i+1 < endind + nc = txtbuf[i+1] + n = u8"0" <= nc <= u8"9" ? n<<4 + (nc-u8"0") : + u8"a" <= nc <= u8"f" ? n<<4 + (nc-u8"a"+10) : + u8"A" <= nc <= u8"F" ? n<<4 + (nc-u8"A"+10) : break + i += 1 + end + if k == 1 || n > 0x10ffff + u = m == 4 ? u8"u" : u8"U" + msg = (m == 2) ? "invalid hex escape sequence" : + "invalid unicode escape sequence" + emit_diagnostic(diagnostics, escstart:i, error=msg) + had_error = true + else + if m == 2 # \x escape sequence + write(io, UInt8(n)) + else + print(io, Char(n)) + end + end + elseif u8"0" <= c <= u8"7" + k = 1 + n = Int(c - u8"0") + while (k += 1) <= 3 && i+1 < endind + c = txtbuf[i+1] + n = (u8"0" <= c <= u8"7") ? n<<3 + c-u8"0" : break + i += 1 + end + if n > 255 + emit_diagnostic(diagnostics, escstart:i, + error="invalid octal escape sequence") + had_error = true + else + write(io, UInt8(n)) + end + else + u = # C escapes + c == u8"n" ? u8"\n" : + c == u8"t" ? u8"\t" : + c == u8"r" ? u8"\r" : + c == u8"e" ? u8"\e" : + c == u8"b" ? u8"\b" : + c == u8"f" ? u8"\f" : + c == u8"v" ? u8"\v" : + c == u8"a" ? u8"\a" : + # Literal escapes allowed in Julia source + c == u8"\\" ? u8"\\" : + c == u8"'" ? u8"'" : + c == u8"\"" ? u8"\"" : + c == u8"$" ? u8"$" : + c == u8"`" ? u8"`" : + nothing + if isnothing(u) + emit_diagnostic(diagnostics, escstart:i, + error="invalid escape sequence") + had_error = true + else + write(io, u) + end + end + # For non-ascii characters we may not be in the middle of the UTF-8 + # encoding for that char, but this doesn't matter because unescaping + # only relies on the ascii subset. + i += 1 + end + return had_error +end + +#------------------------------------------------------------------------------- +# Unicode normalization. As of Julia 1.8, this is part of Base and the Unicode +# stdlib under the name `Unicode.julia_chartransform`. 
See +# https://github.com/JuliaLang/julia/pull/42561 +# +# To allow use on older Julia versions and to workaround the bug +# https://github.com/JuliaLang/julia/issues/45716 +# we reproduce a specialized version of that logic here. + +# static wrapper around user callback function +function utf8proc_custom_func(codepoint::UInt32, ::Ptr{Cvoid})::UInt32 + (codepoint == 0x025B ? 0x03B5 : # 'ɛ' => 'ε' + codepoint == 0x00B5 ? 0x03BC : # 'µ' => 'μ' + codepoint == 0x00B7 ? 0x22C5 : # '·' => '⋅' + codepoint == 0x0387 ? 0x22C5 : # '·' => '⋅' + codepoint == 0x2212 ? 0x002D : # '−' (\minus) => '-' + codepoint == 0x210F ? 0x0127 : # 'ℏ' (\hslash) => 'ħ' \hbar + codepoint) +end + +function utf8proc_decompose(str, options, buffer, nwords) + ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ptr{Cvoid}), + str, sizeof(str), buffer, nwords, options, + @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ptr{Cvoid})), C_NULL) + ret < 0 && Base.Unicode.utf8proc_error(ret) + return ret +end + +function utf8proc_map(str::Union{String,SubString{String}}, options::Integer) + nwords = utf8proc_decompose(str, options, C_NULL, 0) + buffer = Base.StringVector(nwords*4) + nwords = utf8proc_decompose(str, options, buffer, nwords) + nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options) + nbytes < 0 && Base.Unicode.utf8proc_error(nbytes) + return String(resize!(buffer, nbytes)) +end + +function normalize_identifier(str) + flags = Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE + return isascii(str) ? str : utf8proc_map(str, flags) +end + + +#------------------------------------------------------------------------------- +function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) + k = kind(head) + # Any errors parsing literals are represented as ErrorVal() - this can + # happen when the user sets `ignore_errors=true` during parsing. 
+ if k == K"Float" + v, code = parse_float_literal(Float64, txtbuf, first(srcrange), + last(srcrange)+1) + return (code === :ok || code === :underflow) ? v : ErrorVal() + elseif k == K"Float32" + v, code = parse_float_literal(Float32, txtbuf, first(srcrange), + last(srcrange)+1) + return (code === :ok || code === :underflow) ? v : ErrorVal() + elseif k == K"Char" + io = IOBuffer() + had_error = unescape_julia_string(io, txtbuf, first(srcrange), + last(srcrange)+1, Diagnostic[]) + if had_error + return ErrorVal() + else + seek(io, 0) + c = read(io, Char) + return eof(io) ? c : ErrorVal() + end + elseif k in KSet"String CmdString" + io = IOBuffer() + had_error = false + if has_flags(head, RAW_STRING_FLAG) + unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, + k == K"CmdString") + else + had_error = unescape_julia_string(io, txtbuf, first(srcrange), + last(srcrange)+1, Diagnostic[]) + end + return had_error ? ErrorVal() : String(take!(io)) + elseif k == K"Bool" + return txtbuf[first(srcrange)] == u8"t" + end + + # TODO: Avoid allocating temporary String here + val_str = String(txtbuf[srcrange]) + if k == K"Integer" + parse_int_literal(val_str) + elseif k in KSet"BinInt OctInt HexInt" + parse_uint_literal(val_str, k) + elseif is_identifier(k) + if has_flags(head, RAW_STRING_FLAG) + io = IOBuffer() + unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false) + Symbol(normalize_identifier(String(take!(io)))) + else + Symbol(normalize_identifier(val_str)) + end + elseif is_operator(k) + isempty(srcrange) ? + Symbol(untokenize(k)) : # synthetic invisible tokens + Symbol(normalize_identifier(val_str)) + elseif k == K"error" + ErrorVal() + elseif is_syntax_kind(head) + nothing + elseif is_keyword(k) + # This should only happen for tokens nested inside errors + Symbol(val_str) + else + # Other kinds should only happen for tokens nested inside errors + # TODO: Consolidate this with the is_keyword() above? Something else? 
+ ErrorVal() + end +end + +""" + lower_identifier_name(name, kind) + +Lower a Julia identifier `name` of given `kind` to the name used by the Julia +runtime. (In particular, this handles the name mangling of macros.) + +This is a lowering (rather than parsing) step, but is needed for `Expr` +conversion and is also used for pretty printing. +""" +function lower_identifier_name(name::AbstractString, k::Kind) + # Replicate eager lowering done by the flisp parser + if k == K"macro_name" + name == "." ? "@__dot__" : "@$name" + elseif k == K"StrMacroName" + "@$(name)_str" + elseif k == K"CmdMacroName" + "@$(name)_cmd" + else + name + end +end + +function lower_identifier_name(name::Symbol, k::Kind) + if k == K"Identifier" + name # avoid unnecessary conversion + else + Symbol(lower_identifier_name(string(name), k)) + end +end diff --git a/JuliaSyntax/src/julia/parser.jl b/JuliaSyntax/src/julia/parser.jl new file mode 100644 index 0000000000000..70a345057a56b --- /dev/null +++ b/JuliaSyntax/src/julia/parser.jl @@ -0,0 +1,3693 @@ +""" + ParseState(stream::ParseStream) + +ParseState is an internal data structure wrapping `ParseStream` to carry parser +context as we recursively descend into the parse tree. For example, normally +`x -y` means `(x) - (y)`, but when parsing matrix literals we're in +`space_sensitive` mode, and `[x -y]` means [(x) (-y)]. 
+""" +struct ParseState + stream::ParseStream + + # Disable range colon for parsing ternary conditional operator + range_colon_enabled::Bool + # In space-sensitive mode "x -y" is 2 expressions, not a subtraction + space_sensitive::Bool + # Seeing `for` stops parsing macro arguments and makes a generator + for_generator::Bool + # Treat 'end' like a normal symbol instead of a reserved word + end_symbol::Bool + # Treat newline like ordinary whitespace instead of as a potential separator + whitespace_newline::Bool + # Enable parsing `where` with high precedence + where_enabled::Bool +end + +# Normal context +function ParseState(stream::ParseStream) + ParseState(stream, true, false, false, false, false, true) +end + +function ParseState(ps::ParseState; range_colon_enabled=nothing, + space_sensitive=nothing, for_generator=nothing, + end_symbol=nothing, whitespace_newline=nothing, + where_enabled=nothing) + ParseState(ps.stream, + range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, + space_sensitive === nothing ? ps.space_sensitive : space_sensitive, + for_generator === nothing ? ps.for_generator : for_generator, + end_symbol === nothing ? ps.end_symbol : end_symbol, + whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, + where_enabled === nothing ? ps.where_enabled : where_enabled) +end + +# Functions to change parse state + +function normal_context(ps::ParseState) + ParseState(ps, + range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + for_generator=false, + end_symbol=false, + whitespace_newline=false) +end + +function with_space_sensitive(ps::ParseState) + ParseState(ps, + space_sensitive=true, + whitespace_newline=false) +end + +# Convenient wrappers for ParseStream + +function Base.peek(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? 
ps.whitespace_newline : skip_newlines + peek(ps.stream, n; skip_newlines=skip_nl) +end + +function peek_token(ps::ParseState, n=1; skip_newlines=nothing) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek_token(ps.stream, n, skip_newlines=skip_nl) +end + +function peek_full_token(ps::ParseState, n=1; skip_newlines=nothing, kws...) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + peek_full_token(ps.stream, n; skip_newlines=skip_nl, kws...) +end + +function peek_behind(ps::ParseState, args...; kws...) + peek_behind(ps.stream, args...; kws...) +end + +function peek_behind_pos(ps::ParseState, args...; kws...) + peek_behind_pos(ps.stream, args...; kws...) +end + +function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...) + skip_nl = isnothing(skip_newlines) ? ps.whitespace_newline : skip_newlines + bump(ps.stream, flags; skip_newlines=skip_nl, kws...) +end + +function bump_trivia(ps::ParseState, args...; kws...) + bump_trivia(ps.stream, args...; kws...) +end + +function bump_invisible(ps::ParseState, args...; kws...) + bump_invisible(ps.stream, args...; kws...) +end + +function bump_glue(ps::ParseState, args...; kws...) + bump_glue(ps.stream, args...; kws...) +end + +function bump_split(ps::ParseState, args...; kws...) + bump_split(ps.stream, args...; kws...) +end + +function reset_node!(ps::ParseState, args...; kws...) + reset_node!(ps.stream, args...; kws...) +end + +function steal_token_bytes!(ps::ParseState, args...) + steal_token_bytes!(ps.stream, args...) +end + +function Base.position(ps::ParseState, args...) + position(ps.stream, args...) +end + +function emit(ps::ParseState, args...; kws...) + emit(ps.stream, args...; kws...) +end + +function emit_diagnostic(ps::ParseState, args...; kws...) + emit_diagnostic(ps.stream, args...; kws...) 
+end + +function unsafe_textbuf(ps::ParseState) + unsafe_textbuf(ps.stream) +end + +function first_child_position(ps::ParseState, pos::ParseStreamPosition) + first_child_position(ps.stream, pos) +end + +function last_child_position(ps::ParseState, pos::ParseStreamPosition) + last_child_position(ps.stream, pos) +end +#------------------------------------------------------------------------------- +# Parser Utils + +# Bump an expected closing token. If not found, discard unexpected tokens +# until we find it or another closing token. +# +# Crude recovery heuristic: bump any tokens which aren't block or bracket +# closing tokens. +function bump_closing_token(ps, closing_kind, alternative_closer_hint=nothing) + # todo: Refactor with recover() ? + if peek(ps) == closing_kind + bump_trivia(ps) + bump(ps, TRIVIA_FLAG) + return + end + errmsg = "Expected `$(untokenize(closing_kind))`" + if !isnothing(alternative_closer_hint) + errmsg *= alternative_closer_hint + end + # We didn't find the closing token. Read ahead in the stream + mark = position(ps) + emit_diagnostic(ps, mark, mark, error=errmsg) + while true + k = peek(ps) + if is_closing_token(ps, k) && !(k in KSet", ;") + break + end + bump(ps) + end + # mark as trivia => ignore in AST. + emit(ps, mark, K"error", TRIVIA_FLAG) + if peek(ps) == closing_kind + bump(ps, TRIVIA_FLAG) + end +end + +# Read tokens until we find an expected closing token. 
+# Bump the big pile of resulting tokens as a single nontrivia error token +function recover(is_closer::Function, ps, flags=EMPTY_FLAGS; mark = position(ps), error="unexpected tokens") + while true + k = peek(ps) + if k == K"EndMarker" + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="premature end of input") + break + elseif is_closer(ps, k) + break + end + bump(ps) + end + emit(ps, mark, K"error", flags, error=error) +end + +@noinline function min_supported_version_err(ps, mark, message, min_ver) + major, minor = ps.stream.version + msg = "$message not supported in Julia version $major.$minor < $(min_ver.major).$(min_ver.minor)" + emit(ps, mark, K"error", error=msg) +end + +# Emit an error if the version is less than `min_ver` +function min_supported_version(min_ver, ps, mark, message) + if ps.stream.version < (min_ver.major, min_ver.minor) + min_supported_version_err(ps, mark, message, min_ver) + end +end + +# flisp: disallow-space +function bump_disallowed_space(ps) + if preceding_whitespace(peek_token(ps)) + bump_trivia(ps, TRIVIA_FLAG, skip_newlines=false, + error="whitespace is not allowed here") + end +end + +function bump_semicolon_trivia(ps) + while peek(ps) in KSet"; NewlineWs" + bump(ps, TRIVIA_FLAG) + end +end + +#------------------------------------------------------------------------------- +# Parsing-specific predicates on tokens/kinds +# +# All these take either a raw kind or a token. 
+ +function is_plain_equals(t) + kind(t) == K"=" && !is_suffixed(t) +end + +function is_closing_token(ps::ParseState, k) + k = kind(k) + return k in KSet"else elseif catch finally , ) ] } ; EndMarker" || + (k == K"end" && !ps.end_symbol) +end + +function is_block_continuation_keyword(ps::ParseState, k) + is_block_continuation_keyword(k) && !(ps.end_symbol && k == K"end") +end + +function is_closer_or_newline(ps::ParseState, k) + is_closing_token(ps,k) || k == K"NewlineWs" +end + +function is_initial_reserved_word(ps::ParseState, k) + k = kind(k) + is_iresword = k in KSet"begin while if for try return break continue function + macro quote let local global const do struct module + baremodule using import export" + # `begin` means firstindex(a) inside a[...] + return is_iresword && !(k == K"begin" && ps.end_symbol) +end + +function is_reserved_word(k) + k = kind(k) + is_keyword(k) && !is_contextual_keyword(k) +end + +# Return true if the next word (or word pair) is reserved, introducing a +# syntactic structure. +function peek_initial_reserved_words(ps::ParseState) + k = peek(ps) + if is_initial_reserved_word(ps, k) + return true + elseif is_contextual_keyword(k) + k2 = peek(ps, 2, skip_newlines=false) + return (k == K"mutable" && k2 == K"struct") || + (k == K"primitive" && k2 == K"type") || + (k == K"abstract" && k2 == K"type") + else + return false + end +end + +function is_block_form(k) + kind(k) in KSet"block quote if for while let function macro + abstract primitive struct try module" +end + +function is_syntactic_unary_op(k) + kind(k) in KSet"$ & ::" +end + +function is_type_operator(t, isdot) + kind(t) in KSet"<: >:" && !isdot +end + +function is_unary_op(t, isdot) + k = kind(t) + !is_suffixed(t) && ( + (k in KSet"<: >:" && !isdot) || + k in KSet"+ - ! 
~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed + ) +end + +# Operators that are both unary and binary +function is_both_unary_and_binary(t, isdot) + k = kind(t) + # Preventing is_suffixed here makes this consistent with the flisp parser. + # But is this by design or happenstance? + !is_suffixed(t) && ( + k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !isdot) + ) +end + +function is_string_macro_suffix(k) + k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k) +end + +# flisp: invalid-identifier? +function is_valid_identifier(k) + k = kind(k) + !(is_syntactic_operator(k) || k in KSet"? .'") +end + +# The expression is a call after stripping `where` and `::` +function was_eventually_call(ps::ParseState) + stream = ps.stream + p = peek_behind_pos(ps) + while true + b = peek_behind(stream, p) + if b.kind == K"call" + return true + elseif b.kind == K"where" || b.kind == K"parens" || + (b.kind == K"::" && has_flags(b.flags, INFIX_FLAG)) + if b.kind == K"::" + p_last = last_child_position(ps, p) + if p == p_last + return false + end + end + p = first_child_position(ps, p) + else + return false + end + end +end + + +#------------------------------------------------------------------------------- +# Parser +# +# The definitions and top-level comments here were copied to match the +# structure of Julia's previous flisp-based parser to make both codebases +# mutually understandable and make porting changes simple. +# +# The `parse_*` functions are listed here roughly in order of increasing +# precedence (lowest to highest binding power). A few helper functions are +# interspersed. + +# parse left-to-right binary operator +# produces structures like (+ (+ (+ 2 3) 4) 5) +# +# flisp: parse-LtoR +function parse_LtoR(ps::ParseState, down, is_op) + mark = position(ps) + down(ps) + while true + isdot, tk = peek_dotted_op_token(ps) + is_op(tk) || break + isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + bump(ps, remap_kind=K"Identifier") + down(ps) + emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) + end +end + +# parse right-to-left binary operator +# produces structures like (=> a (=> b (=> c d))) +# +# flisp: parse-RtoL +function parse_RtoL(ps::ParseState, down, is_op, self) + mark = position(ps) + down(ps) + isdot, tk = peek_dotted_op_token(ps) + if is_op(tk) + bump_dotted(ps, isdot, remap_kind=K"Identifier") + self(ps) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) + end +end + +# parse block-like structures +# +# `delimiters` are a set of token kinds acting as delimiters; `closing_tokens` +# stop the parsing. +# +# Returns true if the block was nontrivial and a node needs to be emitted by +# the caller. +# +# flisp: parse-Nary +function parse_Nary(ps::ParseState, down, delimiters, closing_tokens) + bump_trivia(ps) + k = peek(ps) + if k in closing_tokens + return true + end + n_delims = 0 + if k in delimiters + # allow leading delimiters + # ; a ==> (block a) + else + # a ; b ==> (block a b) + down(ps) + end + while peek(ps) in delimiters + bump(ps, TRIVIA_FLAG) + n_delims += 1 + k = peek(ps) + if k == K"EndMarker" || k in closing_tokens + break + elseif k in delimiters + # ignore empty delimited sections + # a;;;b ==> (block a b) + continue + end + down(ps) + end + return n_delims != 0 +end + +# Parse a sequence of top level statements separated by newlines, all wrapped +# in a toplevel node. +# +# a \n b ==> (toplevel a b) +# +# Note that parse_stmts can also emit toplevel nodes for semicolon-separated +# statements, so it's possible for these to be nested one level deep. 
+# +# a;b \n c;d ==> (toplevel (toplevel a b) (toplevel c d)) +function parse_toplevel(ps::ParseState) + mark = position(ps) + while true + if peek(ps, skip_newlines=true) == K"EndMarker" + # Allow end of input if there is nothing left but whitespace + # a \n \n ==> (toplevel a) + # Empty files + # ==> (toplevel) + bump_trivia(ps) + break + else + bump_trivia(ps) + parse_stmts(ps) + end + end + emit(ps, mark, K"toplevel") + nothing +end + +# Parse a newline or semicolon-delimited list of expressions. +# Repeated delimiters are allowed but ignored +# a;b;c ==> (block a b c) +# a;;;b;; ==> (block a b) +# ;a ==> (block a) +# \n a ==> (block a) +# a \n b ==> (block a b) +# +# flisp: parse-block +function parse_block(ps::ParseState, down=parse_eq, mark=position(ps)) + parse_block_inner(ps::ParseState, down) + emit(ps, mark, K"block") +end + +# Parse a block, but leave emitting the block up to the caller. +function parse_block_inner(ps::ParseState, down::F) where {F <: Function} + parse_Nary(ps, down, KSet"NewlineWs ;", KSet"end else elseif catch finally") +end + +# ";" at the top level produces a sequence of top level expressions +# +# a;b;c ==> (toplevel a b c) +# a;;;b;; ==> (toplevel a b) +# "x" a ; "y" b ==> (toplevel (doc (string "x") a) (doc (string "y") b)) +# +# flisp: parse-stmts +function parse_stmts(ps::ParseState) + mark = position(ps) + do_emit = parse_Nary(ps, parse_public, (K";",), (K"NewlineWs",)) + # check for unparsed junk after an expression + junk_mark = position(ps) + while peek(ps) ∉ KSet"EndMarker NewlineWs" + # Error recovery + bump(ps) + end + if junk_mark != position(ps) + # x y ==> x (error-t y) + emit(ps, junk_mark, K"error", TRIVIA_FLAG, + error="extra tokens after end of expression") + end + if do_emit + emit(ps, mark, K"toplevel", TOPLEVEL_SEMICOLONS_FLAG) + end +end + +# Parse `public foo, bar` +# +# We *only* call this from toplevel contexts (file and module level) for +# compatibility. 
In the future we should probably make public a full fledged +# keyword like `export`. +function parse_public(ps::ParseState) + if ps.stream.version >= (1, 11) && peek(ps) == K"public" + if peek(ps, 2) ∈ KSet"( = [" + # this branch is for compatibility with use of public as a non-keyword. + # it should be removed at some point. + emit_diagnostic(ps, warning="using public as an identifier is deprecated") + else + return parse_resword(ps) + end + end + parse_docstring(ps) +end + +# Parse docstrings attached by a space or single newline +# +# flisp: parse-docstring +function parse_docstring(ps::ParseState, down=parse_eq) + mark = position(ps) + down(ps) + if peek_behind(ps).kind == K"string" + is_doc = true + k = peek(ps) + if is_closing_token(ps, k) + # "notdoc" ] ==> (string "notdoc") + is_doc = false + elseif k == K"NewlineWs" + k2 = peek(ps, 2) + if is_closing_token(ps, k2) || k2 == K"NewlineWs" + # "notdoc" \n] ==> (string "notdoc") + # "notdoc" \n\n foo ==> (string "notdoc") + is_doc = false + else + # Allow a single newline + # "doc" \n foo ==> (doc (string "doc") foo) + bump(ps, TRIVIA_FLAG) # NewlineWs + end + else + # "doc" foo ==> (doc (string "doc") foo) + # "doc $x" foo ==> (doc (string "doc " x) foo) + # Allow docstrings with embedded trailing whitespace trivia + # """\n doc\n """ foo ==> (doc (string-s "doc\n") foo) + end + if is_doc + down(ps) + emit(ps, mark, K"doc") + end + end +end + +# Parse assignments with comma separated lists on each side +# a = b ==> (= a b) +# a .= b ==> (.= a b) +# a += b ==> (+= a b) +# a .+= b ==> (.+= a b) +# a, b = c, d ==> (= (tuple a b) (tuple c d)) +# x, = xs ==> (= (tuple x) xs) +# +# flisp: parse-eq +function parse_eq(ps::ParseState) + parse_assignment(ps, parse_comma) +end + +# parse_eq_star is used where commas are special, for example in an argument list +# +# flisp: parse-eq* +function parse_eq_star(ps::ParseState) + k = peek(ps) + k2 = peek(ps,2) + if (is_literal(k) || k == K"Identifier") && k2 in KSet", ) } ]" 
+ # optimization: skip checking the whole precedence stack if we have a + # simple token followed by a common closing token + bump(ps) + else + parse_assignment(ps, parse_pair) + end +end + +# a = b ==> (= a b) +# +# flisp: parse-assignment +function parse_assignment(ps::ParseState, down) + mark = position(ps) + down(ps) + parse_assignment_with_initial_ex(ps, mark, down) +end + +function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {T} # where => specialize on `down` + isdot, t = peek_dotted_op_token(ps) + k = kind(t) + if !is_prec_assignment(k) + return + end + if k == K"~" + if ps.space_sensitive && preceding_whitespace(t) && !preceding_whitespace(peek_token(ps, 2)) + # Unary ~ in space sensitive context is not assignment precedence + # [a ~b] ==> (hcat a (call-pre ~ b)) + return + end + # ~ is currently the only assignment-precedence operator which is parsed as a call. + # TODO: Make the other non-syntactic assignments such as `≔ ⩴ ≕` into calls as well? + # a ~ b ==> (call-i a ~ b) + # a .~ b ==> (dotcall-i a ~ b) + # [a ~ b c] ==> (hcat (call-i a ~ b) c) + # [a~b] ==> (vect (call-i a ~ b)) + bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_trivia(ps) + parse_assignment(ps, down) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) + else + # f() = 1 ==> (function-= (call f) 1) + # f() .= 1 ==> (.= (call f) 1) + # a += b ==> (+= a b) + # a .= b ==> (.= a b) + is_short_form_func = k == K"=" && !isdot && was_eventually_call(ps) + if k == K"op=" + # x += y ==> (op= x + y) + # x .+= y ==> (.op= x + y) + bump_trivia(ps) + isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + bump_split(ps, + (-1, K"Identifier", EMPTY_FLAGS), # op + (1, K"=", TRIVIA_FLAG)) + else + bump_dotted(ps, isdot, TRIVIA_FLAG) + end + bump_trivia(ps) + # Syntax Edition TODO: We'd like to call `down` here when + # is_short_form_func is true, to prevent `f() = 1 = 2` from parsing. + parse_assignment(ps, down) + emit(ps, mark, + is_short_form_func ? 
K"function" : (isdot ? dotted(k) : k), + is_short_form_func ? SHORT_FORM_FUNCTION_FLAG : flags(t)) + end +end + +# parse_comma is needed for commas outside parens, for example a = b,c +# +# flisp: parse-comma +function parse_comma(ps::ParseState, do_emit=true) + mark = position(ps) + n_commas = 0 + parse_pair(ps) + while true + if peek(ps) != K"," + if do_emit && n_commas >= 1 + emit(ps, mark, K"tuple") + end + return n_commas + end + bump(ps, TRIVIA_FLAG) + n_commas += 1 + if is_plain_equals(peek_token(ps)) + # Allow trailing comma before `=` + # x, = xs ==> (tuple x) + continue + end + parse_pair(ps) + end +end + +# flisp: parse-pair +# a => b ==> (call-i a => b) +# a .=> b ==> (dotcall-i a => b) +function parse_pair(ps::ParseState) + parse_RtoL(ps, parse_cond, is_prec_pair, parse_pair) +end + +# Parse short form conditional expression +# a ? b : c ==> (? a b c) +# +# flisp: parse-cond +function parse_cond(ps::ParseState) + mark = position(ps) + parse_arrow(ps) + t = peek_token(ps) + if kind(t) != K"?" + return + end + if !preceding_whitespace(t) + # a? b : c => (? a (error-t) b c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required before `?` operator") + end + bump(ps, TRIVIA_FLAG) # ? + t = peek_token(ps) + if !preceding_whitespace(t) + # a ?b : c ==> (? a (error-t) b c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required after `?` operator") + end + parse_eq_star(ParseState(ps, range_colon_enabled=false)) + t = peek_token(ps) + if !preceding_whitespace(t) + # a ? b: c ==> (? a b (error-t) c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required before `:` in `?` expression") + end + if kind(t) == K":" + bump(ps, TRIVIA_FLAG) + else + # a ? b c ==> (? a b (error-t) c) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression") + end + t = peek_token(ps; skip_newlines = true) + if !preceding_whitespace(t) + # a ? b :c ==> (? 
a b (error-t) c) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="space required after `:` in `?` expression") + end + + # FIXME: This is a very specific case. Error recovery should be handled more + # generally elsewhere. + if is_block_continuation_keyword(ps, kind(t)) + # a "continuation keyword" is likely to belong to the surrounding code, so + # we abort early + + # if true; x ? true elseif true end ==> (if true (block (if x true (error-t) (error-t))) (elseif true (block))) + # if true; x ? true end ==> (if true (block (if x true (error-t) (error-t)))) + # if true; x ? true\n end ==> (if true (block (if x true (error-t) (error-t)))) + # if true; x ? true : elseif true end ==> (if true (block (if x true (error-t))) (elseif true (block))) + bump_invisible(ps, K"error", TRIVIA_FLAG, error="unexpected `$(kind(t))`") + emit(ps, mark, K"if") + return + else + # A[x ? y : end] ==> (ref A (? x y end)) + end + parse_eq_star(ps) + emit(ps, mark, K"?") +end + +# Parse arrows. Like parse_RtoL, but specialized for --> syntactic operator +# +# flisp: parse-arrow +function parse_arrow(ps::ParseState) + mark = position(ps) + parse_or(ps) + isdot, t = peek_dotted_op_token(ps) + k = kind(t) + if is_prec_arrow(k) + if kind(t) == K"-->" && !isdot && !is_suffixed(t) + # x --> y ==> (--> x y) # The only syntactic arrow + bump(ps, TRIVIA_FLAG) + parse_arrow(ps) + emit(ps, mark, k, flags(t)) + else + # x → y ==> (call-i x → y) + # x <--> y ==> (call-i x <--> y) + # x .--> y ==> (dotcall-i x --> y) + # x -->₁ y ==> (call-i x -->₁ y) + bump_dotted(ps, isdot, remap_kind=K"Identifier") + parse_arrow(ps) + emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) + end + end +end + +function dotted(k) + if k == K"||" + return K".||" + elseif k == K"&&" + return K".&&" + elseif k == K"=" + return K".=" + elseif k == K"op=" + return K".op=" + else + error("Unexpected dotted operator: $k") + end +end + +# Like parse_RtoL, but specialized for the version test of dotted operators. 
# Parse the short-circuiting operators `&&` and `||` (and their dotted
# variants). They are right associative: `down` parses the tighter-binding
# subexpression, `is_op` tests for the operator, and `self` is the recursive
# continuation that produces the right-associative nesting.
function parse_lazy_cond(ps::ParseState, down, is_op, self)
    mark = position(ps)
    down(ps)
    (isdot, t) = peek_dotted_op_token(ps)
    k = kind(t)
    if is_op(k)
        bump_dotted(ps, isdot, TRIVIA_FLAG)
        self(ps)
        emit(ps, mark, isdot ? dotted(k) : k, flags(t))
        if isdot
            min_supported_version(v"1.7", ps, mark, "dotted operators `.||` and `.&&`")
        end
    end
end

# x || y || z ==> (|| x (|| y z))
#v1.6: x .|| y ==> (error (.|| x y))
#v1.7: x .|| y ==> (.|| x y)
#
# flisp: parse-or
function parse_or(ps::ParseState)
    parse_lazy_cond(ps, parse_and, is_prec_lazy_or, parse_or)
end

# x && y && z ==> (&& x (&& y z))
#v1.6: x .&& y ==> (error (.&& x y))
#v1.7: x .&& y ==> (.&& x y)
#
# flisp: parse-and
function parse_and(ps::ParseState)
    parse_lazy_cond(ps, parse_comparison, is_prec_lazy_and, parse_and)
end

# Parse binary comparisons and comparison chains
#
# flisp: parse-comparison
function parse_comparison(ps::ParseState, subtype_comparison=false)
    mark = position(ps)
    if subtype_comparison && is_reserved_word(peek(ps))
        # Recovery
        # struct try end ==> (struct (error (try)) (block))
        name = untokenize(peek(ps))
        bump(ps)
        emit(ps, mark, K"error", error="Invalid type name `$name`")
    else
        parse_pipe_lt(ps)
    end
    n_comparisons = 0
    op_pos = NO_POSITION
    op_dotted = false
    (initial_dot, initial_tok) = peek_dotted_op_token(ps)
    while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t))
        n_comparisons += 1
        op_dotted = isdot
        op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier")
        parse_pipe_lt(ps)
    end
    if n_comparisons == 1
        if is_type_operator(initial_tok, initial_dot)
            # Type comparisons are syntactic
            # x <: y ==> (<: x y)
            # x >: y ==> (>: x y)
            reset_node!(ps, op_pos, flags=TRIVIA_FLAG)
            emit(ps, mark, kind(initial_tok))
        else
            # Normal binary comparisons
            # x < y ==> (call-i x < y)
            # x .< y ==> (dotcall-i x < y)
            if op_dotted
                # Reset the extra (non-terminal) K"." (e.g. in `(. <)`) node to just `. <`
                reset_node!(ps, op_pos, kind=K"TOMBSTONE", flags=TRIVIA_FLAG)
            end
            emit(ps, mark, op_dotted ? K"dotcall" : K"call", INFIX_FLAG)
        end
    elseif n_comparisons > 1
        # Comparison chains
        # x < y < z ==> (comparison x < y < z)
        # x == y < z ==> (comparison x == y < z)
        # x .< y .< z ==> (comparison x (. <) y (. <) z)
        # x .< y < z ==> (comparison x (. <) y < z)
        emit(ps, mark, K"comparison")
    end
end

# x <| y <| z ==> (call-i x <| (call-i y <| z))
# flisp: parse-pipe<
function parse_pipe_lt(ps::ParseState)
    parse_RtoL(ps, parse_pipe_gt, is_prec_pipe_lt, parse_pipe_lt)
end

# x |> y |> z ==> (call-i (call-i x |> y) |> z)
# x .|> y ==> (dotcall-i x |> y)
# flisp: parse-pipe>
function parse_pipe_gt(ps::ParseState)
    parse_LtoR(ps, parse_range, is_prec_pipe_gt)
end

# parse ranges and postfix ...
# colon is strange; 3 arguments with 2 colons yields one call:
# 1:2 ==> (call-i 1 : 2)
# 1:2:3 ==> (call-i 1 : 2 3)
# Chaining gives
# a:b:c:d:e ==> (call-i (call-i a : b c) : d e)
#
# flisp: parse-range
function parse_range(ps::ParseState)
    mark = position(ps)
    parse_invalid_ops(ps)
    (initial_dot, initial_tok) = peek_dotted_op_token(ps)
    initial_kind = kind(initial_tok)
    if initial_kind != K":" && is_prec_colon(initial_kind)
        # a..b ==> (call-i a .. b)
        # a … b ==> (call-i a … b)
        # a .… b ==> (dotcall-i a … b)
        bump_dotted(ps, initial_dot, remap_kind=K"Identifier")
        parse_invalid_ops(ps)
        emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG)
    elseif initial_kind == K":" && ps.range_colon_enabled
        # a ? b : c:d ==> (? a b (call-i c : d))
        n_colons = 0
        while peek(ps) == K":"
            if ps.space_sensitive &&
                    preceding_whitespace(peek_token(ps)) &&
                    !preceding_whitespace(peek_token(ps, 2))
                # Tricky cases in space sensitive mode
                # [1 :a] ==> (hcat 1 (quote-: a))
                # [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote-: a))
                break
            end
            t2 = peek_token(ps,2)
            if kind(t2) in KSet"< >" && !preceding_whitespace(t2)
                # Error heuristic: we found `:>` or `:<` which are invalid lookalikes
                # for `<:` and `>:`. Attempt to recover by treating them as a
                # comparison operator.
                # a :> b ==> (call-i a (error : >) b)
                bump_trivia(ps, skip_newlines=false)
                emark = position(ps)
                bump(ps, remap_kind=K"Identifier") # K":"
                ks = untokenize(peek(ps))
                bump(ps, remap_kind=K"Identifier") # K"<" or K">"
                emit(ps, emark, K"error",
                     error="Invalid `:$ks` found, maybe replace with `$ks:`")
                parse_invalid_ops(ps)
                emit(ps, mark, K"call", INFIX_FLAG)
                break
            end
            n_colons += 1
            # Only the first `:` in `a:b:c` is kept as the call head; later
            # ones are trivia.
            bump(ps, n_colons == 1 ? EMPTY_FLAGS : TRIVIA_FLAG; remap_kind=K"Identifier")
            had_newline = peek(ps) == K"NewlineWs"
            t = peek_token(ps)
            if is_closing_token(ps, kind(t))
                # 1: } ==> (call-i 1 : (error))
                # 1:2: } ==> (call-i 1 : 2 (error))
                bump_invisible(ps, K"error",
                               error="missing last argument in range expression")
                emit(ps, mark, K"call", INFIX_FLAG)
                emit_diagnostic(ps, error="found unexpected closing token")
                return
            end
            if had_newline
                # Error message for people coming from python
                # 1:\n2 ==> (call-i 1 : (error))
                # (1:\n2) ==> (parens (call-i 1 : 2))
                emit_diagnostic(ps, whitespace=true,
                                error="line break after `:` in range expression")
                bump_invisible(ps, K"error")
                emit(ps, mark, K"call", INFIX_FLAG)
                return
            end
            parse_invalid_ops(ps)
            if n_colons == 2
                emit(ps, mark, K"call", INFIX_FLAG)
                n_colons = 0
            end
        end
        if n_colons > 0
            emit(ps, mark, K"call", INFIX_FLAG)
        end
    end

    # x... ==> (... x)
    # x:y... ==> (... (call-i x : y))
    # x..y... ==> (... (call-i x .. y)) # flisp parser fails here
    if peek(ps) == K"..."
        bump(ps, TRIVIA_FLAG)
        emit(ps, mark, K"...")
    end
end

# Parse invalid binary operators
#
# Having this is unnecessary, but it improves error messages and the
# error-containing parse tree.
#
# a--b ==> (call-i a (error) b)
function parse_invalid_ops(ps::ParseState)
    mark = position(ps)
    parse_expr(ps)
    while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**")
        bump_trivia(ps)
        bump_dotted(ps, isdot)
        parse_expr(ps)
        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
    end
end

# a - b - c ==> (call-i (call-i a - b) - c)
# a + b + c ==> (call-i a + b c)
# a .+ b ==> (dotcall-i a + b)
#
# flisp: parse-expr
function parse_expr(ps::ParseState)
    parse_with_chains(ps, parse_term, is_prec_plus, KSet"+ ++")
end

# a * b * c ==> (call-i a * b c)
#
# flisp: parse-term
function parse_term(ps::ParseState)
    parse_with_chains(ps, parse_rational, is_prec_times, KSet"*")
end

# Parse left to right, combining any of `chain_ops` into one call
#
# flisp: parse-with-chains
function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
    mark = position(ps)
    down(ps)
    while ((isdot, t) = peek_dotted_op_token(ps); is_op(kind(t)))
        if ps.space_sensitive && preceding_whitespace(t) &&
                is_both_unary_and_binary(t, isdot) &&
                !preceding_whitespace(peek_token(ps, 2))
            # The following is two elements of a hcat
            # [x +y] ==> (hcat x (call-pre + y))
            # [x+y +z] ==> (hcat (call-i x + y) (call-pre + z))
            # Conversely the following are infix calls
            # [x +₁y] ==> (vect (call-i x +₁ y))
            # [x+y+z] ==> (vect (call-i x + y z))
            # [x+y + z] ==> (vect (call-i x + y z))
            break
        end
        bump_dotted(ps, isdot, remap_kind=K"Identifier")
        down(ps)
        if kind(t) in chain_ops && !is_suffixed(t) && !isdot
            # a + b + c ==> (call-i a + b c)
            # a + b .+ c ==> (dotcall-i (call-i a + b) + c)
            parse_chain(ps, down, kind(t))
        end
        # a +₁ b +₁ c ==> (call-i (call-i a +₁ b) +₁ c)
        # a .+ b .+ c ==> (dotcall-i (dotcall-i a + b) + c)
        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
    end
end

# parse left to right chains of a given binary operator
#
# flisp: parse-chain
function parse_chain(ps::ParseState, down, op_kind)
    while true
        isdot, t = peek_dotted_op_token(ps)
        if kind(t) != op_kind || is_suffixed(t) || isdot
            break
        end
        if ps.space_sensitive && preceding_whitespace(t) &&
                is_both_unary_and_binary(t, false) &&
                !preceding_whitespace(peek_token(ps, 2))
            # [x +y] ==> (hcat x (call-pre + y))
            break
        end
        bump(ps, TRIVIA_FLAG)
        down(ps)
    end
end

# flisp: parse-rational
# x // y // z ==> (call-i (call-i x // y) // z)
function parse_rational(ps::ParseState)
    parse_LtoR(ps, parse_shift, is_prec_rational)
end

# flisp: parse-shift
# x >> y >> z ==> (call-i (call-i x >> y) >> z)
function parse_shift(ps::ParseState)
    parse_LtoR(ps, parse_unary_subtype, is_prec_bitshift)
end

# parse `<: A where B` as `<: (A where B)` (issue #21545)
#
# flisp: parse-unary-subtype
function parse_unary_subtype(ps::ParseState)
    t = peek_token(ps)
    if is_type_operator(t, false)
        k2 = peek(ps, 2)
        if is_closing_token(ps, k2) || k2 in KSet"NewlineWs ="
            # return operator by itself
            # <: ) ==> <:
            # <: \n ==> <:
            # <: = ==> <:
            bump(ps)
        elseif k2 in KSet"{ ("
            # parse <:{T}(x::T) or <:(x::T) like other unary operators
            # <:{T}(x::T) ==> (call (curly <: T) (:: x T))
            # <:(x::T) ==> (<:-pre (parens (:: x T)))
            parse_where(ps, parse_juxtapose)
        else
            # <: x ==> (<:-pre x)
            # <: A where B ==> (<:-pre (where A B))
            # <: <: x ==> (<:-pre (<:-pre x))
            mark = position(ps)
            bump(ps, TRIVIA_FLAG)
            parse_unary_subtype(ps)
            emit(ps, mark, kind(t), PREFIX_OP_FLAG)
        end
    else
        parse_where(ps, parse_juxtapose)
    end
end

# flisp: parse-where-chain
function parse_where_chain(ps0::ParseState, mark)
    ps = ParseState(ps0, where_enabled=false)
    while peek(ps) == K"where"
        bump(ps, TRIVIA_FLAG) # where
        bump_trivia(ps)
        k = peek(ps)
        if k == K"{"
            # x where \n {T} ==> (where x (braces T))
            # x where {T,S} ==> (where x (braces T S))
            # Also various nonsensical forms permitted
            # x where {T S} ==> (where x (bracescat (row T S)))
            # x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys)))))
            m = position(ps)
            bump(ps, TRIVIA_FLAG)
            ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
            emit_braces(ps, m, ckind, cflags, dim)
            emit(ps, mark, K"where")
        else
            # x where T ==> (where x T)
            # x where \n T ==> (where x T)
            # x where T<:S ==> (where x (<: T S))
            parse_comparison(ps)
            emit(ps, mark, K"where")
        end
    end
end

# flisp: parse-where
function parse_where(ps::ParseState, down)
    # `where` needs to be below unary for the following to work
    # +(x::T,y::T) where {T} = x
    mark = position(ps)
    down(ps)
    if ps.where_enabled && peek(ps) == K"where"
        parse_where_chain(ps, mark)
    end
end

# Juxtaposition. Kinda ugh but soo useful for units and Field identities like `im`
#
# flisp: parse-juxtapose
function parse_juxtapose(ps::ParseState)
    mark = position(ps)
    parse_unary(ps)
    n_terms = 1
    while true
        t = peek_token(ps)
        k = kind(t)
        prev_k = peek_behind(ps).kind
        is_juxtapose = false
        if !preceding_whitespace(t) &&
                (is_number(prev_k) ||
                 (!is_number(k) &&  # disallow "x.3" and "f(2)2"
                  k != K"@" &&      # disallow "x@y"
                  !(is_block_form(prev_k) ||
                    is_syntactic_unary_op(prev_k) ||
                    is_initial_reserved_word(ps, prev_k) ))) &&
                (!is_operator(k) || is_radical_op(k)) &&
                !is_closing_token(ps, k)
            if prev_k == K"string" || is_string_delim(t)
                bump_invisible(ps, K"error", TRIVIA_FLAG,
                               error="cannot juxtapose string literal")
                # JuliaLang/julia#20575
                # Error, but assume juxtapose for recovery
                # "a""b" ==> (juxtapose (string "a") (error-t) (string "b"))
                # "a"x ==> (juxtapose (string "a") (error-t) x)
                # "$y"x ==> (juxtapose (string y) (error-t) x)
                # "a"begin end ==> (juxtapose (string "a") (error-t) (block))
                is_juxtapose = true
            elseif !is_initial_reserved_word(ps, k)
                # 2x ==> (juxtapose 2 x)
                # 2(x) ==> (juxtapose 2 (parens x))
                # (2)(3)x ==> (juxtapose (parens 2) (parens 3) x)
                # (x-1)y ==> (juxtapose (parens (call-i x - 1)) y)
                # x'y ==> (juxtapose (call-post x ') y)
                # 1√x ==> (juxtapose 1 (call-pre √ x))
                is_juxtapose = true
            end
        end
        if !is_juxtapose
            # x.3 ==> x
            # f(2)2 ==> (call f 2)
            # x' y ==> (call-post x ')
            # x 'y ==> x
            # x@y ==> x
            break
        end
        if is_radical_op(t)
            parse_unary(ps)
        else
            parse_factor(ps)
        end
        n_terms += 1
    end
    if n_terms > 1
        emit(ps, mark, K"juxtapose")
    end
end

# Parse numeric literal prefixes, calls to unary operators and prefix
# calls involving arbitrary operators with bracketed arglists (as opposed to
# infix notation)
#
# flisp: parse-unary, parse-unary-call
function parse_unary(ps::ParseState)
    mark = position(ps)
    bump_trivia(ps)
    (op_dotted, op_t) = peek_dotted_op_token(ps)
    op_k = kind(op_t)
    if (
        !is_operator(op_k) ||
        is_word_operator(op_k) ||
        (op_k in KSet": ' .'") ||
        (is_syntactic_unary_op(op_k) && !op_dotted) ||
        is_syntactic_operator(op_k)
    )
        # `op_t` is not an initial operator
        # :T ==> (quote-: T)
        # in::T ==> (:: in T)
        # isa::T ==> (:: isa T)
        parse_factor(ps)
        return
    end
    t2 = peek_token(ps, 2+op_dotted)
    k2 = kind(t2)
    if op_k in KSet"- +" && !is_suffixed(op_t) && !op_dotted
        if !preceding_whitespace(t2) && (k2 in KSet"Integer Float Float32" ||
                                         (op_k == K"+" && k2 in KSet"BinInt HexInt OctInt"))

            k3 = peek(ps, 3)
            if is_prec_power(k3) || k3 in KSet"[ {"
                # `[`, `{` (issue #18851) and `^` have higher precedence than
                # unary negation
                # -2^x ==> (call-pre - (call-i 2 ^ x))
                # -2[1, 3] ==> (call-pre - (ref 2 1 3))
                bump(ps, remap_kind=K"Identifier")
                parse_factor(ps)
                emit(ps, mark, K"call", PREFIX_OP_FLAG)
            else
                # We have a signed numeric literal. Glue the operator to the
                # next token to create a signed literal:
                # -2 ==> -2
                # +2.0 ==> 2.0
                # -1.0f0 ==> -1.0f0
                # -2*x ==> (call-i -2 * x)
                # +0xff ==> 0xff
                bump_glue(ps, kind(t2), EMPTY_FLAGS)
                parse_factor_with_initial_ex(ps, mark)
            end
            return
        end
    end
    if is_closing_token(ps, k2) || k2 in KSet"NewlineWs ="
        # Standalone operators parsed as `op` or `(. op)`
        # +) ==> +
        # +\n ==> +
        # + = ==> +
        # .+ ==> (. +)
        # .& ==> (. &)
        parse_atom(ps)
    elseif k2 == K"{" || (!is_unary_op(op_t, op_dotted) && k2 == K"(")
        # Call with type parameters or non-unary prefix call
        # +{T}(x::T) ==> (call (curly + T) (:: x T))
        # *(x) ==> (call * x)
        # .*(x) ==> (call .* x)
        parse_factor(ps)
    elseif k2 == K"("
        # Cases like +(a;b) are ambiguous: are they prefix calls to + with b as
        # a keyword argument, or is `a;b` a block? We resolve this with a
        # simple heuristic: if there were any commas (or an initial splat), it
        # was a function call.
        #
        # (The flisp parser only considers commas before `;` and thus gets this
        # last case wrong)
        op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")

        space_before_paren = preceding_whitespace(t2)
        if space_before_paren
            # Setup possible whitespace error between operator and (
            ws_mark = position(ps)
            bump_trivia(ps)
            ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE")
            ws_mark_end = position(ps)
        end

        mark_before_paren = position(ps)
        bump(ps, TRIVIA_FLAG) # (
        initial_semi = peek(ps, skip_newlines=true) == K";"
        opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
            is_paren_call = had_commas || had_splat ||
                            (initial_semi && num_subexprs > 0) ||
                            (initial_semi && num_semis == 1) ||
                            (num_semis == 0 && num_subexprs == 0)
            return (needs_parameters=is_paren_call,
                    is_paren_call=is_paren_call,
                    is_block=!is_paren_call && num_semis > 0)
        end

        # The precedence between unary + and any following infix ^ depends on
        # whether the parens are a function call or not
        if opts.is_paren_call
            if space_before_paren
                # Whitespace not allowed before prefix function call bracket
                # + (a,b) ==> (call + (error) a b)
                reset_node!(ps, ws_error_pos, kind=K"error")
                emit_diagnostic(ps, ws_mark, ws_mark_end,
                                error="whitespace not allowed between prefix function call and argument list")
            end
            # Prefix function calls for operators which are both binary and unary
            # +(a,b) ==> (call + a b)
            # +(a=1,) ==> (call + (= a 1))
            # +(a...) ==> (call + (... a))
            # +(a;b,c) ==> (call + a (parameters b c))
            # +(;a) ==> (call + (parameters a))
            # +() ==> (call +)
            # Prefix calls have higher precedence than ^
            # +(a,b)^2 ==> (call-i (call + a b) ^ 2)
            # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2)
            if is_type_operator(op_t, op_dotted)
                # <:(a,) ==> (<: a)
                emit(ps, mark, op_k, opts.delim_flags)
                reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k)
            else
                emit(ps, mark, K"call", opts.delim_flags)
            end
            parse_call_chain(ps, mark)
            parse_factor_with_initial_ex(ps, mark)
        else
            # Unary function calls with brackets as grouping, not an arglist
            # .+(a) ==> (dotcall-pre + (parens a))
            if opts.is_block
                # +(a;b) ==> (call-pre + (block-p a b))
                emit(ps, mark_before_paren, K"block", PARENS_FLAG)
            else
                emit(ps, mark_before_paren, K"parens")
            end
            # Not a prefix operator call but a block; `=` is not `kw`
            # +(a=1) ==> (call-pre + (parens (= a 1)))
            # Unary operators have lower precedence than ^
            # +(a)^2 ==> (call-pre + (call-i (parens a) ^ 2))
            # .+(a)^2 ==> (dotcall-pre + (call-i (parens a) ^ 2))
            # +(a)(x,y)^2 ==> (call-pre + (call-i (call (parens a) x y) ^ 2))
            parse_call_chain(ps, mark_before_paren)
            parse_factor_with_initial_ex(ps, mark_before_paren)
            if is_type_operator(op_t, op_dotted)
                # <:(a) ==> (<:-pre (parens a))
                emit(ps, mark, op_k, PREFIX_OP_FLAG)
                reset_node!(ps, op_pos, flags=TRIVIA_FLAG, kind=op_k)
            else
                if op_dotted
                    emit(ps, mark, K"dotcall", PREFIX_OP_FLAG)
                    # Reset the extra (non-terminal) K"." (e.g. in `(. +)`) node to just `. +`
                    reset_node!(ps, op_pos, kind=K"TOMBSTONE")
                else
                    emit(ps, mark, K"call", PREFIX_OP_FLAG)
                end
            end
        end
    else
        if is_unary_op(op_t, op_dotted)
            # Normal unary calls
            # +x ==> (call-pre + x)
            # √x ==> (call-pre √ x)
            # .~x ==> (dotcall-pre ~ x)
            # Things which are not quite negative literals
            # -0x1 ==> (call-pre - 0x01)
            # - 2 ==> (call-pre - 2)
            # .-2 ==> (dotcall-pre - 2)
            op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier")
        else
            # /x ==> (call-pre (error /) x)
            # +₁ x ==> (call-pre (error +₁) x)
            # .<: x ==> (dotcall-pre (error (. <:)) x)
            bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
            op_pos = emit(ps, mark, K"error", error="not a unary operator")
        end
        parse_unary(ps)
        if is_type_operator(op_t, op_dotted)
            reset_node!(ps, op_pos, flags=TRIVIA_FLAG)
            emit(ps, mark, op_k, PREFIX_OP_FLAG)
        else
            emit(ps, mark, op_dotted ? K"dotcall" : K"call", PREFIX_OP_FLAG)
        end
    end
end

# handle ^ and .^
#
# x^y ==> (call-i x ^ y)
# x^y^z ==> (call-i x ^ (call-i y ^ z))
# x .^ y ==> (dotcall-i x ^ y)
# begin x end::T ==> (:: (block x) T)
#
# flisp: parse-factor
function parse_factor(ps::ParseState)
    mark = position(ps)
    parse_call(ps)
    parse_factor_with_initial_ex(ps, mark)
end

# Parse the `^`/`.^` (power) tail after an expression starting at `mark` has
# already been parsed. Power is right associative via parse_factor_after.
#
# flisp: parse-factor-with-initial-ex
function parse_factor_with_initial_ex(ps::ParseState, mark)
    parse_decl_with_initial_ex(ps, mark)
    if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t)))
        bump_dotted(ps, isdot, remap_kind=K"Identifier")
        parse_factor_after(ps)
        emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
    end
end

# flisp: parse-factor-after
function parse_factor_after(ps::ParseState)
    parse_RtoL(ps, parse_juxtapose, is_prec_power, parse_factor_after)
end

# Parse type declarations and lambda syntax
# a::b ==> (::-i a b)
# a->b ==> (-> a b)
#
# flisp: parse-decl-with-initial-ex
function parse_decl_with_initial_ex(ps::ParseState, mark)
    while peek(ps) == K"::"
        # a::b::c ==> (::-i (::-i a b) c)
        bump(ps, TRIVIA_FLAG)
        parse_where(ps, parse_call)
        emit(ps, mark, K"::", INFIX_FLAG)
    end
    if peek(ps) == K"->"
        kb = peek_behind(ps).kind
        if kb == K"tuple"
            # (x,y) -> z
            # (x) -> y
            # (x; a=1) -> y
        elseif kb == K"where"
            # `where` and `->` have the "wrong" precedence when writing anon functions.
            # So ignore this case to allow use of grouping brackets with `where`.
            # This needs to worked around in lowering :-(
            # (x where T) -> y ==> (-> (x where T) y)
        else
            # x -> y ==> (-> (tuple x) y)
            # a::b->c ==> (-> (tuple (::-i a b)) c)
            emit(ps, mark, K"tuple")
        end
        bump(ps, TRIVIA_FLAG)
        # -> is unusual: it binds tightly on the left and loosely on the right.
        parse_eq_star(ps)
        emit(ps, mark, K"->")
    end
end

# parse function call, indexing, dot, and transpose expressions
# also handles looking for syntactic reserved words
#
# flisp: parse-call
function parse_call(ps::ParseState)
    if peek_initial_reserved_words(ps)
        parse_resword(ps)
    else
        mark = position(ps)
        # f(x) ==> (call f x)
        # $f(x) ==> (call ($ f) x)
        parse_unary_prefix(ps)
        parse_call_chain(ps, mark)
    end
end

# parse syntactic unary operators
#
# &a ==> (& a)
# ::a ==> (::-pre a)
# $a ==> ($ a)
#
# flisp: parse-unary-prefix
function parse_unary_prefix(ps::ParseState, has_unary_prefix=false)
    mark = position(ps)
    (isdot, t) = peek_dotted_op_token(ps)
    k = kind(t)
    if is_syntactic_unary_op(k) && !isdot
        k2 = peek(ps, 2)
        if k in KSet"& $" && (is_closing_token(ps, k2) || k2 == K"NewlineWs")
            # &) ==> &
            # $\n ==> $
            bump(ps)
        else
            bump(ps, TRIVIA_FLAG)
            if k in KSet"& ::"
                # &a ==> (& a)
                parse_where(ps, parse_call)
            else
                # $a ==> ($ a)
                # $$a ==> ($ ($ a))
                # $&a ==> ($ (& a))
                parse_unary_prefix(ps, true)
            end
            # Only need PREFIX_OP_FLAG for ::
            f = k == K"::" ? PREFIX_OP_FLAG : EMPTY_FLAGS
            emit(ps, mark, k, f)
        end
    else
        # .&(x,y) ==> (call .& x y)
        parse_atom(ps, true, has_unary_prefix)
    end
end

# Emit a `macro_name` node over the range starting at `mark` if we're still
# in the middle of processing a macro name. Always returns `false`, so
# callers can use the return value to clear their processing flag.
function maybe_parsed_macro_name(ps, processing_macro_name, mark)
    if processing_macro_name
        emit(ps, mark, K"macro_name")
    end
    return false
end

# Parses a chain of suffixes at function call precedence, leftmost binding
# tightest. This handles
#  * Bracketed calls like a() b[] c{}
#  * Field access like a.b.c
#    - Various dotted syntax like f.() and f.:x
#  * Adjoint suffix like a'
#  * String macros like a"str" b"""str""" c`str` d```str```
#
# f(a).g(b) ==> (call (. (call f a) g) b)
#
# flisp: parse-call-chain, parse-call-with-initial-ex
function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
    if is_number(peek_behind(ps).kind) && peek(ps) == K"("
        # juxtaposition with numbers is multiply, not call
        # 2(x) ==> (* 2 x)
        return
    end
    processing_macro_name = is_macrocall
    saw_misplaced_atsym = false
    misplaced_atsym_mark = nothing
    # source range of the @-prefixed part of a macro
    macro_atname_range = nothing
    # $A.@x ==> (macrocall (. ($ A) (macro_name x)))
    maybe_strmac = true
    last_identifier_pos = peek_behind_pos(ps)
    last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
    while true
        maybe_strmac_1 = false
        t = peek_token(ps)
        k = kind(t)
        if !is_macrocall && ps.space_sensitive && preceding_whitespace(t) &&
                k in KSet"( [ { \" \"\"\" ` ```"
            # [f (x)] ==> (hcat f (parens x))
            # [f x] ==> (hcat f x)
            break
        elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' ."))
            # Macro calls with space-separated arguments
            # @foo a b ==> (macrocall (macro_name foo) a b)
            # @foo (x) ==> (macrocall (macro_name foo) (parens x))
            # @foo (x,y) ==> (macrocall (macro_name foo) (tuple-p x y))
            # [@foo x] ==> (vect (macrocall (macro_name foo) x))
            # [@foo] ==> (vect (macrocall (macro_name foo)))
            # @var"#" a ==> (macrocall (macro_name (var #)) a)
            # A.@x y ==> (macrocall (. A (macro_name x)) y)
            # A.@var"#" a ==> (macrocall (. A (macro_name (var #))) a)
            # @+x y ==> (macrocall (macro_name +) x y)
            # A.@.x ==> (macrocall (. A (macro_name .)) x)
            processing_macro_name = maybe_parsed_macro_name(
                ps, processing_macro_name, mark)
            let ps = with_space_sensitive(ps)
                # Space separated macro arguments
                # A.@foo a b ==> (macrocall (. A (macro_name foo)) a b)
                # @A.foo a b ==> (macrocall (macro_name (. A foo)) a b)
                n_args = parse_space_separated_exprs(ps)
                is_doc_macro = last_identifier_orig_kind == K"doc"
                if is_doc_macro && n_args == 1
                    # Parse extended @doc args on next line
                    # @doc x\ny ==> (macrocall (macro_name doc) x y)
                    # A.@doc x\ny ==> (macrocall (. A (macro_name doc)) x y)
                    # @A.doc x\ny ==> (macrocall (macro_name (. A doc)) x y)
                    # @doc x y\nz ==> (macrocall (macro_name doc) x y)
                    #
                    # Excluded cases
                    # @doc x\n\ny ==> (macrocall (macro_name doc) x)
                    # @doc x\nend ==> (macrocall (macro_name doc) x)
                    k2 = peek(ps, 2)
                    if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) &&
                            k2 != K"NewlineWs"
                        bump(ps) # newline
                        parse_eq(ps)
                    end
                end
                emit(ps, mark, K"macrocall")
            end
            break
        elseif k == K"("
            # f(a,b) ==> (call f a b)
            # f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2)))
            # f(a; b; c) ==> (call f a (parameters b) (parameters c))
            # (a=1)() ==> (call (parens (= a 1)))
            # f (a) ==> (call f (error-t) a)
            processing_macro_name = maybe_parsed_macro_name(
                ps, processing_macro_name, mark)
            bump_disallowed_space(ps)
            bump(ps, TRIVIA_FLAG)
            opts = parse_call_arglist(ps, K")")
            if peek(ps) == K"do"
                # f(x) do y body end ==> (call f x (do (tuple y) (block body)))
                parse_do(ps)
            end
            emit(ps, mark, is_macrocall ? K"macrocall" : K"call",
                 # TODO: Add PARENS_FLAG to all calls which use them?
                 (is_macrocall ? PARENS_FLAG : EMPTY_FLAGS)|opts.delim_flags)
            if is_macrocall
                # @x(a, b) ==> (macrocall-p (macro_name x) a b)
                # A.@x(y) ==> (macrocall-p (. A (macro_name x)) y)
                # A.@x(y).z ==> (. (macrocall-p (. A (macro_name x)) y) z)
                is_macrocall = false
                # @f()() ==> (call (macrocall-p (macro_name f)))
                macro_atname_range = nothing
            end
        elseif k == K"["
            processing_macro_name = maybe_parsed_macro_name(
                ps, processing_macro_name, mark)
            m = position(ps)
            # a [i] ==> (ref a (error-t) i)
            bump_disallowed_space(ps)
            bump(ps, TRIVIA_FLAG)
            ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true),
                                           K"]", ps.end_symbol)
            if is_macrocall
                # @S[a,b] ==> (macrocall (macro_name S) (vect a b))
                # @S[a b] ==> (macrocall (macro_name S) (hcat a b))
                # @S[a; b] ==> (macrocall (macro_name S) (vcat a b))
                # A.@S[a] ==> (macrocall (. A (macro_name S)) (vect a))
                # @S[a].b ==> (. (macrocall (macro_name S) (vect a)) b)
                #v1.7: @S[a ;; b] ==> (macrocall (macro_name S) (ncat-2 a b))
                #v1.6: @S[a ;; b] ==> (macrocall (macro_name S) (error (ncat-2 a b)))
                emit(ps, m, ckind, cflags | set_numeric_flags(dim))
                check_ncat_compat(ps, m, ckind)
                emit(ps, mark, K"macrocall")
                is_macrocall = false
                macro_atname_range = nothing
            else
                # a[i] ==> (ref a i)
                # a[i,j] ==> (ref a i j)
                # (a=1)[] ==> (ref (parens (= a 1)))
                # a[end] ==> (ref a end)
                # T[x y] ==> (typed_hcat T x y)
                # T[x ; y] ==> (typed_vcat T x y)
                # T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
                # T[x for x in xs] ==> (typed_comprehension T (generator x (iteration (in x xs))))
                #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))
                outk = ckind == K"vect" ? K"ref" :
                       ckind == K"hcat" ? K"typed_hcat" :
                       ckind == K"vcat" ? K"typed_vcat" :
                       ckind == K"comprehension" ? K"typed_comprehension" :
                       ckind == K"ncat" ? K"typed_ncat" :
                       internal_error("unrecognized kind in parse_cat ", string(ckind))
                emit(ps, mark, outk, cflags | set_numeric_flags(dim))
                check_ncat_compat(ps, mark, ckind)
            end
        elseif k == K"."
            # Check if this is a dotted operator, not field access
            k2 = peek(ps, 2)
            if is_operator(k2) && !is_word_operator(k2) && k2 != K":" && k2 != K"$" && k2 != K"'" && k2 != K"?"
                # This is a dotted operator like .=, .+, etc., not field access
                # Let the appropriate parser handle it
                break
            end
            # x .y ==> (. x (error-t) y)
            bump_disallowed_space(ps)
            emark = position(ps)
            if !isnothing(macro_atname_range)
                # Allow `@` in macrocall only in first and last position
                # A.B.@x ==> (macrocall (. (. A B) (macro_name x)))
                # @A.B.x ==> (macrocall (macro_name (. (. A B) x)))
                # A.@B.x ==> (macrocall (. (. A (error-t) B) (macro_name (error-t) x)))
                emit_diagnostic(ps, macro_atname_range...,
                                error="`@` must appear on first or last macro name component")
                # Recover by treating the `@` as if it had been on the last identifier
                saw_misplaced_atsym = true
                reset_node!(ps, macro_atname_range[2], kind=K"TOMBSTONE")
                reset_node!(ps, macro_atname_range[1], kind=K"error")
            end
            bump(ps, TRIVIA_FLAG)
            k = peek(ps)
            if k == K"("
                if is_macrocall
                    # Recover by pretending we do have the syntax
                    processing_macro_name = maybe_parsed_macro_name(
                        ps, processing_macro_name, mark)
                    # @M.(x) ==> (macrocall (dotcall (macro_name M) (error-t) x))
                    bump_invisible(ps, K"error", TRIVIA_FLAG)
                    emit_diagnostic(ps, mark,
                                    error="dot call syntax not supported for macros")
                end
                # f.(a,b) ==> (dotcall f a b)
                # f. (x) ==> (dotcall f (error-t) x)
                bump_disallowed_space(ps)
                bump(ps, TRIVIA_FLAG)
                opts = parse_call_arglist(ps, K")")
                emit(ps, mark, K"dotcall", opts.delim_flags)
            elseif k == K":"
                # A.:+ ==> (. A (quote-: +))
                # A.: + ==> (. A (error-t) (quote-: +))
                m = position(ps)
                bump(ps, TRIVIA_FLAG)
                bump_disallowed_space(ps)
                parse_atom(ps, false)
                emit(ps, m, K"quote", COLON_QUOTE)
                emit(ps, mark, K".")
            elseif k == K"$"
                # f.$x ==> (. f ($ x))
                # f.$(x+y) ==> (. f ($ (call + x y)))
                # A.$B.@x ==> (macrocall (. (. A ($ B)) (macro_name x)))
                # @A.$x a ==> (macrocall (macro_name (. A (error x))) a)
                m = position(ps)
                bump(ps, TRIVIA_FLAG)
                parse_atom(ps)
                if is_macrocall
                    emit(ps, m, K"error", error="invalid macro name")
                else
                    emit(ps, m, K"$")
                end
                last_identifier_orig_kind = K"$"
                emit(ps, mark, K".")
            elseif k == K"@"
                # A macro call after some prefix A has been consumed
                # A.@x ==> (macrocall (. A (macro_name x)))
                # A.@x a ==> (macrocall (. A (macro_name x)) a)
                m = position(ps)
                if is_macrocall
                    # @A.B.@x a ==> (macrocall (. (. A B) (error-t) (macro_name x)) a)
                    bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path")
                else
                    bump(ps, TRIVIA_FLAG)
                end
                parse_macro_name(ps)
                last_identifier_pos = peek_behind_pos(ps)
                last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
                !is_macrocall && emit(ps, m, K"macro_name")
                macro_atname_range = (m, position(ps))
                is_macrocall = true
                emit(ps, mark, K".")
            elseif k == K"'"
                # f.' => (dotcall-post f (error '))
                bump(ps, remap_kind=K"Identifier") # bump '
                # TODO: Reclaim dotted postfix operators :-)
                emit(ps, emark, K"error",
                     error="the .' operator for transpose is discontinued")
                emit(ps, mark, K"dotcall", POSTFIX_OP_FLAG)
            else
                if saw_misplaced_atsym
                    # If we saw a misplaced `@` earlier, this might be the place
                    # where it should have been. Opportunistically bump the
                    # zero-width error token here. If that's not right, we'll
                    # reset it later.
                    if misplaced_atsym_mark !== nothing
                        reset_node!(ps, misplaced_atsym_mark[1], kind=K"TOMBSTONE")
                        reset_node!(ps, misplaced_atsym_mark[2], kind=K"TOMBSTONE")
                    end
                    macro_name_mark = position(ps)
                    bump_invisible(ps, K"error", TRIVIA_FLAG)
                    aterror_mark = position(ps)
                end
                # Field/property syntax
                # f.x.y ==> (. (. f x) y)
                parse_atom(ps, false)
                if saw_misplaced_atsym
                    emit(ps, macro_name_mark, K"macro_name")
                    misplaced_atsym_mark = (aterror_mark, position(ps))
                end
                last_identifier_pos = peek_behind_pos(ps)
                last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
                maybe_strmac_1 = true
                emit(ps, mark, K".")
            end
        elseif k == K"'" && !preceding_whitespace(t)
            # f' ==> (call-post f ')
            # f'ᵀ ==> (call-post f 'ᵀ)
            bump(ps, remap_kind=K"Identifier")
            emit(ps, mark, K"call", POSTFIX_OP_FLAG)
        elseif k == K"{"
            processing_macro_name = maybe_parsed_macro_name(
                ps, processing_macro_name, mark)
            # Type parameter curlies and macro calls
            m = position(ps)
            # S {a} ==> (curly S (error-t) a)
            bump_disallowed_space(ps)
            bump(ps, TRIVIA_FLAG)
            opts = parse_call_arglist(ps, K"}")
            if is_macrocall
                # @S{a,b} ==> (macrocall (macro_name S) (braces a b))
                # A.@S{a} ==> (macrocall (. A (macro_name S)) (braces a))
                # @S{a}.b ==> (. (macrocall (macro_name S) (braces a)) b)
                emit(ps, m, K"braces", opts.delim_flags)
                emit(ps, mark, K"macrocall")
                min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`")
                is_macrocall = false
                macro_atname_range = nothing
            else
                # S{a,b} ==> (curly S a b)
                emit(ps, mark, K"curly", opts.delim_flags)
            end
        elseif k in KSet" \" \"\"\" ` ``` " &&
                !preceding_whitespace(t) && maybe_strmac &&
                (# Must mirror the logic in lex_quote() for consistency
                 origk = last_identifier_orig_kind;
                 origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk))
            # Custom string and command literals
            # x"str" ==> (macrocall @x_str (string-r "str"))
            # x`str` ==> (macrocall @x_cmd (cmdstring-r "str"))
            # x"" ==> (macrocall @x_str (string-r ""))
            # x`` ==> (macrocall @x_cmd (cmdstring-r ""))
            # Triple quoted processing for custom strings
            # r"""\nx""" ==> (macrocall @r_str (string-s-r "x"))
            # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y"))
            # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y"))
            #
            # Use a special token kind for string and cmd macro names so the
            # names can be expanded later as necessary.
            name_kind = is_string_delim(k) ? K"StrMacroName" : K"CmdMacroName"
            reset_node!(ps, last_identifier_pos, kind=name_kind)
            parse_string(ps, true)
            t = peek_token(ps)
            k = kind(t)
            if !preceding_whitespace(t) && is_string_macro_suffix(k)
                # Macro suffixes can include keywords and numbers
                # x"s"y ==> (macrocall @x_str (string-r "s") "y")
                # x"s"end ==> (macrocall @x_str (string-r "s") "end")
                # x"s"in ==> (macrocall @x_str (string-r "s") "in")
                # x"s"2 ==> (macrocall @x_str (string-r "s") 2)
                # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0)
                suffix_kind = (k == K"Identifier" || is_keyword(k) ||
                               is_word_operator(k)) ? K"String" : k
                bump(ps, remap_kind=suffix_kind)
            end
            emit(ps, mark, K"macrocall")
        else
            break
        end
        maybe_strmac = maybe_strmac_1
    end
end

# Parse the `A<:B` part of type definitions like `struct A<:B end`
#
# flisp: parse-subtype-spec
function parse_subtype_spec(ps::ParseState)
    # Wart: why isn't the flisp parser more strict here?
    # <: is the only operator which isn't a syntax error, but
    # parse_comparison allows all sorts of things.
    parse_comparison(ps, true)
end

# flisp: parse-struct-field
function parse_struct_field(ps::ParseState)
    mark = position(ps)
    const_field = peek(ps) == K"const"
    if const_field
        bump(ps, TRIVIA_FLAG)
    end
    parse_eq(ps)
    if const_field
        # Const fields https://github.com/JuliaLang/julia/pull/43305
        #v1.8: struct A const a end ==> (struct A (block (const x)))
        #v1.7: struct A const a end ==> (struct A (block (error (const x))))
        emit(ps, mark, K"const")
        min_supported_version(v"1.8", ps, mark, "`const` struct field")
    end
end

# parse expressions or blocks introduced by syntactic reserved words.
+# +# The caller should use peek_initial_reserved_words to determine whether +# to call parse_resword, or whether contextual keywords like `mutable` are +# simple identifiers. +# +# flisp: parse-resword +function parse_resword(ps::ParseState) + # In normal_context + # begin f() where T = x end ==> (block (= (where (call f) T) x)) + ps = normal_context(ps) + bump_trivia(ps) + mark = position(ps) + word = peek(ps) + if word in KSet"begin quote" + # begin end ==> (block) + # begin a ; b end ==> (block a b) + # begin\na\nb\nend ==> (block a b) + bump(ps, TRIVIA_FLAG) + parse_block_inner(ps, parse_docstring) + bump_closing_token(ps, K"end") + emit(ps, mark, K"block") + if word == K"quote" + # quote end ==> (quote (block)) + # quote body end ==> (quote (block body)) + emit(ps, mark, K"quote") + end + elseif word == K"while" + # while cond body end ==> (while cond (block body)) + # while x < y \n a \n b \n end ==> (while (call-i x < y) (block a b)) + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"while") + elseif word == K"for" + # for x in xs end ==> (for (iteration (in x xs)) (block)) + # for x in xs, y in ys \n a \n end ==> (for (iteration (in x xs) (in y ys)) (block a)) + bump(ps, TRIVIA_FLAG) + parse_iteration_specs(ps) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"for") + elseif word == K"let" + bump(ps, TRIVIA_FLAG) + m = position(ps) + if peek(ps) in KSet"NewlineWs ;" + # let end ==> (let (block) (block)) + # let ; end ==> (let (block) (block)) + # let ; body end ==> (let (block) (block body)) + else + # let x=1\n end ==> (let (block (= x 1)) (block)) + # let x=1 ; end ==> (let (block (= x 1)) (block)) + # let x::1 ; end ==> (let (block (::-i x 1)) (block)) + # let x ; end ==> (let (block x) (block)) + # let x=1,y=2 ; end ==> (let (block (= x 1) (= y 2) (block))) + # let x+=1 ; end ==> (let (block (op= x + 1)) (block)) + parse_comma_separated(ps, parse_eq_star) + end + emit(ps, 
m, K"block") + k = peek(ps) + if k in KSet"NewlineWs ;" + bump(ps, TRIVIA_FLAG) + elseif k == K"end" + # pass + else + recover(is_closer_or_newline, ps, TRIVIA_FLAG, + error="let variables should end in `;` or newline") + end + # let\na\nb\nend ==> (let (block) (block a b)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"let") + elseif word == K"if" + parse_if_elseif(ps) + elseif word in KSet"global local" + # global x ==> (global x) + # local x ==> (local x) + bump(ps, TRIVIA_FLAG) + const_mark = nothing + if peek(ps) == K"const" + const_mark = position(ps) + bump(ps, TRIVIA_FLAG) + end + had_assignment = parse_global_local_const_vars(ps) + if !isnothing(const_mark) + # global const x = 1 ==> (global (const (= x 1))) + # local const x = 1 ==> (local (const (= x 1))) + emit(ps, const_mark, K"const") + if !had_assignment + # global const x ==> (global (error (const x))) + emit(ps, mark, K"error", error="expected assignment after `const`") + end + end + emit(ps, mark, word) + elseif word == K"const" + # const x = 1 ==> (const (= x 1)) + bump(ps, TRIVIA_FLAG) + scope_mark = nothing + scope_k = peek(ps) + if scope_k in KSet"local global" + scope_mark = position(ps) + bump(ps, TRIVIA_FLAG) + end + had_assignment = parse_global_local_const_vars(ps) + if !isnothing(scope_mark) + # const global x = 1 ==> (const (global (= x 1))) + # const local x = 1 ==> (const (local (= x 1))) + emit(ps, scope_mark, scope_k) + end + emit(ps, mark, K"const") + if !had_assignment + # const x .= 1 ==> (error (const (.= x 1))) + emit(ps, mark, K"error", error="expected assignment after `const`") + end + elseif word in KSet"function macro" + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) + has_body = parse_function_signature(ps, word == K"function") + if has_body + # The function body + # function f() \n a \n b end ==> (function (call f) (block a b)) + # function f() end ==> (function (call f) (block)) + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, word) + 
else + # Function/macro definition with no methods + # function f end ==> (function f) + # (function f \n end) ==> (parens (function f)) + # function f \n\n end ==> (function f) + # function $f end ==> (function ($ f)) + # macro f end ==> (macro f) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, word) + end + elseif word == K"abstract" + # Abstract type definitions + # abstract type A end ==> (abstract A) + # abstract type A ; end ==> (abstract A) + # abstract type \n\n A \n\n end ==> (abstract A) + # abstract type A <: B end ==> (abstract (<: A B)) + # abstract type A <: B{T,S} end ==> (abstract (<: A (curly B T S))) + # Oddities allowed by parser + # abstract type A < B end ==> (abstract (call-i A < B)) + bump(ps, TRIVIA_FLAG) + @check peek(ps) == K"type" + bump(ps, TRIVIA_FLAG) + parse_subtype_spec(ps) + bump_semicolon_trivia(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"abstract") + elseif word in KSet"struct mutable" + # struct A <: B \n a::X \n end ==> (struct (<: A B) (block (::-i a X))) + # struct A \n a \n b \n end ==> (struct A (block a b)) + #v1.7: struct A const a end ==> (struct A (block (error (const a)))) + #v1.8: struct A const a end ==> (struct A (block (const a))) + is_mut = word == K"mutable" + if is_mut + # mutable struct A end ==> (struct-mut A (block)) + bump(ps, TRIVIA_FLAG) + else + # struct A end ==> (struct A (block)) + end + @check peek(ps) == K"struct" + bump(ps, TRIVIA_FLAG) + parse_subtype_spec(ps) + parse_block(ps, ps1->parse_docstring(ps1, parse_struct_field)) + bump_closing_token(ps, K"end") + emit(ps, mark, K"struct", is_mut ? 
MUTABLE_FLAG : EMPTY_FLAGS) + elseif word == K"primitive" + # primitive type A 32 end ==> (primitive A 32) + # primitive type A 32 ; end ==> (primitive A 32) + # primitive type A $N end ==> (primitive A ($ N)) + # primitive type A <: B \n 8 \n end ==> (primitive (<: A B) 8) + bump(ps, TRIVIA_FLAG) + @check peek(ps) == K"type" + bump(ps, TRIVIA_FLAG) + let ps = with_space_sensitive(ps) + parse_subtype_spec(ps) + parse_cond(ps) + end + bump_semicolon_trivia(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"primitive") + elseif word == K"try" + parse_try(ps) + elseif word == K"return" + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"NewlineWs" || is_closing_token(ps, k) + # return\nx ==> (return) + # return) ==> (return) + else + # return x ==> (return x) + # return x,y ==> (return (tuple x y)) + parse_eq(ps) + end + emit(ps, mark, K"return") + elseif word in KSet"break continue" + # break ==> (break) + # continue ==> (continue) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, word) + k = peek(ps) + if !(k in KSet"NewlineWs ; ) : EndMarker" || (k == K"end" && !ps.end_symbol)) + recover(is_closer_or_newline, ps, TRIVIA_FLAG, + error="unexpected token after $(untokenize(word))") + end + elseif word in KSet"module baremodule" + # module A end ==> (module A (block)) + # baremodule A end ==> (module-bare A (block)) + bump(ps, TRIVIA_FLAG) + if is_reserved_word(peek(ps)) + # module do \n end ==> (module (error do) (block)) + bump(ps, error="Invalid module name") + else + # module $A end ==> (module ($ A) (block)) + parse_unary_prefix(ps) + end + # module A \n a \n b \n end ==> (module A (block a b)) + # module A \n "x"\na \n end ==> (module A (block (doc (string "x") a))) + parse_block(ps, parse_public) + bump_closing_token(ps, K"end") + emit(ps, mark, K"module", + word == K"baremodule" ? 
BARE_MODULE_FLAG : EMPTY_FLAGS) + elseif word in KSet"export public" + # export a ==> (export a) + # export @a ==> (export (macro_name a)) + # export a, \n @b ==> (export a (macro_name b)) + # export +, == ==> (export + ==) + # export \n a ==> (export a) + # export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b)))) + bump(ps, TRIVIA_FLAG) + parse_comma_separated(ps, x->parse_import_atsym(x, false)) + emit(ps, mark, word) + elseif word in KSet"import using" + parse_imports(ps) + elseif word == K"do" + bump(ps, TRIVIA_FLAG, error="invalid `do` syntax") + else + internal_error("unhandled reserved word ", string(word)) + end +end + +# Parse if-elseif-else-end expressions +# +# if a xx elseif b yy else zz end ==> (if a (block xx) (elseif b (block yy) (block zz))) +function parse_if_elseif(ps, is_elseif=false, is_elseif_whitespace_err=false) + mark = position(ps) + word = peek(ps) + if is_elseif_whitespace_err + # Only get here on recovery from error case - pretend we're parsing elseif. + word = K"elseif" + else + bump(ps, TRIVIA_FLAG) + end + cond_mark = position(ps) + if peek(ps) in KSet"NewlineWs end" + # if end ==> (if (error) (block)) + # if \n end ==> (if (error) (block)) + bump_trivia(ps, error="missing condition in `$(untokenize(word))`") + else + # if a end ==> (if a (block)) + # if a xx end ==> (if a (block xx)) + parse_cond(ps) + end + # if a \n\n xx \n\n end ==> (if a (block xx)) + parse_block(ps) + bump_trivia(ps) + k = peek(ps) + if k == K"elseif" + # if a xx elseif b yy end ==> (if a (block xx) (elseif b (block yy))) + parse_if_elseif(ps, true) + elseif k == K"else" + emark = position(ps) + bump(ps, TRIVIA_FLAG) + if peek(ps) == K"if" + # Recovery: User wrote `else if` by mistake ? 
+ # if a xx else if b yy end ==> (if a (block xx) (error-t) (elseif b (block yy))) + bump(ps, TRIVIA_FLAG) + emit(ps, emark, K"error", TRIVIA_FLAG, + error="use `elseif` instead of `else if`") + parse_if_elseif(ps, true, true) + else + # if a xx else yy end ==> (if a (block xx) (block yy)) + parse_block(ps) + end + end + if !is_elseif + bump_closing_token(ps, K"end") + end + emit(ps, mark, word) +end + +# Like parse_assignment, but specialized so that we can omit the +# tuple when there's commas but no assignment. +function parse_global_local_const_vars(ps) + mark = position(ps) + n_commas = parse_comma(ps, false) + (isdot, t) = peek_dotted_op_token(ps) + if is_prec_assignment(t) + if n_commas >= 1 + # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) + emit(ps, mark, K"tuple") + end + # const x = 1 ==> (const (= x 1)) + # global x ~ 1 ==> (global (call-i x ~ 1)) + # global x += 1 ==> (global (+= x 1)) + parse_assignment_with_initial_ex(ps, mark, parse_comma) + else + # global x,y ==> (global x y) + end + return kind(t) == K"=" && !isdot +end + +# Parse function and macro definitions +function parse_function_signature(ps::ParseState, is_function::Bool) + is_anon_func = false + parsed_call = false + needs_parse_call = true + + mark = position(ps) + if !is_function + # Parse macro name + parse_unary_prefix(ps) + kb = peek_behind(ps).orig_kind + if is_initial_reserved_word(ps, kb) + # macro while(ex) end ==> (macro (call (error while) ex) (block)) + emit(ps, mark, K"error", error="invalid macro name") + else + # macro f() end ==> (macro (call f) (block)) + # macro (:)(ex) end ==> (macro (call (parens :) ex) (block)) + # macro (type)(ex) end ==> (macro (call (parens type) ex) (block)) + # macro $f() end ==> (macro (call ($ f)) (block)) + # macro ($f)() end ==> (macro (call (parens ($ f))) (block)) + end + else + if peek(ps) != K"(" + # function f() end ==> (function (call f)) + parse_unary_prefix(ps) + else + # When an initial parenthesis is present, we need to 
distinguish + # between + # * The function name in parens, followed by (args...) + # * An anonymous function argument list in parens + # * The whole function declaration, in parens + bump(ps, TRIVIA_FLAG) + is_empty_tuple = peek(ps, skip_newlines=true) == K")" + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + _parsed_call = was_eventually_call(ps) + _needs_parse_call = peek(ps, 2) ∈ KSet"( ." + _is_anon_func = (!_needs_parse_call && !_parsed_call) || had_commas + return (needs_parameters = _is_anon_func, + is_anon_func = _is_anon_func, + parsed_call = _parsed_call, + needs_parse_call = _needs_parse_call, + maybe_grouping_parens = !had_commas && !had_splat && num_semis == 0 && num_subexprs == 1) + end + is_anon_func = opts.is_anon_func + parsed_call = opts.parsed_call + needs_parse_call = opts.needs_parse_call + if is_anon_func + # function (x) body end ==> (function (tuple-p x) (block body)) + # function (x::f()) end ==> (function (tuple-p (::-i x (call f))) (block)) + # function (x,y) end ==> (function (tuple-p x y) (block)) + # function (x=1) end ==> (function (tuple-p (= x 1)) (block)) + # function (;x=1) end ==> (function (tuple-p (parameters (= x 1))) (block)) + # function (f(x),) end ==> (function (tuple-p (call f x)) (block)) + ambiguous_parens = opts.maybe_grouping_parens && + peek_behind(ps).kind in KSet"macrocall $" + emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags) + if ambiguous_parens + # Got something like `(@f(x))`. Is it anon `(@f(x),)` or named sig `@f(x)` ?? + emit(ps, mark, K"error", error="Ambiguous signature. 
Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") + end + elseif is_empty_tuple + # Weird case which is consistent with parse_paren but will be + # rejected in lowering + # function ()(x) end ==> (function (call (tuple-p) x) (block)) + emit(ps, mark, K"tuple", PARENS_FLAG) + else + # function (A).f() end ==> (function (call (. (parens A) f)) (block)) + # function (:)() end ==> (function (call (parens :)) (block)) + # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) + # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) + # function (:*=(f))() end ==> (function (call (parens (call (quote-: *=) f))) (block)) + emit(ps, mark, K"parens", PARENS_FLAG) + end + end + if !is_anon_func + kb = peek_behind(ps).orig_kind + if is_reserved_word(kb) + # function begin() end ==> (function (call (error begin)) (block)) + emit(ps, mark, K"error", error="invalid function name") + else + # function f() end ==> (function (call f) (block)) + # function type() end ==> (function (call type) (block)) + # function \n f() end ==> (function (call f) (block)) + # function $f() end ==> (function (call ($ f)) (block)) + # function (::Type{T})(x) end ==> (function (call (parens (::-pre (curly Type T))) x) (block)) + end + end + end + if needs_parse_call + # Parse function argument list + # function f(x,y) end ==> (function (call f x y) (block)) + # function f{T}() end ==> (function (call (curly f T)) (block)) + # function A.f() end ==> (function (call (. 
A f)) (block)) + parse_call_chain(ps, mark) + sig_kind = peek_behind(ps).kind + if sig_kind in KSet"Identifier var $" && peek(ps, skip_newlines=true) == K"end" + # function f end ==> (function f) + # function $f end ==> (function $f) + return false + elseif sig_kind == K"macrocall" + min_supported_version(v"1.12", ps, mark, "macro call as function signature") + elseif sig_kind != K"call" + # function f body end ==> (function (error f) (block body)) + emit(ps, mark, K"error", + error="Invalid signature in $(is_function ? "function" : "macro") definition") + end + end + if is_function && peek(ps) == K"::" + # Function return type + # function f()::T end ==> (function (::-i (call f) T) (block)) + # function f()::g(T) end ==> (function (::-i (call f) (call g T)) (block)) + bump(ps, TRIVIA_FLAG) + parse_call(ps) + emit(ps, mark, K"::", INFIX_FLAG) + end + if peek(ps) == K"where" + # Function signature where syntax + # function f() where {T} end ==> (function (where (call f) (braces T)) (block)) + # function f() where T end ==> (function (where (call f) T) (block)) + parse_where_chain(ps, mark) + end + # function f()::S where T end ==> (function (where (::-i (call f) S) T) (block)) + # + # Ugly cases for compat where extra parentheses existed and we've + # already parsed at least the call part of the signature + # + # function (f() where T) end ==> (function (where (call f) T) (block)) + # function (f()) where T end ==> (function (where (call f) T) (block)) + # function (f() where T) where U end ==> (function (where (where (call f) T) U) (block)) + # function (f()::S) end ==> (function (parens (::-i (call f) S)) (block)) + # function ((f()::S) where T) end ==> (function (where (parens (::-i (call f) S)) T) (block)) + # + # TODO: Warn for use of parens? The precedence of `::` and + # `where` don't work inside parens so this is a bit of a syntax + # oddity/aberration. 
+ return true +end + +# Parse a try block +# +# try \n x \n catch e \n y \n finally \n z end ==> (try (block x) (catch e (block y)) (finally (block z))) +#v1.8: try \n x \n catch e \n y \n else z finally \n w end ==> (try (block x) (catch e (block y)) (else (block z)) (finally (block w))) +# +# flisp: embedded in parse_resword +function parse_try(ps) + mark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_block(ps) + has_catch = false + has_finally = false + bump_trivia(ps) + if peek(ps) == K"catch" + has_catch = true + parse_catch(ps) + end + bump_trivia(ps) + if peek(ps) == K"else" + # catch-else syntax: https://github.com/JuliaLang/julia/pull/42211 + # + #v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block))) + else_mark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_block(ps) + if !has_catch + #v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y))) + emit(ps, else_mark, K"error", error="Expected `catch` before `else`") + end + #v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block)))) + min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`") + emit(ps, else_mark, K"else") + end + bump_trivia(ps) + if peek(ps) == K"finally" + finally_mark = position(ps) + # try x finally y end ==> (try (block x) (finally (block y))) + has_finally = true + bump(ps, TRIVIA_FLAG) + parse_block(ps) + emit(ps, finally_mark, K"finally") + end + # Wart: the flisp parser allows finally before catch, the *opposite* order + # in which these blocks execute. 
+ bump_trivia(ps) + if !has_catch && peek(ps) == K"catch" + # try x finally y catch e z end ==> (try (block x) (finally (block y)) (catch e (block z))) + m = position(ps) + parse_catch(ps) + emit_diagnostic(ps, m, + warning="`catch` after `finally` will execute out of order") + end + missing_recovery = !has_catch && !has_finally + if missing_recovery + # try x end ==> (try (block x) (error-t)) + bump_invisible(ps, K"error", TRIVIA_FLAG) + end + bump_closing_token(ps, K"end") + emit(ps, mark, K"try") + if missing_recovery + emit_diagnostic(ps, mark, error="try without catch or finally") + end +end + +function parse_catch(ps::ParseState) + mark = position(ps) + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k in KSet"NewlineWs ;" || is_closing_token(ps, k) + # try x catch end ==> (try (block x) (catch □ (block))) + # try x catch ; y end ==> (try (block x) (catch □ (block y))) + # try x catch \n y end ==> (try (block x) (catch □ (block y))) + bump_invisible(ps, K"Placeholder") + else + # try x catch e y end ==> (try (block x) (catch e (block y))) + # try x catch $e y end ==> (try (block x) (catch ($ e) (block y))) + m = position(ps) + parse_eq_star(ps) + if !(peek_behind(ps).kind in KSet"Identifier var $") + # try x catch e+3 y end ==> (try (block x) (catch (error (call-i e + 3)) (block y))) + emit(ps, m, K"error", error="a variable name is expected after `catch`") + end + end + parse_block(ps) + emit(ps, mark, K"catch") +end + +# flisp: parse-do +function parse_do(ps::ParseState) + mark = position(ps) + bump(ps, TRIVIA_FLAG) # do + ps = normal_context(ps) + m = position(ps) + if peek(ps) in KSet"NewlineWs ;" + # f() do\nend ==> (call f (do (tuple) (block))) + # f() do ; body end ==> (call f (do (tuple) (block body))) + # this trivia needs to go into the tuple due to the way position() + # works. 
+ bump(ps, TRIVIA_FLAG) + else + # f() do x, y\n body end ==> (call f (do (tuple x y) (block body))) + parse_comma_separated(ps, parse_range) + end + emit(ps, m, K"tuple") + parse_block(ps) + bump_closing_token(ps, K"end") + emit(ps, mark, K"do") +end + +function _is_valid_macro_name(peektok) + return !is_error(peektok.kind) && (peektok.is_leaf || peektok.kind == K"var") +end + +# flisp: parse-macro-name +function parse_macro_name(ps::ParseState) + # @! x ==> (macrocall @! x) + # @.. x ==> (macrocall (macro_name ..) x) + # @$ x ==> (macrocall (macro_name $) x) + # @var"#" x ==> (macrocall (macro_name (var #)) x) + bump_disallowed_space(ps) + mark = position(ps) + parse_atom(ps, false) + b = peek_behind(ps, skip_parens=false) + if b.kind == K"parens" + emit_diagnostic(ps, mark, + warning="parenthesizing macro names is unnecessary") + elseif !_is_valid_macro_name(b) + # @[x] y z ==> (macrocall (macro_name (error (vect x))) y z) + emit(ps, mark, K"error", error="invalid macro name") + end +end + +# Parse an identifier, interpolation or @-prefixed symbol +# +# flisp: parse-atsym +function parse_import_atsym(ps::ParseState, allow_quotes=true) + bump_trivia(ps) + if peek(ps) == K"@" + mark = position(ps) + # export @a ==> (export (macro_name a)) + # export @var"'" ==> (export (macro_name (var '))) + # export a, \n @b ==> (export a (macro_name b)) + bump(ps, TRIVIA_FLAG) + parse_macro_name(ps) + emit(ps, mark, K"macro_name") + else + # export a ==> (export a) + # export \n a ==> (export a) + # export $a, $(a*b) ==> (export ($ a) (parens ($ (call * a b)))) + # export (x::T) ==> (export (error (parens (::-i x T)))) + # export outer ==> (export outer) + # export ($f) ==> (export ($ f)) + mark = position(ps) + # Syntax Edition TODO: make all the various ways to quote things inside + # import paths an error and require `var""` in the few remaining cases. 
+ if allow_quotes && peek(ps) == K":" && !is_closing_token(ps, peek(ps,2)) + # import A.:+ ==> (import (importpath A (quote-: +))) + emit_diagnostic(ps, warning="quoting with `:` is not required here") + end + parse_unary_prefix(ps) + pos = position(ps) + warn_parens = false + if peek_behind(ps, pos).kind == K"parens" + # import A.(:+) ==> (import (importpath A (parens (quote-: +)))) + pos = first_child_position(ps, pos) + warn_parens = true + end + if allow_quotes && peek_behind(ps, pos).kind == K"quote" + pos = first_child_position(ps, pos) + if peek_behind(ps, pos).kind == K"parens" + # import A.:(+) ==> (import (importpath A (quote-: (parens +)))) + pos = first_child_position(ps, pos) + warn_parens = true + end + end + b = peek_behind(ps, pos) + if warn_parens && b.orig_kind != K".." + emit_diagnostic(ps, mark, warning="parentheses are not required here") + end + ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || + (!b.is_leaf && b.kind in KSet"$ var") + if !ok + emit(ps, mark, K"error", error="expected identifier") + end + end +end + +# Parse import and using syntax +# +# flisp: parse-imports +function parse_imports(ps::ParseState) + mark = position(ps) + word = peek(ps) + @check word in KSet"import using" + bump(ps, TRIVIA_FLAG) + emark = position(ps) + initial_as = parse_import(ps, word, false) + t = peek_token(ps) + k = kind(t) + has_import_prefix = false # true if we have `prefix:` in `import prefix: stuff` + has_comma = false + if k == K":" && !preceding_whitespace(t) + bump(ps, TRIVIA_FLAG) + has_import_prefix = true + if initial_as + # import A as B: x ==> (import (: (error (as (importpath A) B)) (importpath x))) + emit(ps, emark, K"error", error="`as` before `:` in import/using") + end + elseif k == K"," + bump(ps, TRIVIA_FLAG) + has_comma = true + end + if has_import_prefix || has_comma + # import A, y ==> (import (importpath A) (importpath y)) + # import A: x, y ==> (import (: (importpath A) (importpath x) (importpath y))) + # 
import A: +, == ==> (import (: (importpath A) (importpath +) (importpath ==))) + has_import_prefix_ = has_import_prefix + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) + if peek(ps) == K":" + # Error recovery + # import A: x, B: y ==> (import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y)))) + emark = position(ps) + bump(ps, TRIVIA_FLAG) + parse_comma_separated(ps, ps1->parse_import(ps1, word, has_import_prefix_)) + emit(ps, emark, K"error", TRIVIA_FLAG, + error="`:` can only be used when importing a single module. Split imports into multiple lines") + end + end + if has_import_prefix + # import A: x ==> (import (: (importpath A) (importpath x))) + emit(ps, mark, K":") + end + # using A ==> (using (importpath A)) + # import A ==> (import (importpath A)) + emit(ps, mark, word) +end + +# Parse individual module path and renaming with `as` +# +# flisp: parse-import +function parse_import(ps::ParseState, word, has_import_prefix) + mark = position(ps) + parse_import_path(ps) + # import A: x, y ==> (import (: (importpath A) (importpath x) (importpath y))) + if peek(ps) == K"as" + # import A as B ==> (import (as (importpath A) B)) + # import A: x as y ==> (import (: (importpath A) (as (importpath x) y))) + # using A: x as y ==> (using (: (importpath A) (as (importpath x) y))) + bump(ps, TRIVIA_FLAG) + parse_import_atsym(ps, false) + emit(ps, mark, K"as") + if word == K"using" && !has_import_prefix + # using A as B ==> (using (error (as (importpath A) B))) + # using A, B as C ==> (using (importpath A) (error (as (importpath B) C))) + emit(ps, mark, K"error", + error="`using` with `as` renaming requires a `:` and context module") + end + #v1.5: import A as B ==> (import (error (as (importpath A) B))) + min_supported_version(v"1.6", ps, mark, "`import ... 
as`") + return true + else + return false + end +end + +# flisp: parse-import-path +function parse_import_path(ps::ParseState) + mark = position(ps) + bump_trivia(ps) + # The tokenizer produces conjoined dotted tokens .. and ... + # When parsing import we must split these into single dots + # import .A ==> (import (importpath . A)) + # import ..A ==> (import (importpath . . A)) + # import ...A ==> (import (importpath . . . A)) + # import ....A ==> (import (importpath . . . . A)) + # Dots with spaces are allowed (a misfeature?) + # import . .A ==> (import (importpath . . A)) + # Modules with operator symbol names + # import .⋆ ==> (import (importpath . ⋆)) + first_dot = true + while true + t = peek_token(ps) + k = kind(t) + if !first_dot && preceding_whitespace(t) + emit_diagnostic(ps, whitespace=true, + warning="space between dots in import path") + end + if k == K"." + bump(ps) + elseif k == K".." + bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) + elseif k == K"..." + bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) + else + break + end + first_dot = false + end + # import @x ==> (import (importpath (macro_name x))) + # import $A ==> (import (importpath ($ A))) + parse_import_atsym(ps, false) + while true + t = peek_token(ps) + k = kind(t) + if k == K"." + # import A.B ==> (import (importpath A B)) + # import $A.@x ==> (import (importpath ($ A) (macro_name x))) + # import A.B.C ==> (import (importpath A B C)) + # import A.⋆.f ==> (import (importpath A ⋆ f)) + next_tok = peek_token(ps, 2) + if is_operator(kind(next_tok)) + if preceding_whitespace(t) + # Whitespace in import path allowed but discouraged + # import A .== ==> (import (importpath A ==)) + emit_diagnostic(ps, whitespace=true, + warning="space between dots in import path") + end + bump_trivia(ps) + else + bump_disallowed_space(ps) + end + bump(ps, TRIVIA_FLAG) + parse_import_atsym(ps) + elseif k == K"..." + # Import the .. operator + # import A... 
==> (import (importpath A ..)) + bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) + elseif k in KSet"NewlineWs ; , : EndMarker" + # import A; B ==> (import (importpath A)) + break + else + # Could we emit a more comprehensible error here? + break + end + end + emit(ps, mark, K"importpath") +end + +# parse comma-separated assignments, like "i=1:n,j=1:m,..." +# +# flisp: parse-comma-separated +function parse_comma_separated(ps::ParseState, down) + n_subexprs = 0 + while true + down(ps) + n_subexprs += 1 + if peek(ps) == K"," + bump(ps, TRIVIA_FLAG) + else + break + end + end + return n_subexprs +end + +# FIXME(sschaub): for backwards compatibility, allows newline before =/in/∈ +# in generator expressions. See issue #37393 +function peek_skip_newline_in_gen(ps::ParseState, n=1) + k = peek(ps, n) + if ps.for_generator && k == K"NewlineWs" + k = peek(ps, n+1) + end + return k +end + +# parse comma-separated "assignment" but allowing `in` and `∈` as assignment operators +# +# i = rhs ==> (= i rhs) +# i in rhs ==> (= i rhs) +# i ∈ rhs ==> (= i rhs) +# +# i = 1:10 ==> (= i (call : 1 10)) +# (i,j) in iter ==> (= (tuple-p i j) iter) +# +# flisp: parse-iteration-spec +function parse_iteration_spec(ps::ParseState) + mark = position(ps) + k = peek(ps) + # Handle `outer` contextual keyword + parse_pipe_lt(with_space_sensitive(ps)) + if peek_behind(ps).orig_kind == K"outer" + if peek_skip_newline_in_gen(ps) in KSet"= in ∈" + # Not outer keyword + # outer = rhs ==> (iteration (in outer rhs)) + # outer <| x = rhs ==> (iteration (in (call-i outer <| x) rhs)) + else + # outer i = rhs ==> (iteration (in (outer i) rhs)) + # outer (x,y) = rhs ==> (iteration (in (outer (tuple-p x y)) rhs)) + reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) + parse_pipe_lt(ps) + emit(ps, mark, K"outer") + end + end + if peek_skip_newline_in_gen(ps) in KSet"= in ∈" + bump(ps, TRIVIA_FLAG) + parse_pipe_lt(ps) + else + # Recovery heuristic + recover(ps, error="invalid iteration 
spec: expected one of `=` `in` or `∈`") do ps, k + k in KSet", NewlineWs" || is_closing_token(ps, k) + end + # Or try parse_pipe_lt ??? + end + emit(ps, mark, K"in") +end + +# Parse an iteration spec, or a comma separate list of such for for loops and +# generators +function parse_iteration_specs(ps::ParseState) + mark = position(ps) + n_iters = parse_comma_separated(ps, parse_iteration_spec) + emit(ps, mark, K"iteration") +end + +# flisp: parse-space-separated-exprs +function parse_space_separated_exprs(ps::ParseState) + ps = with_space_sensitive(ps) + n_sep = 0 + while true + k = peek(ps) + if is_closing_token(ps, k) || k == K"NewlineWs" || + (ps.for_generator && k == K"for") + break + end + parse_eq(ps) + n_sep += 1 + end + return n_sep +end + +# like parse-arglist, but with `for` parsed as a generator +# +# flisp: parse-call-arglist +function parse_call_arglist(ps::ParseState, closer) + ps = ParseState(ps, for_generator=true) + + parse_brackets(ps, closer, false) do _, _, _, _ + return (needs_parameters=true,) + end +end + +# Parse the suffix of comma-separated array expressions such as +# [x, suffix]. Consumes `closer`, but does not emit the AST node for the +# surrounding brackets. +# +# flisp: parse-vect +function parse_vect(ps::ParseState, closer, prefix_trailing_comma) + # [x, y] ==> (vect x y) + # [x, y] ==> (vect x y) + # [x,y ; z] ==> (vect x y (parameters z)) + # [x=1, y=2] ==> (vect (= x 1) (= y 2)) + # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) + opts = parse_brackets(ps, closer) do _, _, _, num_subexprs + return (needs_parameters=true, + num_subexprs=num_subexprs) + end + delim_flags = opts.delim_flags + if opts.num_subexprs == 0 && prefix_trailing_comma + delim_flags |= TRAILING_COMMA_FLAG + end + return (K"vect", delim_flags) +end + +# Parse generators +# +# We represent generators quite differently from `Expr`: +# * Iteration variables and their iterators are grouped within K"iteration" +# nodes, as in the short form of `for` loops. 
+# * The `generator` kind is used for both cartesian and flattened generators +# +# (x for a in as for b in bs) ==> (parens (generator x (iteration (in a as)) (iteration (in b bs)))) +# (x for a in as, b in bs) ==> (parens (generator x (iteration (in a as) (in b bs)))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (iteration (in a as) (in b bs)) z))) +# +# flisp: parse-generator +function parse_generator(ps::ParseState, mark) + while (t = peek_token(ps); kind(t) == K"for") + if !preceding_whitespace(t) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (iteration (in x xs)))) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="Expected space before `for` in generator") + end + bump(ps, TRIVIA_FLAG) + iter_mark = position(ps) + parse_iteration_specs(ps) + if peek(ps) == K"if" + # (x for a in as if z) ==> (parens (generator x (filter (iteration (in a as)) z))) + bump(ps, TRIVIA_FLAG) + parse_cond(ps) + emit(ps, iter_mark, K"filter") + end + end + emit(ps, mark, K"generator") +end + +# flisp: parse-comprehension +function parse_comprehension(ps::ParseState, mark, closer) + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) + ps = ParseState(ps, whitespace_newline=true, + space_sensitive=false, + end_symbol=false) + parse_generator(ps, mark) + bump_closing_token(ps, closer) + return (K"comprehension", EMPTY_FLAGS) +end + +# Parse array concatenation syntax with multiple semicolons +# +# Normal matrix construction syntax +# [x y ; z w] ==> (vcat (row x y) (row z w)) +# [x y ; z w ; a b] ==> (vcat (row x y) (row z w) (row a b)) +# [x ; y ; z] ==> (vcat x y z) +# [x;] ==> (vcat x) +# [x y] ==> (hcat x y) +# +# Mismatched rows +# [x y ; z] ==> (vcat (row x y) z) +# +# Single elements in rows +#v1.7: [x ; y ;; z ] ==> (ncat-2 (nrow-1 x y) z) +#v1.7: [x y ;;; z ] ==> (ncat-3 (row x y) z) +# +# Higher dimensional ncat +# Row major +#v1.7: [x y ; z w ;;; a b ; c d] ==> +# (ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row 
a b) (row c d))) +# Column major +#v1.7: [x ; y ;; z ; w ;;; a ; b ;; c ; d] ==> +# (ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d))) +# +# flisp: parse-array +function parse_array(ps::ParseState, mark, closer, end_is_symbol) + ps = ParseState(ps, end_symbol=end_is_symbol) + + array_order = Ref(:unknown) + # Outer array parsing loop - parse chain of separators with descending + # precedence such as + #v1.7: [a ; b ;; c ;;; d ;;;; e] ==> (ncat-4 (ncat-3 (ncat-2 (ncat-1 a b) c) d) e) + # + # Ascending and equal precedence is handled by parse_array_inner. + # + # This is a variant of a Pratt parser, but we have a separate outer loop + # because there's no minimum precedence/binding power - you can always get + # a lower binding power by adding more semicolons. + # + # For an excellent overview of Pratt parsing, see + # https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html + (dim, binding_power) = parse_array_separator(ps, array_order) + if binding_power == typemin(Int) + # [x@y ==> (hcat x (error-t ✘ y)) + bump_closing_token(ps, closer) + return (K"hcat", 0) + end + while true + (next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order) + if next_bp == typemin(Int) + break + end + if binding_power == 0 + emit(ps, mark, K"row") + else + emit(ps, mark, K"nrow", set_numeric_flags(dim)) + end + dim = next_dim + binding_power = next_bp + end + bump_closing_token(ps, closer) + return binding_power == -1 ? (K"vcat", 0) : + binding_power == 0 ? (K"hcat", 0) : + (K"ncat", dim) +end + +# Parse equal and ascending precedence chains of array concatenation operators - +# semicolons, newlines and whitespace. Invariants: +# +# * The caller must have already consumed +# - The left hand side +# - The concatenation operator, providing `binding_power`. 
+# So eg, we're here in the input stream, either at an element or closing token +# | +# [a ;; b ; c ] +# [a ;; ] +# +# * The caller must call emit() to delimit the AST node for this binding power. +# +function parse_array_inner(ps, binding_power, array_order) + mark = NO_POSITION + dim = -1 + bp = binding_power + while true + if bp < binding_power + return (dim, bp) + end + # Allow trailing separators + # [a ;] ==> (vcat a) + # [a ; b;;] ==> (ncat-2 (nrow-1 a b)) + if is_closing_token(ps, peek(ps)) + return (typemin(Int), typemin(Int)) + end + if bp == binding_power + # Parse one expression + mark = position(ps) + parse_eq_star(ps) + (next_dim, next_bp) = parse_array_separator(ps, array_order) + else # bp > binding_power + # Recurse to parse a separator with greater binding power. Eg: + # [a ;; b ; c ] + # | ^------ the next input is here + # '---------- the mark is here + (next_dim, next_bp) = parse_array_inner(ps, bp, array_order) + if bp == 0 + emit(ps, mark, K"row") + else + emit(ps, mark, K"nrow", set_numeric_flags(dim)) + end + end + dim, bp = next_dim, next_bp + end +end + +# Parse a separator in an array concatenation +# +# Here we return a tuple (dim, binding_power) containing +# * Dimension on which the next separator acts +# * Binding power (precedence) of the separator, where whitespace binds +# tightest: ... < `;;;` < `;;` < `;`,`\n` < whitespace. We choose binding +# power of 0 for whitespace and negative numbers for other separators. 
+# +function parse_array_separator(ps, array_order) + sep_mismatch_err = "cannot mix space and ;; separators in an array expression, except to wrap a line" + mark = position(ps) + t = peek_token(ps, skip_newlines=true) + if kind(t) == K";" + # Newlines before semicolons are not significant + # [a \n ;] ==> (vcat a) + bump_trivia(ps) + n_semis = 1 + while true + bump(ps, TRIVIA_FLAG) + t = peek_token(ps) + if kind(t) != K";" + break + end + if preceding_whitespace(t) + bump_disallowed_space(ps) + end + n_semis += 1 + end + had_newline = peek(ps) == K"NewlineWs" + # Newlines after semicolons are not significant + # [a ; \n] ==> (vcat a) + # [a ; \n\n b] ==> (vcat a b) + #v1.7: [a ;; \n b] ==> (ncat-2 a b) + bump_trivia(ps) + if n_semis == 2 + if array_order[] === :row_major + if had_newline + # In hcat with spaces as separators, `;;` is a line + # continuation character + #v1.7: [a b ;; \n c] ==> (hcat a b c) + #v1.7: [a b \n ;; c] ==> (ncat-2 (row a b (error-t)) c) + return (2, 0) + else + # Can't mix spaces and multiple ;; + #v1.7: [a b ;; c] ==> (ncat-2 (row a b (error-t)) c) + emit(ps, mark, K"error", TRIVIA_FLAG, error=sep_mismatch_err) + end + else + array_order[] = :column_major + end + end + return (n_semis, -n_semis) + end + t = peek_token(ps) + k = kind(t) + if k == K"NewlineWs" + bump_trivia(ps) + if peek(ps) == K"]" + # Linebreaks not significant before closing `]` + # [a b\n\n] ==> (hcat a b) + return (typemin(Int), typemin(Int)) + else + # Treat a linebreak prior to a value as a semicolon (ie, separator + # for the first dimension) if no previous semicolons observed + # [a \n b] ==> (vcat a b) + return (1, -1) + end + elseif k == K"," + # Treat `,` as semicolon for the purposes of recovery + # [a; b, c] ==> (vcat a b (error-t) c) + bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression") + return (1, -1) + else + if preceding_whitespace(t) && !is_closing_token(ps, k) + if array_order[] === :column_major + # Can't mix multiple ;'s and spaces + 
#v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c)) + bump_trivia(ps, TRIVIA_FLAG, error=sep_mismatch_err) + else + array_order[] = :row_major + end + return (2, 0) + else + # Something else; use typemin to exit array parsing + return (typemin(Int), typemin(Int)) + end + end +end + +# Parse array concatenation/construction/indexing syntax inside of `[]` or `{}`. +# The opening bracket has been consumed. +# +# flisp: parse-cat +function parse_cat(ps::ParseState, closer, end_is_symbol) + ps = ParseState(ps, range_colon_enabled=true, + space_sensitive=true, + where_enabled=true, + whitespace_newline=false, + for_generator=true) + k = peek(ps, skip_newlines=true) + mark = position(ps) + if k == closer + # [] ==> (vect) + ckind, cflags = parse_vect(ps, closer, false) + return (ckind, cflags, 0) + elseif k == K";" + #v1.8: [;] ==> (ncat-1) + #v1.8: [;;] ==> (ncat-2) + #v1.8: [\n ;; \n ] ==> (ncat-2) + #v1.7: [;;] ==> (ncat-2 (error)) + bump_trivia(ps) + dim, _ = parse_array_separator(ps, Ref(:unknown)) + min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax") + bump_closing_token(ps, closer) + return (K"ncat", EMPTY_FLAGS, dim) + end + parse_eq_star(ps) + k = peek(ps, skip_newlines=true) + if k == K"," || (is_closing_token(ps, k) && k != K";") + prefix_trailing_comma = k == K"," + if prefix_trailing_comma + # [x,] ==> (vect x) + bump(ps, TRIVIA_FLAG; skip_newlines = true) + end + # [x] ==> (vect x) + # [x \n ] ==> (vect x) + # [x ==> (vect x (error-t)) + ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma) + return (ckind, cflags, 0) + elseif k == K"for" + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) + # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) + ckind, cflags = parse_comprehension(ps, mark, closer) + return (ckind, cflags, 0) + else + # [x y] ==> (hcat x y) + # and other forms; See parse_array. 
+ ckind, dim = parse_array(ps, mark, closer, end_is_symbol) + return (ckind, EMPTY_FLAGS, dim) + end +end + +function check_ncat_compat(ps, mark, k) + # https://github.com/JuliaLang/julia/pull/33697 + if k == K"ncat" + min_supported_version(v"1.7", ps, mark, "multidimensional array syntax") + end +end + +# Parse un-prefixed parenthesized syntax. This is hard because parentheses are +# *very* overloaded! +# +# flisp: parse-paren / parse-paren- +function parse_paren(ps::ParseState, check_identifiers=true, has_unary_prefix=false) + ps = ParseState(ps, range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + whitespace_newline=true) + mark = position(ps) + @check peek(ps) == K"(" + bump(ps, TRIVIA_FLAG) # K"(" + after_paren_mark = position(ps) + (isdot, tok) = peek_dotted_op_token(ps) + k = kind(tok) + if k == K")" + # () ==> (tuple-p) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"tuple", PARENS_FLAG) + elseif is_syntactic_operator(k) + # allow :(=) etc in unchecked contexts, eg quotes + # :(=) ==> (quote-: (parens =)) + parse_atom(ps, check_identifiers) + bump_closing_token(ps, K")") + emit(ps, mark, K"parens") + elseif !check_identifiers && k == K"::" && + peek(ps, 2, skip_newlines=true) == K")" + # allow :(::) as a special case + # :(::) ==> (quote-: (parens ::)) + bump(ps) + bump(ps, TRIVIA_FLAG, skip_newlines=true) + emit(ps, mark, K"parens") + else + # Deal with all other cases of tuple or block syntax via the generic + # parse_brackets + initial_semi = peek(ps) == K";" + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + is_tuple = had_commas || (had_splat && num_semis >= 1) || + (initial_semi && (num_semis == 1 || num_subexprs > 0)) || + (peek(ps, 2) == K"->" && (peek_behind(ps).kind != K"where" && !has_unary_prefix)) + return (needs_parameters=is_tuple, + is_tuple=is_tuple, + is_block=num_semis > 0) + end + if opts.is_tuple + # Tuple syntax with commas + # (x,) ==> (tuple-p x) + # (x,y) ==> (tuple-p x y) + # 
(x=1, y=2) ==> (tuple-p (= x 1) (= y 2)) + # + # Named tuple with initial semicolon + # (;) ==> (tuple-p (parameters)) + # (; a=1) ==> (tuple-p (parameters (= a 1))) + # + # Extra credit: nested parameters and frankentuples + # (x...;) ==> (tuple-p (... x) (parameters)) + # (x...; y) ==> (tuple-p (... x) (parameters y)) + # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) + # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) + # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) + emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags) + elseif opts.is_block + # Blocks + # (;;) ==> (block-p) + # (a=1;) ==> (block-p (= a 1)) + # (a;b;;c) ==> (block-p a b c) + # (a=1; b=2) ==> (block-p (= a 1) (= b 2)) + emit(ps, mark, K"block", PARENS_FLAG) + else + # Parentheses used for grouping + # (a * b) ==> (parens (call-i * a b)) + # (a=1) ==> (parens (= a 1)) + # (x) ==> (parens x) + # (a...) ==> (parens (... a)) + emit(ps, mark, K"parens") + end + end +end + +# Handle bracketed syntax inside any of () [] or {} where there's a mixture +# of commas and semicolon delimiters. +# +# For parentheses this is tricky because there's various cases to disambiguate, +# depending on outside context and the content of the brackets (number of +# semicolons, presence of commas or splats). The `after_parse` function must be +# provided by the caller to disambiguate these cases. +# +# Expressions (X; Y; Z) with more semicolons are also allowed by the flisp +# parser and generally parse as nested parameters blocks. This is invalid Julia +# syntax so the parse tree is pretty strange in these cases! Some macros +# probably use it though. 
Example: +# +# (a,b=1; c,d=2; e,f=3) ==> (tuple-p a (= b 1) (parameters c (= d 2)) (parameters e (= f 3))) +# +# flisp: parts of parse-paren- and parse-arglist +function parse_brackets(after_parse::Function, + ps::ParseState, closing_kind, generator_is_last=true) + ps = ParseState(ps, range_colon_enabled=true, + space_sensitive=false, + where_enabled=true, + whitespace_newline=true) + params_positions = acquire_positions(ps.stream) + last_eq_before_semi = 0 + num_subexprs = 0 + num_semis = 0 + had_commas = false + had_splat = false + param_start = nothing + trailing_comma = false + while true + k = peek(ps) + if k == closing_kind + break + elseif k == K";" + # Start of parameters list + # a, b; c d ==> a b (parameters c d) + if !isnothing(param_start) + push!(params_positions, emit(ps, param_start, K"TOMBSTONE")) + end + num_semis += 1 + param_start = position(ps) + bump(ps, TRIVIA_FLAG) + bump_trivia(ps) + elseif is_closing_token(ps, k) + trailing_comma = false + # Error; handled below in bump_closing_token + break + else + mark = position(ps) + parse_eq_star(ps) + trailing_comma = false + num_subexprs += 1 + if num_subexprs == 1 + had_splat = peek_behind(ps).kind == K"..." 
+ end + k = peek(ps, skip_newlines=true) + if k == K"for" + # Generator syntax + # (x for a in as) ==> (parens (generator x (iteration (in a as)))) + parse_generator(ps, mark) + if generator_is_last + break + end + k = peek(ps, skip_newlines=true) + end + if k == K"," + had_commas = true + bump(ps, TRIVIA_FLAG) + trailing_comma = true + elseif k == K";" || k == closing_kind + # Handled above + continue + else + # Error - recovery done when consuming closing_kind + break + end + end + end + if !isnothing(param_start) && position(ps) != param_start + push!(params_positions, emit(ps, param_start, K"TOMBSTONE")) + end + opts = after_parse(had_commas, had_splat, num_semis, num_subexprs) + # Emit nested parameter nodes if necessary + if opts.needs_parameters + for pos in params_positions + reset_node!(ps, pos, kind=K"parameters") + end + end + release_positions(ps.stream, params_positions) + bump_closing_token(ps, closing_kind, " or `,`") + return (; opts..., delim_flags=trailing_comma ? TRAILING_COMMA_FLAG : EMPTY_FLAGS) +end + +_is_indentation(b::UInt8) = (b == u8" " || b == u8"\t") + +# Parse a string, embedded interpolations and deindent triple quoted strings +# by marking indentation characters as whitespace trivia. +# +# flisp: parse-string-literal-, parse-interpolate +function parse_string(ps::ParseState, raw::Bool) + mark = position(ps) + delim_k = peek(ps) + triplestr = delim_k in KSet"\"\"\" ```" + string_chunk_kind = delim_k in KSet"\" \"\"\"" ? K"String" : K"CmdString" + indent_ref_i = 0 + indent_ref_len = typemax(Int) + indent_chunks = acquire_positions(ps.stream) + txtbuf = unsafe_textbuf(ps) + chunk_flags = raw ? 
RAW_STRING_FLAG : EMPTY_FLAGS + bump(ps, TRIVIA_FLAG) + first_chunk = true + n_nontrivia_chunks = 0 + removed_initial_newline = false + had_interpolation = false + prev_chunk_newline = false + while true + t = peek_full_token(ps) + k = kind(t) + if k == K"$" + if raw + # FIXME: This case is actually a tokenization error: + # The `K"$"` token should not occur when a raw string + # is being parsed, but this would require the lexer to know + # about the parse state. (see also parse_atom) + break + end + if prev_chunk_newline + # """\n$x\n a""" ==> (string-s x "\n" " a") + indent_ref_i = first_byte(t) + indent_ref_len = 0 + end + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"(" + # "a $(x + y) b" ==> (string "a " (parens (call-i x + y)) " b") + # "hi$("ho")" ==> (string "hi" (parens (string "ho"))) + m = position(ps) + bump(ps, TRIVIA_FLAG) + opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs + return (needs_parameters=false, + simple_interp=!had_commas && num_semis == 0 && num_subexprs == 1) + end + if !opts.simple_interp || peek_behind(ps, skip_parens=false).kind == K"generator" + # "$(x,y)" ==> (string (parens (error x y))) + emit(ps, m, K"error", error="invalid interpolation syntax") + end + emit(ps, m, K"parens") + elseif k == K"var" + # var identifiers disabled in strings + # "$var" ==> (string var) + bump(ps, remap_kind=K"Identifier") + elseif k == K"Identifier" || is_keyword(k) || is_word_operator(k) + # "a $foo b" ==> (string "a " foo " b") + # "$outer" ==> (string outer) + # "$in" ==> (string in) + parse_atom(ps) + else + bump_invisible(ps, K"error", + error="identifier or parenthesized expression expected after \$ in string") + end + first_chunk = false + n_nontrivia_chunks += 1 + had_interpolation = true + prev_chunk_newline = false + elseif k == string_chunk_kind + if triplestr && first_chunk && span(t) <= 2 && + begin + s = span(t) + b = txtbuf[last_byte(t)] + # Test whether the string is a single logical newline + (s == 
1 && (b == u8"\n" || b == u8"\r")) || + (s == 2 && (txtbuf[first_byte(t)] == u8"\r" && b == u8"\n")) + end + # First line of triple string is a newline only: mark as trivia. + # """\nx""" ==> (string-s "x") + # """\n\nx""" ==> (string-s "\n" "x") + bump(ps, TRIVIA_FLAG) + first_chunk = false + prev_chunk_newline = true + else + if triplestr + # Triple-quoted dedenting: + # Various newlines (\n \r \r\n) and whitespace (' ' \t) + # """\n x\n y""" ==> (string-s "x\n" "y") + # ```\n x\n y``` ==> (macrocall :(Core.var"@cmd") (cmdstring-s-r "x\n" "y")) + # """\r x\r y""" ==> (string-s "x\n" "y") + # """\r\n x\r\n y""" ==> (string-s "x\n" "y") + # Spaces or tabs or mixtures acceptable + # """\n\tx\n\ty""" ==> (string-s "x\n" "y") + # """\n \tx\n \ty""" ==> (string-s "x\n" "y") + # + # Mismatched tab vs space not deindented + # Find minimum common prefix in mismatched whitespace + # """\n\tx\n y""" ==> (string-s "\tx\n" " y") + # """\n x\n y""" ==> (string-s "x\n" " y") + # """\n x\n y""" ==> (string-s " x\n" "y") + # """\n \tx\n y""" ==> (string-s "\tx\n" " y") + # """\n x\n \ty""" ==> (string-s " x\n" "\ty") + # + # Empty lines don't affect dedenting + # """\n x\n\n y""" ==> (string-s "x\n" "\n" "y") + # Non-empty first line doesn't participate in deindentation + # """ x\n y""" ==> (string-s " x\n" "y") + # + # Dedenting and interpolations + # """\n $a\n $b""" ==> (string-s a "\n" b) + # """\n $a \n $b""" ==> (string-s a " \n" b) + # """\n $a\n $b\n""" ==> (string-s " " a "\n" " " b "\n") + # + if prev_chunk_newline && (b = txtbuf[first_byte(t)]; + b != u8"\n" && b != u8"\r") + # Compute length of longest common prefix of mixed + # spaces and tabs, in bytes + # + # Initial whitespace is never regarded as indentation + # in any triple quoted string chunk, as it's always + # preceded in the source code by a visible token of + # some kind; either a """ delimiter or $() + # interpolation. + if indent_ref_i == 0 + # No indentation found yet. 
Find indentation we'll
+                            # use as a reference
+                            i = first_byte(t) - 1
+                            while i < last_byte(t) && _is_indentation(txtbuf[i+1])
+                                i += 1
+                            end
+                            indent_ref_i = first_byte(t)
+                            indent_ref_len = i - first_byte(t) + 1
+                        else
+                            # Matching the current indentation with reference,
+                            # shortening length if necessary.
+                            j = 0
+                            while j < span(t) && j < indent_ref_len
+                                if txtbuf[j + first_byte(t)] != txtbuf[j + indent_ref_i]
+                                    break
+                                end
+                                j += 1
+                            end
+                            indent_ref_len = min(indent_ref_len, j)
+                        end
+                        # Prepare a place for indentation trivia, if necessary
+                        push!(indent_chunks, bump_invisible(ps, K"TOMBSTONE"))
+                    end
+                    b = txtbuf[last_byte(t)]
+                    prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r')
+                end
+                bump(ps, chunk_flags)
+                first_chunk = false
+                n_nontrivia_chunks += 1
+            end
+        elseif k == K"ErrorInvalidInterpolationTerminator" ||
+               k == K"ErrorBidiFormatting" ||
+               k == K"ErrorInvalidUTF8"
+            # Treat these errors as string chunks
+            bump(ps)
+            n_nontrivia_chunks += 1
+        else
+            break
+        end
+    end
+    had_end_delim = peek(ps) == delim_k
+    if triplestr && prev_chunk_newline && had_end_delim
+        # Newline at end of string
+        # """\n x\n y\n""" ==> (string-s " x\n" " y\n")
+        indent_ref_len = 0
+    end
+    if triplestr && indent_ref_len > 0
+        for pos in indent_chunks
+            reset_node!(ps, pos, kind=K"Whitespace", flags=TRIVIA_FLAG)
+            rhs_empty = steal_token_bytes!(ps, pos, indent_ref_len)
+            if rhs_empty
+                # Empty chunks after dedent are removed
+                # """\n  \n  """ ==> (string-s "\n")
+                n_nontrivia_chunks -= 1
+            end
+        end
+    end
+    release_positions(ps.stream, indent_chunks)
+    if had_end_delim
+        if n_nontrivia_chunks == 0
+            # Empty strings, or empty after triple quoted processing
+            # "" ==> (string "")
+            # """\n  """ ==> (string-s "")
+            bump_invisible(ps, string_chunk_kind, chunk_flags)
+        end
+        bump(ps, TRIVIA_FLAG)
+    else
+        # Missing delimiter recovery
+        # "str ==> (string "str" (error-t))
+        bump_invisible(ps, K"error", TRIVIA_FLAG, error="unterminated string literal")
+    end
+    # String 
interpolations + # "$x$y$z" ==> (string x y z) + # "$(x)" ==> (string (parens x)) + # "$x" ==> (string x) + # """$x""" ==> (string-s x) + # + # Strings with embedded whitespace trivia + # "a\\\nb" ==> (string "a" "b") + # "a\\\rb" ==> (string "a" "b") + # "a\\\r\nb" ==> (string "a" "b") + # "a\\\n \tb" ==> (string "a" "b") + # + # Strings with only a single valid string chunk + # "str" ==> (string "str") + # "a\\\n" ==> (string "a") + # "a\\\r" ==> (string "a") + # "a\\\r\n" ==> (string "a") + string_kind = delim_k in KSet"\" \"\"\"" ? K"string" : K"cmdstring" + str_flags = (triplestr ? TRIPLE_STRING_FLAG : EMPTY_FLAGS) | + (raw ? RAW_STRING_FLAG : EMPTY_FLAGS) + emit(ps, mark, string_kind, str_flags) +end + +function emit_braces(ps, mark, ckind, cflags, dim=0) + if ckind == K"hcat" + # {x y} ==> (bracescat (row x y)) + emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG) + elseif ckind == K"ncat" + # {x ;;; y} ==> (bracescat (nrow-3 x y)) + emit(ps, mark, K"nrow", set_numeric_flags(dim)) + end + check_ncat_compat(ps, mark, ckind) + outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat" + delim_flags = outk == K"braces" ? (cflags & TRAILING_COMMA_FLAG) : EMPTY_FLAGS + emit(ps, mark, outk, delim_flags) +end + +# parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. +# +# If `check_identifiers` is true, identifiers are disallowed from being one of +# the syntactic operators or closing tokens. +# +# flisp: parse-atom +function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=false) + bump_trivia(ps) + mark = position(ps) + (leading_dot, leading_tok) = peek_dotted_op_token(ps) + leading_kind = kind(leading_tok) + # todo: Reorder to put most likely tokens first? + if leading_dot + is_operator(leading_kind) && @goto is_operator + bump(ps, remap_kind=K"Identifier") + if check_identifiers + # . ==> (error .) 
+ emit(ps, mark, K"error", error="invalid identifier") + end + elseif is_error(leading_kind) + # Errors for bad tokens are emitted in validate_tokens() rather than + # here. + bump(ps) + elseif leading_kind == K"'" + # char literal + bump(ps, TRIVIA_FLAG) + k = peek(ps) + if k == K"'" + # '' ==> (char (error)) + bump_invisible(ps, K"error", error="empty character literal") + bump(ps, TRIVIA_FLAG) + elseif k == K"EndMarker" + # ' ==> (char (error)) + bump_invisible(ps, K"error", error="unterminated character literal") + else + if k == K"Char" + bump(ps) + elseif is_error(k) + bump(ps) + else + # FIXME: This case is actually a tokenization error. + # Make a best-effort attempt to workaround this for now by + # remapping the kind. This needs to be fixed by rewinding the + # tokenizer's buffer and re-tokenizing the next token as a + # char. (A lot of work for a very obscure edge case) + # + # x in'c' ==> (call-i x in (char 'c')) + bump(ps, remap_kind=K"Char") + end + if peek(ps) == K"'" + # 'a' ==> (char 'a') + # 'α' ==> (char 'α') + # '\xce\xb1' ==> (char 'α') + bump(ps, TRIVIA_FLAG) + else + # 'a ==> (char 'a' (error-t)) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="unterminated character literal") + end + end + emit(ps, mark, K"char") + elseif leading_kind == K"Char" + # FIXME: This is a tokenization error and should be preceded with + # K"'". However this workaround is better than emitting a bare Char. 
+ bump(ps, remap_kind=K"Identifier") + elseif leading_kind == K":" + # symbol/expression quote + # :foo ==> (quote-: foo) + t = peek_token(ps, 2) + k = kind(t) + if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t)) + # : is a literal colon in some circumstances + # :) ==> : + # : end ==> : + bump(ps) # K":" + return + end + bump(ps, TRIVIA_FLAG) # K":" + if preceding_whitespace(t) + # : foo ==> (quote-: (error-t) foo) + # :\nfoo ==> (quote-: (error-t) foo) + bump_trivia(ps, TRIVIA_FLAG, + error="whitespace not allowed after `:` used for quoting") + end + # Being inside quote makes keywords into identifiers at the + # first level of nesting + # :end ==> (quote-: end) + # :(end) ==> (quote-: (parens (error-t))) + # Being inside quote makes end non-special again (issue #27690) + # a[:(end)] ==> (ref a (quote-: (error-t end))) + parse_atom(ParseState(ps, end_symbol=false), false) + emit(ps, mark, K"quote", COLON_QUOTE) + elseif check_identifiers && leading_kind == K"=" && is_plain_equals(peek_token(ps)) && !leading_dot + # = ==> (error =) + bump(ps, error="unexpected `=`") + elseif leading_kind == K"Identifier" + # xx ==> xx + # x₁ ==> x₁ + bump(ps) + elseif is_word_operator(leading_kind) + # where=1 ==> (= where 1) + bump(ps, remap_kind=K"Identifier") + elseif is_operator(leading_kind) +@label is_operator + # + ==> + + # .+ ==> (. +) + bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind= + is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier") + if check_identifiers && !is_valid_identifier(leading_kind) + # += ==> (error (op= +)) + # ? ==> (error ?) + # .+= ==> (error (. 
(op= +))) + emit(ps, mark, K"error", error="invalid identifier") + else + # Quoted syntactic operators allowed + # :+= ==> (quote-: (op= +)) + end + elseif is_keyword(leading_kind) + if leading_kind == K"var" && (t = peek_token(ps,2); + kind(t) == K"\"" && !preceding_whitespace(t)) + # var"x" ==> (var x) + # Raw mode unescaping + # var"" ==> (var ) + # var"\"" ==> (var ") + # var"\\"" ==> (var \") + # var"\\x" ==> (var \\x) + # + # NB: Triple quoted var identifiers are not implemented, but with + # the complex deindentation rules they seem like a misfeature + # anyway, maybe? + # var"""x""" !=> x + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + if peek(ps) == K"String" + bump(ps, RAW_STRING_FLAG; remap_kind=K"Identifier") + else + bump_invisible(ps, K"Identifier", RAW_STRING_FLAG) + end + if peek(ps) == K"\"" + bump(ps, TRIVIA_FLAG) + else + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="unterminated `var\"\"` identifier") + end + t = peek_token(ps) + k = kind(t) + if preceding_whitespace(t) || is_operator(k) || + k in KSet"( ) [ ] { } , ; @ EndMarker" + # var"x"+ ==> x + # var"x") ==> x + # var"x"( ==> x + elseif is_string_macro_suffix(k) + # var"x"end ==> (var x (error-t)) + # var"x"1 ==> (var x (error-t)) + # var"x"y ==> (var x (error-t)) + bump(ps, TRIVIA_FLAG, error="suffix not allowed after `var\"...\"` syntax") + elseif k == K"`" || k == K"\"" || k == K"\"\"\"" || k == K"```" + # Disallow `var"#""str". To allow this we'd need to fix `raw` + # detection in lex_quote to be consistent with the parser. + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="`var\"...\"` syntax not supported as string macro name") + end + emit(ps, mark, K"var") + elseif check_identifiers && is_closing_token(ps, leading_kind) + # :(end) ==> (quote-: (error end)) + bump(ps, error="invalid identifier") + else + # Remap keywords to identifiers. 
+ # :end ==> (quote-: end) + # :<: ==> (quote-: <:) + bump(ps, remap_kind=K"Identifier") + end + elseif leading_kind == K"(" # parens or tuple + parse_paren(ps, check_identifiers, has_unary_prefix) + elseif leading_kind == K"[" # cat expression + bump(ps, TRIVIA_FLAG) + ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol) + emit(ps, mark, ckind, cflags | set_numeric_flags(dim)) + check_ncat_compat(ps, mark, ckind) + elseif leading_kind == K"{" # cat expression + bump(ps, TRIVIA_FLAG) + ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol) + emit_braces(ps, mark, ckind, cflags, dim) + elseif leading_kind == K"@" # macro call + # Macro names can be keywords + # @end x ==> (macrocall (macro_name end) x) + bump(ps, TRIVIA_FLAG) + parse_macro_name(ps) + parse_call_chain(ps, mark, true) + elseif is_string_delim(leading_kind) + parse_string(ps, false) + elseif leading_kind in KSet"` ```" + # `` ==> (cmdstring-r "") + # `cmd` ==> (cmdstring-r "cmd") + # ```cmd``` ==> (cmdstring-s-r "cmd") + parse_string(ps, true) + elseif is_literal(leading_kind) + # 42 ==> 42 + bump(ps) + elseif is_closing_token(ps, leading_kind) + # Leave closing token in place for other productions to + # recover with + # ) ==> error + msg = leading_kind == K"EndMarker" ? + "premature end of input" : + "unexpected `$(untokenize(leading_kind))`" + emit_diagnostic(ps, error=msg) + bump_invisible(ps, K"error") + else + bump(ps, error="invalid syntax atom") + end +end diff --git a/JuliaSyntax/src/julia/parser_api.jl b/JuliaSyntax/src/julia/parser_api.jl new file mode 100644 index 0000000000000..a3e2162bc985b --- /dev/null +++ b/JuliaSyntax/src/julia/parser_api.jl @@ -0,0 +1,223 @@ +# The main parser API. 
+# +# This is defined separately from parser.jl so that: +# * parser.jl doesn't need to refer to any tree data structures +# * It's clear which parts are the public API + +struct ParseError <: Exception + source::SourceFile + diagnostics::Vector{Diagnostic} + incomplete_tag::Symbol # Used only for Base Expr(:incomplete) support +end + +function ParseError(stream::ParseStream; incomplete_tag=:none, kws...) + source = SourceFile(stream; kws...) + ParseError(source, stream.diagnostics, incomplete_tag) +end + +function Base.showerror(io::IO, err::ParseError) + # Only show the first parse error for now - later errors are often + # misleading due to the way recovery works + i = findfirst(is_error, err.diagnostics) + if isnothing(i) + i = lastindex(err.diagnostics) + level_info = " some warnings detected:" + else + level_info = "" + end + println(io, "ParseError:", level_info) + show_diagnostics(io, err.diagnostics[1:i], err.source) +end + +sourcefile(err::ParseError) = err.source + +""" + parse!(stream::ParseStream; rule=:all) + +Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data +structures may be extracted from `stream` with the [`build_tree`](@ref) function. + +`rule` may be any of +* `:all` (default) — parse a whole "file" of top level statements. In this + mode, the parser expects to fully consume the input. +* `:statement` — parse a single statement, or statements separated by semicolons. +* `:atom` — parse a single syntax "atom": a literal, identifier, or + parenthesized expression. +""" +function parse!(stream::ParseStream; rule::Symbol=:all) + if rule == :toplevel + Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!) 
+ rule = :all + end + ps = ParseState(stream) + if rule === :all + parse_toplevel(ps) + elseif rule === :statement + parse_stmts(ps) + elseif rule === :atom + parse_atom(ps) + else + throw(ArgumentError("Unknown grammar rule $rule")) + end + validate_tokens(stream) + stream +end + +""" + parse!(TreeType, io::IO; rule=:all, version=VERSION) + +Parse Julia source code from a seekable `IO` object. The output is a tuple +`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned +directly after the last byte which was consumed during parsing. +""" +function parse!(::Type{TreeType}, io::IO; + rule::Symbol=:all, version=VERSION, kws...) where {TreeType} + stream = ParseStream(io; version=version) + parse!(stream; rule=rule) + tree = build_tree(TreeType, stream; kws...) + seek(io, last_byte(stream)) + tree, stream.diagnostics +end + +function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, + ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, + ignore_warnings=ignore_errors, kws...) where {T} + stream = ParseStream(text, index; version=version) + if ignore_trivia && rule != :all + bump_trivia(stream, skip_newlines=true) + end + parse!(stream; rule=rule) + if need_eof + if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || + (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) + emit_diagnostic(stream, error="unexpected text after parsing $rule") + end + end + if (!ignore_errors && any_error(stream.diagnostics)) || + (!ignore_warnings && !isempty(stream.diagnostics)) + throw(ParseError(stream, filename=filename, first_line=first_line)) + end + tree = build_tree(T, stream; filename=filename, first_line=first_line, kws...) 
+ tree, last_byte(stream) + 1 +end + +_parse_docs = """ + # Parse a single expression/statement + parsestmt(TreeType, text, [index]; + version=VERSION, + ignore_trivia=true, + filename=nothing, + ignore_errors=false, + ignore_warnings=ignore_errors) + + # Parse all statements at top level (file scope) + parseall(...) + + # Parse a single syntax atom + parseatom(...) + +Parse Julia source code string `text` into a data structure of type `TreeType`. +`parsestmt` parses a single Julia statement, `parseall` parses top level statements +at file scope and `parseatom` parses a single Julia identifier or other "syntax +atom". + +If `text` is passed without `index`, all the input text must be consumed and a +tree data structure is returned. When an integer byte `index` is passed, a +tuple `(tree, next_index)` will be returned containing the next index in `text` +to resume parsing. By default whitespace and comments before and after valid +code are ignored but you can turn this off by setting `ignore_trivia=false`. + +`version` (default `VERSION`) may be used to set the syntax version to +any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been +added after v"1.0", emitting an error if it's not compatible with the requested +`version`. + +Pass `filename` to set any file name information embedded within the output +tree, if applicable. This will also annotate errors and warnings with the +source file name. + +A `ParseError` will be thrown if any errors or warnings occurred during +parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To +also avoid exceptions due to errors, use `ignore_errors=true`. +""" + +"$_parse_docs" +parsestmt(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1] + +"$_parse_docs" +parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:all, true, T, text; kws...)[1] + +"$_parse_docs" +parseatom(::Type{T}, text::AbstractString; kws...) 
where {T} = _parse(:atom, true, T, text; kws...)[1] + +parsestmt(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...) +parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:all, false, T, text, index; kws...) +parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...) + +#------------------------------------------------------------------------------- +# Tokens interface +""" +Token type resulting from calling `tokenize(text)` + +Use +* `kind(tok)` to get the token kind +* `untokenize(tok, text)` to retrieve the text +* Predicates like `is_error(tok)` to query token categories and flags +""" +struct Token + head::SyntaxHead + range::UnitRange{UInt32} +end + +Token() = Token(SyntaxHead(K"None", EMPTY_FLAGS), 0:0) + +head(t::Token) = t.head + +""" + tokenize(text; operators_as_identifiers=true) + +Returns the tokenized UTF-8 encoded `text` as a vector of `Token`s. The +text for the token can be retrieved by using `untokenize()`. The full text can be +reconstructed with, for example, `join(untokenize.(tokenize(text), text))`. + +This interface works on UTF-8 encoded string or buffer data only. + +The keyword `operators_as_identifiers` specifies whether operators in +identifier-position should have `K"Identifier"` as their kind, or be emitted as +more specific operator kinds. For example, whether the `+` in `a + b` should be +emitted as `K"Identifier"` (the default) or as `K"+"`. 
+""" +function tokenize(text; operators_as_identifiers=true) + ps = ParseStream(text) + parse!(ps, rule=:all) + ts = ps.output + output_tokens = Token[] + byte_start::UInt32 = ps.output[1].byte_span + 1 + for i = 2:length(ts) + if kind(ts[i]) == K"TOMBSTONE" || is_non_terminal(ts[i]) + continue + end + r = byte_start:(byte_start+ts[i].byte_span - 1) + byte_start = last(r) + 1 + k = kind(ts[i]) + if k == K"Identifier" && !operators_as_identifiers + orig_k = ts[i].orig_kind + if is_operator(orig_k) && !is_word_operator(orig_k) + k = orig_k + end + end + f = flags(ts[i]) + push!(output_tokens, Token(SyntaxHead(k,f), r)) + end + output_tokens +end + +function untokenize(token::Token, text::AbstractString) + text[first(token.range):thisind(text, last(token.range))] +end + +function untokenize(token::Token, text::Vector{UInt8}) + text[token.range] +end + +@deprecate parse parsestmt diff --git a/JuliaSyntax/src/julia/tokenize.jl b/JuliaSyntax/src/julia/tokenize.jl new file mode 100644 index 0000000000000..2bd0f56df1b84 --- /dev/null +++ b/JuliaSyntax/src/julia/tokenize.jl @@ -0,0 +1,1314 @@ +module Tokenize + +export tokenize, untokenize + +using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline + +import ..JuliaSyntax: kind, + is_literal, is_contextual_keyword, is_word_operator + +#------------------------------------------------------------------------------- +# Character-based predicates for tokenization +import Base.Unicode + +const EOF_CHAR = typemax(Char) + +function is_identifier_char(c::Char) + c == EOF_CHAR && return false + isvalid(c) || return false + return Base.is_id_char(c) +end + +function is_identifier_start_char(c::Char) + c == EOF_CHAR && return false + isvalid(c) || return false + c == '🢲' && return false # First divergence from Base.is_id_start_char + return Base.is_id_start_char(c) +end + +function is_invisible_char(c::Char) + # These are the chars considered invisible by the reference parser. + # TODO: There's others we could add? 
See for example + # https://invisible-characters.com/ + return c == '\u00ad' || # soft hyphen + c == '\u200b' || # zero width space + c == '\u200c' || # zero width non-joiner + c == '\u200d' || # zero width joiner + c == '\u200e' || # left-to-right mark + c == '\u200f' || # right-to-left mark + c == '\u2060' || # word joiner + c == '\u2061' # function application + # https://github.com/JuliaLang/julia/issues/49850 + # c == '\u115f' || # Hangul Choseong filler +end + +# Chars that we will never allow to be part of a valid non-operator identifier +function is_never_id_char(ch::Char) + isvalid(ch) || return true + cat = Unicode.category_code(ch) + c = UInt32(ch) + return ( + # spaces and control characters: + (cat >= Unicode.UTF8PROC_CATEGORY_ZS && cat <= Unicode.UTF8PROC_CATEGORY_CS) || + + # ASCII and Latin1 non-connector punctuation + (c < 0xff && + cat >= Unicode.UTF8PROC_CATEGORY_PD && cat <= Unicode.UTF8PROC_CATEGORY_PO) || + + c == UInt32('`') || + + # mathematical brackets + (c >= 0x27e6 && c <= 0x27ef) || + # angle, corner, and lenticular brackets + (c >= 0x3008 && c <= 0x3011) || + # tortoise shell, square, and more lenticular brackets + (c >= 0x3014 && c <= 0x301b) || + # fullwidth parens + (c == 0xff08 || c == 0xff09) || + # fullwidth square brackets + (c == 0xff3b || c == 0xff3d) + ) +end + +readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) + +# Some unicode operators are normalized by the tokenizer into their equivalent +# kinds. See also normalize_identifier() +const _ops_with_unicode_aliases = [ + # \minus '−' is normalized into K"-", + '−' => K"-" + # Lookalikes which are normalized into K"⋅", + # https://github.com/JuliaLang/julia/pull/25157, + '\u00b7' => K"⋅" # '·' Middle Dot,, + '\u0387' => K"⋅" # '·' Greek Ano Teleia,, +] + +function _nondot_symbolic_operator_kinds() + op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") + setdiff(reinterpret.(Kind, op_range), [ + K"ErrorInvalidOperator" + K"Error**" + K"..." + K"." 
+ K"where" + K"isa" + K"in" + K".'" + K"op=" + ]) +end + +function _char_in_set_expr(varname, firstchars) + codes = sort!(UInt32.(unique(firstchars))) + terms = [] + i = 1 + while i <= length(codes) + j = i + while j < length(codes) && codes[j+1] == codes[j]+1 + j += 1 + end + if i == j + push!(terms, :($varname == $(codes[i]))) + else + push!(terms, :($(codes[i]) <= $varname <= $(codes[j]))) + end + i = j+1 + end + foldr((t1,t2)->:($t1 || $t2), terms) +end + +@eval function is_operator_start_char(c) + if c == EOF_CHAR || !isvalid(c) + return false + end + u = UInt32(c) + return $(_char_in_set_expr(:u, + append!(first.(string.(_nondot_symbolic_operator_kinds())), + first.(_ops_with_unicode_aliases)))) +end + +# Checks whether a Char is an operator which can be prefixed with a dot `.` +function is_dottable_operator_start_char(c) + return c != '?' && c != '$' && c != ':' && c != '\'' && is_operator_start_char(c) +end + +@eval function isopsuffix(c::Char) + c == EOF_CHAR && return false + isvalid(c) || return false + u = UInt32(c) + if (u < 0xa1 || u > 0x10ffff) + return false + end + cat = Base.Unicode.category_code(u) + if (cat == Base.Unicode.UTF8PROC_CATEGORY_MN || + cat == Base.Unicode.UTF8PROC_CATEGORY_MC || + cat == Base.Unicode.UTF8PROC_CATEGORY_ME) + return true + end + # Additional allowed cases + return $(_char_in_set_expr(:u, + collect("²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽꜛꜜꜝ"))) +end + +function optakessuffix(k) + (K"BEGIN_OPS" <= k <= K"END_OPS") && + !( + k == K"..." || + K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || + k == K"?" || + k == K"<:" || + k == K">:" || + k == K"&&" || + k == K"||" || + k == K"in" || + k == K"isa" || + k == K"≔" || + k == K"⩴" || + k == K":" || + k == K".." || + k == K"$" || + k == K"::" || + k == K"where" || + k == K"." || + k == K"!" 
|| + k == K".'" || + k == K"->" || + K"¬" <= k <= K"∜" + ) +end + +const _unicode_ops = let + ks = _nondot_symbolic_operator_kinds() + ss = string.(ks) + + ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss) + if length(s) == 1 && !isascii(s[1])]) + for ck in _ops_with_unicode_aliases + push!(ops, ck) + end + ops +end + +#------------------------------------------------------------------------------- +# Tokens + +struct RawToken + kind::Kind + # Offsets into a string or buffer + startbyte::Int # The byte where the token start in the buffer + endbyte::Int # The byte where the token ended in the buffer + suffix::Bool +end +function RawToken(kind::Kind, startbyte::Int, endbyte::Int) + RawToken(kind, startbyte, endbyte, false) +end +RawToken() = RawToken(K"error", 0, 0, false) + +const EMPTY_TOKEN = RawToken() + +kind(t::RawToken) = t.kind + +startbyte(t::RawToken) = t.startbyte +endbyte(t::RawToken) = t.endbyte + + +function untokenize(t::RawToken, str::String) + String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)]) +end + +function Base.show(io::IO, t::RawToken) + print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " ")) + print(io, rpad(kind(t), 15, " ")) +end + +#------------------------------------------------------------------------------- +# Lexer + +@inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') +@inline isbinary(c::Char) = c == '0' || c == '1' +@inline isoctal(c::Char) = '0' ≤ c ≤ '7' +@inline iswhitespace(c::Char) = (isvalid(c) && Base.isspace(c)) || c === '\ufeff' + +struct StringState + triplestr::Bool + raw::Bool + delim::Char + paren_depth::Int +end + +""" +`Lexer` reads from an input stream and emits a single token each time +`next_token` is called. 
+ +Ideally a lexer is stateless but some state is needed here for: +* Disambiguating cases like x' (adjoint) vs 'x' (character literal) +* Tokenizing code within string interpolations +""" +mutable struct Lexer{IO_t <: IO} + io::IO_t + + token_startpos::Int + + last_token::Kind + string_states::Vector{StringState} + chars::Tuple{Char,Char,Char,Char} + charspos::Tuple{Int,Int,Int,Int} +end + +function Lexer(io::IO) + c1 = ' ' + p1 = position(io) + if eof(io) + c2, p2 = EOF_CHAR, p1 + c3, p3 = EOF_CHAR, p1 + c4, p4 = EOF_CHAR, p1 + else + c2 = read(io, Char) + p2 = position(io) + if eof(io) + c3, p3 = EOF_CHAR, p2 + c4, p4 = EOF_CHAR, p2 + else + c3 = read(io, Char) + p3 = position(io) + if eof(io) + c4, p4 = EOF_CHAR, p3 + else + c4 = read(io, Char) + p4 = position(io) + end + end + end + Lexer(io, position(io), + K"error", Vector{StringState}(), + (c1,c2,c3,c4), (p1,p2,p3,p4)) +end +Lexer(str::AbstractString) = Lexer(IOBuffer(str)) + +""" + tokenize(x) + +Returns an `Iterable` containing the tokenized input. Can be reverted by e.g. +`join(untokenize.(tokenize(x)))`. +""" +tokenize(x) = Lexer(x) + +# Iterator interface +Base.IteratorSize(::Type{<:Lexer}) = Base.SizeUnknown() +Base.IteratorEltype(::Type{<:Lexer}) = Base.HasEltype() +Base.eltype(::Type{<:Lexer}) = RawToken + + +function Base.iterate(l::Lexer) + l.token_startpos = position(l) + + t = next_token(l) + return t, t.kind == K"EndMarker" +end + +function Base.iterate(l::Lexer, isdone::Any) + isdone && return nothing + t = next_token(l) + return t, t.kind == K"EndMarker" +end + +function Base.show(io::IO, l::Lexer) + print(io, typeof(l), " at position: ", position(l)) +end + +""" + startpos(l::Lexer) + +Return the latest `RawToken`'s starting position. +""" +startpos(l::Lexer) = l.token_startpos + +""" + startpos!(l::Lexer, i::Integer) + +Set a new starting position. 
+""" +startpos!(l::Lexer, i::Integer) = l.token_startpos = i + +""" + peekchar(l::Lexer) + +Returns the next character without changing the lexer's state. +""" +peekchar(l::Lexer) = l.chars[2] + +""" +dpeekchar(l::Lexer) + +Returns the next two characters without changing the lexer's state. +""" +dpeekchar(l::Lexer) = l.chars[2], l.chars[3] + +""" +peekchar3(l::Lexer) + +Returns the next three characters without changing the lexer's state. +""" +peekchar3(l::Lexer) = l.chars[2], l.chars[3], l.chars[4] + +""" + position(l::Lexer) + +Returns the current position. +""" +Base.position(l::Lexer) = l.charspos[1] + +""" + eof(l::Lexer) + +Determine whether the end of the lexer's underlying buffer has been reached. +""" +Base.eof(l::Lexer) = eof(l.io) + +Base.seek(l::Lexer, pos) = seek(l.io, pos) + +""" + start_token!(l::Lexer) + +Updates the lexer's state such that the next `RawToken` will start at the current +position. +""" +function start_token!(l::Lexer) + l.token_startpos = l.charspos[1] +end + +""" + readchar(l::Lexer) + +Returns the next character and increments the current position. +""" +function readchar(l::Lexer) + c = readchar(l.io) + l.chars = (l.chars[2], l.chars[3], l.chars[4], c) + l.charspos = (l.charspos[2], l.charspos[3], l.charspos[4], position(l.io)) + return l.chars[1] +end + +""" + accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) + +Consumes the next character `c` if either `f::Function(c)` returns true, `c == f` +for `c::Char` or `c in f` otherwise. Returns `true` if a character has been +consumed and `false` otherwise. +""" +@inline function accept(l::Lexer, f::Union{Function, Char, Vector{Char}, String}) + c = peekchar(l) + if isa(f, Function) + ok = f(c) + elseif isa(f, Char) + ok = c == f + else + ok = c in f + end + ok && readchar(l) + return ok +end + +""" + accept_batch(l::Lexer, f) + +Consumes all following characters until `accept(l, f)` is `false`. 
+""" +@inline function accept_batch(l::Lexer, f) + ok = false + while accept(l, f) + ok = true + end + return ok +end + +""" + emit(l::Lexer, kind::Kind) + +Returns a `RawToken` of kind `kind` with contents `str` and starts a new `RawToken`. +""" +function emit(l::Lexer, kind::Kind, maybe_op=true) + suffix = false + if optakessuffix(kind) && maybe_op + while isopsuffix(peekchar(l)) + readchar(l) + suffix = true + end + end + + tok = RawToken(kind, startpos(l), position(l) - 1, suffix) + + l.last_token = kind + return tok +end + +""" + next_token(l::Lexer) + +Returns the next `RawToken`. +""" +function next_token(l::Lexer, start = true) + start && start_token!(l) + if !isempty(l.string_states) + lex_string_chunk(l) + else + _next_token(l, readchar(l)) + end +end + +function _next_token(l::Lexer, c) + if c == EOF_CHAR + return emit(l, K"EndMarker") + elseif iswhitespace(c) + return lex_whitespace(l, c) + elseif c == '[' + return emit(l, K"[") + elseif c == ']' + return emit(l, K"]") + elseif c == '{' + return emit(l, K"{") + elseif c == ';' + return emit(l, K";") + elseif c == '}' + return emit(l, K"}") + elseif c == '(' + return emit(l, K"(") + elseif c == ')' + return emit(l, K")") + elseif c == ',' + return emit(l, K",") + elseif c == '*' + return lex_star(l); + elseif c == '^' + return lex_circumflex(l); + elseif c == '@' + return emit(l, K"@") + elseif c == '?' + return emit(l, K"?") + elseif c == '$' + return lex_dollar(l); + elseif c == '⊻' + return lex_xor(l); + elseif c == '~' + return emit(l, K"~") + elseif c == '#' + return lex_comment(l) + elseif c == '=' + return lex_equal(l) + elseif c == '!' 
+ return lex_exclaim(l) + elseif c == '>' + return lex_greater(l) + elseif c == '<' + return lex_less(l) + elseif c == ':' + return lex_colon(l) + elseif c == '|' + return lex_bar(l) + elseif c == '&' + return lex_amper(l) + elseif c == '\'' + return lex_prime(l) + elseif c == '÷' + return lex_division(l) + elseif c == '"' + return lex_quote(l); + elseif c == '%' + return lex_percent(l); + elseif c == '/' + return lex_forwardslash(l); + elseif c == '\\' + return lex_backslash(l); + elseif c == '.' + return lex_dot(l); + elseif c == '+' + return lex_plus(l); + elseif c == '-' + return lex_minus(l); + elseif c == '−' # \minus '−' treated as hyphen '-' + return emit(l, accept(l, '=') ? K"op=" : K"-") + elseif c == '`' + return lex_backtick(l); + elseif is_identifier_start_char(c) + return lex_identifier(l, c) + elseif isdigit(c) + return lex_digit(l, K"Integer") + elseif (k = get(_unicode_ops, c, K"None")) != K"None" + return emit(l, k) + else + emit(l, + !isvalid(c) ? K"ErrorInvalidUTF8" : + is_invisible_char(c) ? K"ErrorInvisibleChar" : + is_identifier_char(c) ? K"ErrorIdentifierStart" : + K"ErrorUnknownCharacter") + end +end + +# UAX #9: Unicode Bidirectional Algorithm +# https://unicode.org/reports/tr9/ +# Very partial implementation - just enough to check correct nesting in strings +# and multiline comments. +function update_bidi_state((embedding_nesting, isolate_nesting), c) + if c == '\n' + embedding_nesting = 0 + isolate_nesting = 0 + elseif c == '\U202A' || c == '\U202B' || c == '\U202D' || c == '\U202E' # LRE RLE LRO RLO + embedding_nesting += 1 + elseif c == '\U202C' # PDF + embedding_nesting -= 1 + elseif c == '\U2066' || c == '\U2067' || c == '\U2068' # LRI RLI FSI + isolate_nesting += 1 + elseif c == '\U2069' # PDI + isolate_nesting -= 1 + end + return (embedding_nesting, isolate_nesting) +end + +# We're inside a string; possibly reading the string characters, or maybe in +# Julia code within an interpolation. 
+function lex_string_chunk(l) + state = last(l.string_states) + if state.paren_depth > 0 + # Read normal Julia code inside an interpolation but track nesting of + # parentheses. + # TODO: This stateful tracking should probably, somehow, be done by the + # parser instead? Especially for recovery of unbalanced parens inside + # interpolations? + c = readchar(l) + if c == '(' + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth + 1) + return emit(l, K"(") + elseif c == ')' + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth - 1) + return emit(l, K")") + else + return _next_token(l, c) + end + end + pc = peekchar(l) + if l.last_token == K"$" + pc = peekchar(l) + # Interpolated symbol or expression + if pc == '(' + readchar(l) + l.string_states[end] = StringState(state.triplestr, state.raw, state.delim, + state.paren_depth + 1) + return emit(l, K"(") + elseif is_identifier_start_char(pc) + return lex_identifier(l, readchar(l)) + else + # Getting here is a syntax error - fall through to reading string + # characters and let the parser deal with it. 
+ end + elseif l.last_token == K"Identifier" && + !(pc == EOF_CHAR || is_operator_start_char(pc) || is_never_id_char(pc)) + # Only allow certain characters after interpolated vars + # https://github.com/JuliaLang/julia/pull/25234 + readchar(l) + return emit(l, K"ErrorInvalidInterpolationTerminator") + end + if pc == EOF_CHAR + return emit(l, K"EndMarker") + elseif !state.raw && pc == '$' + # Start interpolation + readchar(l) + return emit(l, K"$") + elseif !state.raw && pc == '\\' && (pc2 = dpeekchar(l)[2]; + pc2 == '\r' || pc2 == '\n') + # Process escaped newline as whitespace + readchar(l) + readchar(l) + if pc2 == '\r' && peekchar(l) == '\n' + readchar(l) + end + while (pc = peekchar(l); pc == ' ' || pc == '\t') + readchar(l) + end + return emit(l, K"Whitespace") + elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) + if state.delim == '\'' && l.last_token == K"'" && dpeekchar(l)[2] == '\'' + # Handle ''' + readchar(l) + return emit(l, K"Char") + end + # Terminate string + pop!(l.string_states) + readchar(l) + if state.triplestr + readchar(l); readchar(l) + return emit(l, state.delim == '"' ? + K"\"\"\"" : K"```") + else + return emit(l, state.delim == '"' ? K"\"" : + state.delim == '`' ? K"`" : K"'", false) + end + end + # Read a chunk of string characters + init_bidi_state = (0,0) + bidi_state = init_bidi_state + valid = true + if state.raw + # Raw strings treat all characters as literals with the exception that + # the closing quotes can be escaped with an odd number of \ characters. 
+ while true + pc = peekchar(l) + if string_terminates(l, state.delim, state.triplestr) || pc == EOF_CHAR + break + elseif state.triplestr && (pc == '\n' || pc == '\r') + # triple quoted newline splitting + readchar(l) + if pc == '\n' + bidi_state = init_bidi_state + elseif pc == '\r' && peekchar(l) == '\n' + bidi_state = init_bidi_state + readchar(l) + end + break + end + c = readchar(l) + if c == '\\' + n = 1 + while peekchar(l) == '\\' + readchar(l) + n += 1 + end + if peekchar(l) == state.delim && !iseven(n) + readchar(l) + end + end + bidi_state = update_bidi_state(bidi_state, c) + valid &= isvalid(c) + end + else + while true + pc = peekchar(l) + if pc == '$' || pc == EOF_CHAR + break + elseif state.triplestr && (pc == '\n' || pc == '\r') + # triple quoted newline splitting + readchar(l) + if pc == '\n' + bidi_state = init_bidi_state + elseif pc == '\r' && peekchar(l) == '\n' + readchar(l) + bidi_state = init_bidi_state + end + break + elseif pc == state.delim && string_terminates(l, state.delim, state.triplestr) + break + elseif pc == '\\' + # Escaped newline + _, pc2, pc3 = peekchar3(l) + if pc2 == '\r' || pc2 == '\n' + if pc2 == '\n' || pc3 == '\n' + bidi_state = init_bidi_state + end + break + end + end + c = readchar(l) + if c == '\\' + c = readchar(l) + c == EOF_CHAR && break + end + bidi_state = update_bidi_state(bidi_state, c) + valid &= isvalid(c) + end + end + outk = !valid ? K"ErrorInvalidUTF8" : + state.delim == '\'' ? K"Char" : + bidi_state != init_bidi_state ? K"ErrorBidiFormatting" : + state.delim == '"' ? K"String" : + state.delim == '`' ? 
K"CmdString" : + (@assert(state.delim in KSet"' \" `"); K"error") + return emit(l, outk) +end + +# Lex whitespace, a whitespace char `c` has been consumed +function lex_whitespace(l::Lexer, c) + k = K"Whitespace" + while true + if c == '\n' + k = K"NewlineWs" + end + pc, ppc = dpeekchar(l) + # stop on non whitespace and limit to a single newline in a token + if !iswhitespace(pc) || + (k == K"NewlineWs" && (pc == '\n' || (pc == '\r' && ppc == '\n'))) + break + end + c = readchar(l) + end + return emit(l, k) +end + +function lex_comment(l::Lexer) + if peekchar(l) != '=' + valid = true + while true + pc, ppc = dpeekchar(l) + if pc == '\n' || (pc == '\r' && ppc == '\n') || pc == EOF_CHAR + return emit(l, valid ? K"Comment" : K"ErrorInvalidUTF8") + end + valid &= isvalid(pc) + readchar(l) + end + else + c = readchar(l) # consume the '=' + init_bidi_state = (0,0) + bidi_state = init_bidi_state + skip = true # true => c was part of the prev comment marker pair + nesting = 1 + valid = true + while true + if c == EOF_CHAR + return emit(l, K"ErrorEofMultiComment") + end + nc = readchar(l) + bidi_state = update_bidi_state(bidi_state, nc) + valid &= isvalid(nc) + if skip + skip = false + else + if c == '#' && nc == '=' + nesting += 1 + skip = true + elseif c == '=' && nc == '#' + nesting -= 1 + skip = true + if nesting == 0 + outk = !valid ? K"ErrorInvalidUTF8" : + bidi_state != init_bidi_state ? K"ErrorBidiFormatting" : + K"Comment" + return emit(l, outk) + end + end + end + c = nc + end + end +end + +# Lex a greater char, a '>' has been consumed +function lex_greater(l::Lexer) + if accept(l, '>') + if accept(l, '>') + if accept(l, '=') + return emit(l, K"op=") + else # >>>?, ? 
not a = 
+                return emit(l, K">>>")
+            end
+        elseif accept(l, '=')
+            return emit(l, K"op=")
+        else
+            return emit(l, K">>")
+        end
+    elseif accept(l, '=')
+        return emit(l, K">=")
+    elseif accept(l, ':')
+        return emit(l, K">:")
+    else
+        return emit(l, K">")
+    end
+end
+
+# Lex a less char, a '<' has been consumed
+function lex_less(l::Lexer)
+    if accept(l, '<')
+        if accept(l, '=')
+            return emit(l, K"op=")
+        else # '<<'
+            return emit(l, K"<<")
+        end
+    elseif accept(l, '=')
+        return emit(l, K"<=")
+    elseif accept(l, ':')
+        return emit(l, K"<:")
+    elseif accept(l, '|')
+        return emit(l, K"<|")
+    elseif dpeekchar(l) == ('-', '-')
+        readchar(l)
+        readchar(l)
+        if accept(l, '>')
+            return emit(l, K"<-->")
+        elseif accept(l, '-')
+            return emit(l, K"ErrorInvalidOperator") # "<---" is an invalid operator
+        else
+            return emit(l, K"<--")
+        end
+    else
+        return emit(l, K"<")
+    end
+end
+
+# Lex all tokens that start with an = character.
+# An '=' char has been consumed
+function lex_equal(l::Lexer)
+    if accept(l, '=')
+        if accept(l, '=')
+            emit(l, K"===")
+        else
+            emit(l, K"==")
+        end
+    elseif accept(l, '>')
+        emit(l, K"=>")
+    else
+        emit(l, K"=")
+    end
+end
+
+# Lex a colon, a ':' has been consumed
+function lex_colon(l::Lexer)
+    if accept(l, ':')
+        return emit(l, K"::")
+    elseif accept(l, '=')
+        return emit(l, K":=")
+    else
+        return emit(l, K":")
+    end
+end
+
+function lex_exclaim(l::Lexer)
+    if accept(l, '=')
+        if accept(l, '=')
+            return emit(l, K"!==")
+        else
+            return emit(l, K"!=")
+        end
+    else
+        return emit(l, K"!")
+    end
+end
+
+function lex_percent(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    else
+        return emit(l, K"%")
+    end
+end
+
+function lex_bar(l::Lexer)
+    if accept(l, '=')
+        return emit(l, K"op=")
+    elseif accept(l, '>')
+        return emit(l, K"|>")
+    elseif accept(l, '|')
+        return emit(l, K"||")
+    else
+        emit(l, K"|")
+    end
+end
+
+function lex_plus(l::Lexer)
+    if accept(l, '+')
+        return emit(l, K"++")
+    elseif accept(l, '=')
+        return emit(l, K"op=")
+    end
+    return emit(l, K"+")
+end
+
+function lex_minus(l::Lexer)
+    if accept(l, '-')
+        if accept(l, '>')
+            return emit(l, K"-->")
+        else
+            return emit(l, K"ErrorInvalidOperator") # "--" is an invalid operator
+        end
+    elseif l.last_token != K"." 
&& accept(l, '>') + return emit(l, K"->") + elseif accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"-") +end + +function lex_star(l::Lexer) + if accept(l, '*') + return emit(l, K"Error**") # "**" is an invalid operator use ^ + elseif accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"*") +end + +function lex_circumflex(l::Lexer) + if accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"^") +end + +function lex_division(l::Lexer) + if accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"÷") +end + +function lex_dollar(l::Lexer) + if accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"$") +end + +function lex_xor(l::Lexer) + if accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"⊻") +end + +function accept_number(l::Lexer, f::F) where {F} + lexed_number = false + while true + pc, ppc = dpeekchar(l) + if pc == '_' && !f(ppc) + return lexed_number + elseif f(pc) || pc == '_' + readchar(l) + else + return lexed_number + end + lexed_number = true + end +end + +# A digit has been consumed +function lex_digit(l::Lexer, kind) + accept_number(l, isdigit) + pc,ppc = dpeekchar(l) + if pc == '.' + if ppc == '.' + # Number followed by K".." or K"..." + return emit(l, kind) + elseif kind === K"Float" + # If we enter the function with kind == K"Float" then a '.' has been parsed. + readchar(l) + return emit(l, K"ErrorInvalidNumericConstant") + elseif is_dottable_operator_start_char(ppc) + readchar(l) + return emit(l, K"ErrorAmbiguousNumericConstant") # `1.+` + end + readchar(l) + + kind = K"Float" + accept(l, '_') && return emit(l, K"ErrorInvalidNumericConstant") # `1._` + had_fraction_digs = accept_number(l, isdigit) + pc, ppc = dpeekchar(l) + if (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') + kind = pc == 'f' ? K"Float32" : K"Float" + readchar(l) + accept(l, "+-−") + if accept_batch(l, isdigit) + pc,ppc = dpeekchar(l) + if pc === '.' 
&& !is_dottable_operator_start_char(ppc) + readchar(l) + return emit(l, K"ErrorInvalidNumericConstant") # `1.e1.` + end + else + return emit(l, K"ErrorInvalidNumericConstant") # `1.e` + end + elseif pc == '.' && ppc != '.' && !is_dottable_operator_start_char(ppc) + readchar(l) + return emit(l, K"ErrorInvalidNumericConstant") # `1.1.` + elseif !had_fraction_digs && (is_identifier_start_char(pc) || + pc == '(' || pc == '[' || pc == '{' || + pc == '@' || pc == '`' || pc == '"') + return emit(l, K"ErrorAmbiguousNumericDotMultiply") # `1.(` `1.x` + end + elseif (pc == 'e' || pc == 'E' || pc == 'f') && (isdigit(ppc) || ppc == '+' || ppc == '-' || ppc == '−') + kind = pc == 'f' ? K"Float32" : K"Float" + readchar(l) + accept(l, "+-−") + if accept_batch(l, isdigit) + pc,ppc = dpeekchar(l) + if pc === '.' && !is_dottable_operator_start_char(ppc) + accept(l, '.') + return emit(l, K"ErrorInvalidNumericConstant") # `1e1.` + end + else + return emit(l, K"ErrorInvalidNumericConstant") # `1e+` + end + elseif position(l) - startpos(l) == 1 && l.chars[1] == '0' + kind == K"Integer" + is_bin_oct_hex_int = false + if pc == 'x' + kind = K"HexInt" + isfloat = false + readchar(l) + had_digits = accept_number(l, ishex) + pc,ppc = dpeekchar(l) + if pc == '.' && ppc != '.' 
+ readchar(l) + had_digits |= accept_number(l, ishex) + isfloat = true + end + if accept(l, "pP") + kind = K"Float" + accept(l, "+-−") + if !accept_number(l, isdigit) || !had_digits + return emit(l, K"ErrorInvalidNumericConstant") # `0x1p` `0x.p0` + end + elseif isfloat + return emit(l, K"ErrorHexFloatMustContainP") # `0x.` `0x1.0` + end + is_bin_oct_hex_int = !isfloat + elseif pc == 'b' + readchar(l) + had_digits = accept_number(l, isbinary) + kind = K"BinInt" + is_bin_oct_hex_int = true + elseif pc == 'o' + readchar(l) + had_digits = accept_number(l, isoctal) + kind = K"OctInt" + is_bin_oct_hex_int = true + end + if is_bin_oct_hex_int + pc = peekchar(l) + if !had_digits || isdigit(pc) || is_identifier_start_char(pc) + accept_batch(l, c->isdigit(c) || is_identifier_start_char(c)) + # `0x` `0xg` `0x_` `0x-` + # `0b123` `0o78p` `0xenomorph` `0xaα` + return emit(l, K"ErrorInvalidNumericConstant") + end + end + end + return emit(l, kind) +end + +function lex_prime(l) + if l.last_token == K"Identifier" || + is_contextual_keyword(l.last_token) || + is_word_operator(l.last_token) || + l.last_token == K"." || + l.last_token == K")" || + l.last_token == K"]" || + l.last_token == K"}" || + l.last_token == K"'" || + l.last_token == K"end" || + is_literal(l.last_token) + # FIXME ^ This doesn't cover all cases - probably needs involvement + # from the parser state. + return emit(l, K"'") + else + push!(l.string_states, StringState(false, true, '\'', 0)) + return emit(l, K"'", false) + end +end + +function lex_amper(l::Lexer) + if accept(l, '&') + return emit(l, K"&&") + elseif accept(l, '=') + return emit(l, K"op=") + else + return emit(l, K"&") + end +end + +# Parse a token starting with a quote. 
+# A '"' has been consumed +function lex_quote(l::Lexer) + raw = l.last_token == K"Identifier" || + is_contextual_keyword(l.last_token) || + is_word_operator(l.last_token) + pc, dpc = dpeekchar(l) + triplestr = pc == '"' && dpc == '"' + push!(l.string_states, StringState(triplestr, raw, '"', 0)) + if triplestr + readchar(l) + readchar(l) + emit(l, K"\"\"\"") + else + emit(l, K"\"") + end +end + +function string_terminates(l, delim::Char, triplestr::Bool) + if triplestr + c1, c2, c3 = peekchar3(l) + c1 === delim && c2 === delim && c3 === delim + else + peekchar(l) === delim + end +end + +# Parse a token starting with a forward slash. +# A '/' has been consumed +function lex_forwardslash(l::Lexer) + if accept(l, '/') + if accept(l, '=') + return emit(l, K"op=") + else + return emit(l, K"//") + end + elseif accept(l, '=') + return emit(l, K"op=") + else + return emit(l, K"/") + end +end + +function lex_backslash(l::Lexer) + if accept(l, '=') + return emit(l, K"op=") + end + return emit(l, K"\\") +end + +function lex_dot(l::Lexer) + if accept(l, '.') + if accept(l, '.') + l.last_token == K"@" && return emit(l, K"Identifier") + return emit(l, K"...") + else + if is_dottable_operator_start_char(peekchar(l)) + readchar(l) + return emit(l, K"ErrorInvalidOperator") + else + l.last_token == K"@" && return emit(l, K"Identifier") + return emit(l, K"..") + end + end + elseif Base.isdigit(peekchar(l)) + return lex_digit(l, K"Float") + else + l.last_token == K"@" && return emit(l, K"Identifier") + return emit(l, K".") + end +end + +# A ` has been consumed +function lex_backtick(l::Lexer) + pc, dpc = dpeekchar(l) + triplestr = pc == '`' && dpc == '`' + # Backticks always contain raw strings only. 
See discussion on bug + # https://github.com/JuliaLang/julia/issues/3150 + raw = true + push!(l.string_states, StringState(triplestr, raw, '`', 0)) + if triplestr + readchar(l) + readchar(l) + emit(l, K"```") + else + emit(l, K"`") + end +end + +const MAX_KW_LENGTH = 10 +const ascii_is_identifier_char = Bool[is_identifier_char(Char(b)) for b=0x00:0x7f] +function lex_identifier(l::Lexer, c) + h = simple_hash(c, UInt64(0)) + n = 1 + ascii = isascii(c) + graphemestate = Ref(Int32(ascii)) # all ASCII id chars are UTF8PROC_BOUNDCLASS_OTHER + graphemestate_peek = Ref(zero(Int32)) + while true + pc, ppc = dpeekchar(l) + ascii = ascii && isascii(pc) + if ascii # fast path + pc_byte = pc % UInt8 + @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1] + break + end + elseif @callsite_inline Unicode.isgraphemebreak!(graphemestate, c, pc) + if (pc == '!' && ppc == '=') || !is_identifier_char(pc) + break + end + elseif pc in ('\u200c','\u200d') # ZWNJ/ZWJ control characters + # ZWJ/ZWNJ only within grapheme sequences, not at end + graphemestate_peek[] = graphemestate[] + if @callsite_inline Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc) + break + end + end + c = readchar(l) + h = simple_hash(c, h) + n += 1 + end + + if n > MAX_KW_LENGTH + emit(l, K"Identifier") + elseif h == _true_hash || h == _false_hash + emit(l, K"Bool") + else + emit(l, get(_kw_hash, h, K"Identifier")) + end +end + +# This creates a hash for chars in [a-z] using 5 bit per char. +# Requires an additional input-length check somewhere, because +# this only works up to ~12 chars. 
+@inline function simple_hash(c::Char, h::UInt64)
+    bytehash = (clamp(c - 'a' + 1, -1, 30) % UInt8) & 0x1f
+    h << 5 + bytehash
+end
+
+function simple_hash(str)
+    ind = 1
+    h = UInt64(0)
+    L = min(lastindex(str), MAX_KW_LENGTH)
+    while ind <= L
+        h = simple_hash(str[ind], h)
+        ind = nextind(str, ind)
+    end
+    h
+end
+
+kws = [
+K"baremodule",
+K"begin",
+K"break",
+K"catch",
+K"const",
+K"continue",
+K"do",
+K"else",
+K"elseif",
+K"end",
+K"export",
+K"finally",
+K"for",
+K"function",
+K"global",
+K"if",
+K"import",
+K"let",
+K"local",
+K"macro",
+K"module",
+K"public",
+K"quote",
+K"return",
+K"struct",
+K"try",
+K"using",
+K"while",
+K"in",
+K"isa",
+K"where",
+
+K"abstract",
+K"as",
+K"doc",
+K"mutable",
+K"outer",
+K"primitive",
+K"type",
+K"var",
+]
+
+const _true_hash = simple_hash("true")
+const _false_hash = simple_hash("false")
+const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
+
+end # module
diff --git a/JuliaSyntax/src/porcelain/green_node.jl b/JuliaSyntax/src/porcelain/green_node.jl
new file mode 100644
index 0000000000000..7838ff733c0bf
--- /dev/null
+++ b/JuliaSyntax/src/porcelain/green_node.jl
@@ -0,0 +1,176 @@
+"""
+    struct GreenNode
+
+An explicit pointer-y representation of the green tree produced by the parser.
+See [`RawGreenNode`](@ref) for documentation on working with the implicit green
+tree directly. However, this representation is useful for introspection as it
+provides O(1) access to the children (as well as forward iteration).
+"""
+struct GreenNode{Head}
+    head::Head
+    span::UInt32
+    children::Union{Nothing,Vector{GreenNode{Head}}}
+end
+
+function GreenNode(head::Head, span::Integer, children=nothing) where {Head}
+    GreenNode{Head}(head, span, children)
+end
+
+# Accessors / predicates
+is_leaf(node::GreenNode)     = isnothing(node.children)
+children(node::GreenNode)    = node.children
+numchildren(node::GreenNode) = isnothing(node.children) ? 0 : length(node.children)
+head(node::GreenNode)        = node.head
+
+"""
+    span(node)
+
+Get the number of bytes this node covers in the source text.
+"""
+span(node::GreenNode) = node.span
+
+Base.getindex(node::GreenNode, i::Int) = children(node)[i]
+Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng)
+Base.firstindex(node::GreenNode) = 1
+Base.lastindex(node::GreenNode) = children(node) === nothing ? 0 : length(children(node))
+
+"""
+Get absolute position and span of the child of `node` at the given tree `path`.
+"""
+function child_position_span(node::GreenNode, path::Int...)
+    n = node
+    p = 1
+    for index in path
+        cs = children(n)
+        for i = 1:index-1
+            p += span(cs[i])
+        end
+        n = cs[index]
+    end
+    return n, p, n.span
+end
+
+function highlight(io::IO, source::SourceFile, node::GreenNode, path::Int...; kws...)
+    _, p, span = child_position_span(node, path...)
+    q = p + span - 1
+    highlight(io, source, p:q; kws...)
+end
+
+Base.summary(node::GreenNode) = summary(node.head)
+
+function Base.hash(node::GreenNode, h::UInt)
+    children = node.children
+    if children === nothing
+        h = hash(nothing, h)
+    else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)`
+        for child in children
+            h = hash(child, h)
+        end
+    end
+    hash(node.head, hash(node.span, h))
+end
+function Base.:(==)(n1::GreenNode, n2::GreenNode)
+    n1.head == n2.head && n1.span == n2.span && n1.children == n2.children
+end
+
+# Pretty printing
+function _show_green_node(io, node, indent, pos, str, show_trivia)
+    if !show_trivia && is_trivia(node)
+        return
+    end
+    posstr = "$(lpad(pos, 6)):$(rpad(pos+span(node)-1, 6)) │"
+    leaf = is_leaf(node)
+    if leaf
+        line = string(posstr, indent, summary(node))
+    else
+        line = string(posstr, indent, '[', summary(node), ']')
+    end
+    if !is_trivia(node) && leaf
+        line = rpad(line, 40) * "✔"
+    end
+    if is_error(node)
+        line = rpad(line, 41) * "✘"
+    end
+    if leaf && !isnothing(str)
+        line = string(rpad(line, 43), ' ', repr(str[pos:prevind(str, pos + span(node))]))
+    end
+    line = line*"\n"
+    if is_error(node)
+        printstyled(io, line, color=:light_red)
+    else
+        print(io, line)
+    end
+    if !leaf
+        new_indent = indent*"  "
+        p = pos
+        for x in children(node)
+            _show_green_node(io, x, new_indent, p, str, show_trivia)
+            p += x.span
+        end
+    end
+end
+
+function Base.show(io::IO, ::MIME"text/plain", node::GreenNode)
+    _show_green_node(io, node, "", 1, nothing, true)
+end
+
+function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractString; show_trivia=true)
+    _show_green_node(io, node, "", 1, str, show_trivia)
+end
+
+function _show_green_node_sexpr(io, node::GreenNode, position)
+    if is_leaf(node)
+        print(io, position, "-", position+node.span-1, "::", untokenize(head(node); unique=false))
+    else
+        print(io, "(", untokenize(head(node); unique=false))
+        p = position
+        for n in children(node)
+            print(io, ' ')
+            _show_green_node_sexpr(io, n, p)
+            p += n.span
+        end
+        print(io, ')')
+    end
+end
+
+function Base.show(io::IO, node::GreenNode)
+    _show_green_node_sexpr(io, node, 1)
+end
+
+function GreenNode(cursor::GreenTreeCursor)
+    chead = head(cursor)
+    T = typeof(chead)
+    if is_leaf(cursor)
+        return GreenNode{T}(head(cursor), span(cursor), nothing)
+    else
+        children = GreenNode{T}[]
+        for child in reverse(cursor)
+            pushfirst!(children, GreenNode(child))
+        end
+        return GreenNode{T}(head(cursor), span(cursor), children)
+    end
+end
+
+function build_tree(::Type{GreenNode}, stream::ParseStream;
+                    # unused, but required since `_parse` is written generic
+                    filename=nothing, first_line=1, keep_parens=false)
+    cursor = GreenTreeCursor(stream)
+    if has_toplevel_siblings(cursor)
+        # There are multiple toplevel nodes, e.g. because we're using this
+        # to test a partial parse. Wrap everything in K"wrapper"
+        all_processed = 0
+        local cs
+        for child in reverse_toplevel_siblings(cursor)
+            c = GreenNode(child)
+            if !@isdefined(cs)
+                cs = GreenNode{SyntaxHead}[c]
+            else
+                pushfirst!(cs, c)
+            end
+        end
+        @assert length(cs) != 1
+        return GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), stream.next_byte-1, cs)
+    else
+        return GreenNode(cursor)
+    end
+end
diff --git a/JuliaSyntax/src/porcelain/syntax_tree.jl b/JuliaSyntax/src/porcelain/syntax_tree.jl
new file mode 100644
index 0000000000000..ad08b25e6a1df
--- /dev/null
+++ b/JuliaSyntax/src/porcelain/syntax_tree.jl
@@ -0,0 +1,346 @@
+#-------------------------------------------------------------------------------
+# AST interface, built on top of raw tree
+
+abstract type AbstractSyntaxData end
+
+mutable struct TreeNode{NodeData} # ? prevent others from using this with NodeData <: AbstractSyntaxData?
+    parent::Union{Nothing,TreeNode{NodeData}}
+    children::Union{Nothing,Vector{TreeNode{NodeData}}}
+    data::Union{Nothing,NodeData}
+
+    # Use this constructor rather than the automatically generated one to pass
+    # Test.detect_unbound_args() test in Base.
+    function TreeNode{NodeData}(parent::Union{Nothing,TreeNode{NodeData}},
+                                children::Union{Nothing,Vector{TreeNode{NodeData}}},
+                                data::Union{Nothing,NodeData}) where {NodeData}
+        new{NodeData}(parent, children, data)
+    end
+end
+
+# Exclude parent from hash and equality checks. This means that subtrees can compare equal.
+function Base.hash(node::TreeNode, h::UInt) + h = hash(node.data, h) + children = node.children + if children === nothing + return hash(nothing, h) + else # optimization - avoid extra allocations from `hash(::AbstractVector, ::UInt)` + for child in children + h = hash(child, h) + end + return h + end +end +function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T + a.children == b.children && a.data == b.data +end + +# Implement "pass-through" semantics for field access: access fields of `data` +# as if they were part of `TreeNode` +function Base.getproperty(node::TreeNode, name::Symbol) + name === :parent && return getfield(node, :parent) + name === :children && return getfield(node, :children) + d = getfield(node, :data) + name === :data && return d + return getproperty(d, name) +end + +function Base.setproperty!(node::TreeNode, name::Symbol, x) + name === :parent && return setfield!(node, :parent, x) + name === :children && return setfield!(node, :children, x) + name === :data && return setfield!(node, :data, x) + d = getfield(node, :data) + return setfield!(d, name, x) +end + +const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData} + +struct SyntaxData <: AbstractSyntaxData + source::SourceFile + raw::GreenNode{SyntaxHead} + byte_end::UInt32 + val::Any +end +function Base.getproperty(data::SyntaxData, name::Symbol) + if name === :position + # Previous versions of JuliaSyntax had `position::Int`. + # Allow access for compatibility. It was renamed (with changed) semantics + # to `byte_end::UInt32` to match the rest of the code base, which identified + # nodes, by their last byte. 
+ return Int(getfield(data, :byte_end) - getfield(data, :raw).span + UInt32(1)) + end + return getfield(data, name) +end + +Base.hash(data::SyntaxData, h::UInt) = + hash(data.source, hash(data.raw, hash(data.byte_end, + # Avoid dynamic dispatch: + # This does not support custom `hash` implementation that may be defined for `typeof(data.val)`, + # However, such custom user types should not generally appear in the AST. + Core.invoke(hash, Tuple{Any,UInt}, data.val, h)))) +function Base.:(==)(a::SyntaxData, b::SyntaxData) + a.source == b.source && a.raw == b.raw && a.byte_end == b.byte_end && a.val === b.val +end + +""" + SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; + keep_parens=false, position::Integer=1) + +An AST node with a similar layout to `Expr`. Typically constructed from source +text by calling one of the parser API functions such as [`parseall`](@ref) +""" +const SyntaxNode = TreeNode{SyntaxData} + +function SyntaxNode(source::SourceFile, cursor::RedTreeCursor; + keep_parens=false) + # Build the full GreenNode tree once upfront (including trivia) + green = GreenNode(cursor.green) + + GC.@preserve source begin + raw_offset, txtbuf = _unsafe_wrap_substring(source.code) + offset = raw_offset - source.byte_offset + _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens) + end +end + +function SyntaxNode(source::SourceFile, cursor::RedTreeCursor, green::GreenNode{SyntaxHead}; + keep_parens=false) + GC.@preserve source begin + raw_offset, txtbuf = _unsafe_wrap_substring(source.code) + offset = raw_offset - source.byte_offset + _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens) + end +end + +should_include_node(child) = !is_trivia(child) || is_error(child) + +function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int, + cursor::RedTreeCursor, green::GreenNode{SyntaxHead}, keep_parens::Bool) + if is_leaf(cursor) + # Here we parse the values eagerly rather than representing them as + # strings. 
Maybe this is good. Maybe not. + valrange = byte_range(cursor) + val = parse_julia_literal(txtbuf, head(cursor), valrange .+ offset) + return SyntaxNode(nothing, nothing, SyntaxData(source, green, cursor.byte_end, val)) + else + cs = SyntaxNode[] + green_children = children(green) + + # We need to match up the filtered SyntaxNode children with the unfiltered GreenNode children + # Both cursor and green children need to be traversed in the same order + # Since cursor iterates in reverse, we need to match from the end of green_children + green_idx = green_children === nothing ? 0 : length(green_children) + + for (i, child_cursor) in enumerate(reverse(cursor)) + if should_include_node(child_cursor) + pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child_cursor, green[end-i+1], keep_parens)) + end + end + + if !keep_parens && kind(cursor) == K"parens" && length(cs) == 1 + return cs[1] + end + node = SyntaxNode(nothing, cs, SyntaxData(source, green, cursor.byte_end, nothing)) + for c in cs + c.parent = node + end + return node + end +end + +""" + is_leaf(node) + +Determine whether the node is a leaf of the tree. In our trees a "leaf" +corresponds to a single token in the source text. +""" +is_leaf(node::TreeNode) = node.children === nothing + +""" + children(node) + +Return an iterable list of children for the node. For leaves, return `nothing`. +""" +children(node::TreeNode) = node.children + +""" + numchildren(node) + +Return `length(children(node))` but possibly computed in a more efficient way. +""" +numchildren(node::TreeNode) = (isnothing(node.children) ? 
0 : length(node.children)) + +Base.getindex(node::AbstractSyntaxNode, i::Int) = children(node)[i] +Base.getindex(node::AbstractSyntaxNode, rng::UnitRange) = view(children(node), rng) +Base.firstindex(node::AbstractSyntaxNode) = 1 +Base.length(node::AbstractSyntaxNode) = length(children(node)) +Base.lastindex(node::AbstractSyntaxNode) = length(node) + +function Base.setindex!(node::SN, x::SN, i::Int) where {SN<:AbstractSyntaxNode} + children(node)[i] = x +end + +""" + head(x) + +Get the [`SyntaxHead`](@ref) of a node of a tree or other syntax-related data +structure. +""" +head(node::AbstractSyntaxNode) = head(node.raw) + +span(node::AbstractSyntaxNode) = node.raw.span + +byte_range(node::AbstractSyntaxNode) = (node.byte_end - span(node) + 1):node.byte_end + +first_byte(node::AbstractSyntaxNode) = first(byte_range(node)) +last_byte(node::AbstractSyntaxNode) = last(byte_range(node)) + +sourcefile(node::AbstractSyntaxNode) = node.source + +function leaf_string(ex) + if !is_leaf(ex) + throw(ArgumentError("leaf_string should be used for leaf nodes only")) + end + k = kind(ex) + value = ex.val + # TODO: Dispatch on kind extension module (??) + return k == K"Placeholder" ? "□"*string(value) : + is_identifier(k) ? string(value) : + value isa Symbol ? string(value) : # see parse_julia_literal for other cases which go here + repr(value) +end + +function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode, + indent, show_location, show_kind) + line, col = source_location(node) + if show_location + fname = filename(node) + # Add filename if it's changed from the previous node + if fname != current_filename[] + println(io, indent, " -file- │ ", repr(fname)) + current_filename[] = fname + end + posstr = "$(lpad(line, 4)):$(rpad(col,3))│$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))│" + else + posstr = "" + end + val = node.val + nodestr = is_leaf(node) ? 
leaf_string(node) : "[$(untokenize(head(node)))]" + treestr = string(indent, nodestr) + if show_kind && is_leaf(node) + treestr = rpad(treestr, 40)*" :: "*string(kind(node)) + end + println(io, posstr, treestr) + if !is_leaf(node) + new_indent = indent*" " + for n in children(node) + _show_syntax_node(io, current_filename, n, new_indent, show_location, show_kind) + end + end +end + +function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind) + if is_leaf(node) + if is_error(node) + print(io, "(", untokenize(head(node)), ")") + else + str = leaf_string(node) + k = kind(node) + if is_identifier(k) && !show_kind + str = lower_identifier_name(str, k) + end + print(io, str) + if show_kind + print(io, "::", kind(node)) + end + end + else + print(io, "(", untokenize(head(node))) + for n in children(node) + print(io, ' ') + _show_syntax_node_sexpr(io, n, show_kind) + end + print(io, ')') + end +end + +function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_location=false, show_kind=true) + println(io, "SyntaxNode:") + if show_location + println(io, "line:col│ byte_range │ tree") + end + _show_syntax_node(io, Ref(""), node, "", show_location, show_kind) +end + +function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode; show_kind=false) + _show_syntax_node_sexpr(io, node, show_kind) +end + +function Base.show(io::IO, node::AbstractSyntaxNode) + _show_syntax_node_sexpr(io, node, false) +end + +function Base.push!(node::SN, child::SN) where SN<:AbstractSyntaxNode + if is_leaf(node) + error("Cannot add children") + end + args = children(node) + push!(args, child) +end + +function Base.copy(node::TreeNode) + # copy the container but not the data (ie, deep copy the tree, shallow copy the data). copy(::Expr) is similar + # copy "un-parents" the top-level `node` that you're copying + newnode = typeof(node)(nothing, is_leaf(node) ? 
nothing : typeof(node)[], copy(node.data)) + if !is_leaf(node) + for child in children(node) + newchild = copy(child) + newchild.parent = newnode + push!(newnode, newchild) + end + end + return newnode +end + +# shallow-copy the data +Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.byte_end, data.val) + +function build_tree(::Type{SyntaxNode}, stream::ParseStream; + filename=nothing, first_line=1, keep_parens=false) + source = SourceFile(stream, filename=filename, first_line=first_line) + cursor = RedTreeCursor(stream) + if has_toplevel_siblings(cursor) + # There are multiple toplevel nodes, e.g. because we're using this + # to test a partial parse. Wrap everything in K"wrapper" + + # First build the full green tree for all children (including trivia) + green_children = GreenNode{SyntaxHead}[] + for child in reverse_toplevel_siblings(cursor) + pushfirst!(green_children, GreenNode(child.green)) + end + + # Create a wrapper GreenNode with children + green = GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), + stream.next_byte-1, green_children) + + # Now build SyntaxNodes, iterating through cursors and green nodes together + cs = SyntaxNode[] + for (i, child) in enumerate(reverse_toplevel_siblings(cursor)) + if should_include_node(child) + pushfirst!(cs, SyntaxNode(source, child, green[end-i+1], keep_parens=keep_parens)) + end + end + + length(cs) == 1 && return only(cs) + + node = SyntaxNode(nothing, cs, SyntaxData(source, green, + stream.next_byte-1, nothing)) + for c in cs + c.parent = node + end + return node + else + return SyntaxNode(source, cursor, keep_parens=keep_parens) + end +end + +@deprecate haschildren(x) !is_leaf(x) false diff --git a/JuliaSyntax/src/precompile.jl b/JuliaSyntax/src/precompile.jl new file mode 100644 index 0000000000000..9fb71c74d7132 --- /dev/null +++ b/JuliaSyntax/src/precompile.jl @@ -0,0 +1,12 @@ +# Just parse some file as a precompile workload +let filename = joinpath(@__DIR__, "julia/literal_parsing.jl") + 
text = read(filename, String) + parseall(Expr, text) + parseall(SyntaxNode, text) + if _has_v1_6_hooks + enable_in_core!() + Meta.parse("1 + 2") + Meta.parse(SubString("1 + 2")) + enable_in_core!(false) + end +end diff --git a/JuliaSyntax/src/utils.jl b/JuliaSyntax/src/utils.jl new file mode 100644 index 0000000000000..c21c251eb688a --- /dev/null +++ b/JuliaSyntax/src/utils.jl @@ -0,0 +1,144 @@ +# Compatibility hacks for older Julia versions +if VERSION < v"1.1" + isnothing(x) = x === nothing +end +if VERSION < v"1.4" + function only(x::Union{AbstractVector,AbstractString}) + if length(x) != 1 + error("Collection must contain exactly 1 element") + end + return x[1] + end +end +if VERSION < v"1.5" + import Base.peek +end + +@static if VERSION < v"1.8" + macro callsite_inline(call) + esc(call) + end +else + const var"@callsite_inline" = var"@inline" +end + + +_unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string)) + +#-------------------------------------------------- +# +# Internal error, used as assertion failure for cases we expect can't happen. +@noinline function internal_error(strs::Vararg{String, N}) where {N} + error("Internal error: ", strs...) +end + +# Like @assert, but always enabled and calls internal_error() +macro check(ex, msgs...) + msg = isempty(msgs) ? ex : msgs[1] + if isa(msg, AbstractString) + msg = String(msg) + elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) + msg = :(string($(esc(msg)))) + else + msg = string(msg) + end + return :($(esc(ex)) ? 
nothing : internal_error($msg)) +end + +# Really remove line numbers, even from Expr(:toplevel) +remove_linenums!(ex) = ex +function remove_linenums!(ex::Expr) + if ex.head === :block || ex.head === :quote || ex.head === :toplevel + filter!(ex.args) do x + !(isa(x, Expr) && x.head === :line || isa(x, LineNumberNode)) + end + end + for subex in ex.args + subex isa Expr && remove_linenums!(subex) + end + return ex +end + +# String macro to get the UInt8 code of an ascii character +macro u8_str(str) + c = str == "\\" ? '\\' : only(unescape_string(str)) + isascii(c) || error("Non-ascii character in u8_str") + codepoint(c) % UInt8 +end + +#------------------------------------------------------------------------------- +# Text printing/display utils + +const _fg_color_codes = Dict( + :black => 30, + :red => 31, + :green => 32, + :yellow => 33, + :blue => 34, + :magenta => 35, + :cyan => 36, + :white => 37, + :light_black => 90, # gray + :light_red => 91, + :light_green => 92, + :light_yellow => 93, + :light_blue => 94, + :light_magenta => 95, + :light_cyan => 96, + :light_white => 97, +) + +""" + _printstyled(io::IO, text; + fgcolor=nothing, bgcolor=nothing, href=nothing) + +Like Base.printstyled, but allows providing RGB colors for true color +terminals, both foreground and background colors, and hyperlinks. Colors may be +given as one of the standard color names as in `Base.printstyled`, an integer +for 256 color terms, or an (r,g,b) triple with `0 <= r <= 255` etc for true +color terminals. 
+ +* `fgcolor` - set foreground color +* `bgcolor` - set background color +* `href` - set hyperlink reference +""" +function _printstyled(io::IO, text; fgcolor=nothing, bgcolor=nothing, href=nothing) + if (isnothing(fgcolor) && isnothing(bgcolor) && isnothing(href)) || !get(io, :color, false) + print(io, text) + return + end + colcode = "" + if !isnothing(fgcolor) + if fgcolor isa Symbol && haskey(_fg_color_codes, fgcolor) + colcode *= "\e[$(_fg_color_codes[fgcolor])m" + elseif fgcolor isa Integer && 0 <= fgcolor <= 255 + colcode *= "\e[38;5;$(fgcolor)m" + elseif fgcolor isa Tuple && length(fgcolor) == 3 && all(0 .<= fgcolor .<= 255) + colcode *= "\e[38;2;$(fgcolor[1]);$(fgcolor[2]);$(fgcolor[3])m" + else + error("Invalid ansi color $fgcolor") + end + end + if !isnothing(bgcolor) + if bgcolor isa Symbol && haskey(_fg_color_codes, bgcolor) + colcode *= "\e[$(10 + _fg_color_codes[bgcolor])m" + elseif bgcolor isa Integer && 0 <= bgcolor <= 255 + colcode *= "\e[48;5;$(bgcolor)m" + elseif bgcolor isa Tuple && length(bgcolor) == 3 && all(0 .<= bgcolor .<= 255) + colcode *= "\e[48;2;$(bgcolor[1]);$(bgcolor[2]);$(bgcolor[3])m" + else + error("Invalid ansi color $bgcolor") + end + end + colreset = "\e[0;0m" + first = true + for linepart in split(text, '\n') + first || print(io, '\n') + line = string(colcode, linepart, colreset) + if !isnothing(href) + line = "\e]8;;$href\e\\$line\e]8;;\e\\" + end + print(io, line) + first = false + end +end diff --git a/JuliaSyntax/sysimage/.gitignore b/JuliaSyntax/sysimage/.gitignore new file mode 100644 index 0000000000000..344cdc510a371 --- /dev/null +++ b/JuliaSyntax/sysimage/.gitignore @@ -0,0 +1,3 @@ +JuliaSyntax +Project.toml +Manifest.toml diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml new file mode 100644 index 0000000000000..7e31a0dd5bc26 --- /dev/null +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/Project.toml @@ -0,0 +1,10 @@ +name = "JuliaSyntaxCore" +uuid = 
"05e5f68f-ccd0-4d84-a81a-f557a333a331" +authors = ["Chris Foster and contributors"] +version = "0.1.0" + +[compat] +julia = "1.6" + +[deps] +JuliaSyntax = "54354a4c-6cac-4c00-8566-e7c1beb8bfd8" diff --git a/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl new file mode 100644 index 0000000000000..d1804b89053b7 --- /dev/null +++ b/JuliaSyntax/sysimage/JuliaSyntaxCore/src/JuliaSyntaxCore.jl @@ -0,0 +1,12 @@ +module JuliaSyntaxCore + +# A tiny module to hold initialization code for JuliaSyntax.jl integration with +# the runtime. + +using JuliaSyntax + +function __init__() + JuliaSyntax.enable_in_core!() +end + +end diff --git a/JuliaSyntax/sysimage/compile.jl b/JuliaSyntax/sysimage/compile.jl new file mode 100755 index 0000000000000..390901eb56cd1 --- /dev/null +++ b/JuliaSyntax/sysimage/compile.jl @@ -0,0 +1,47 @@ +#!/bin/bash +#= +[[ $1 == +* ]] && juliaup_arg=$1 && shift # release channel for juliaup +exec julia ${juliaup_arg} --startup-file=no -e 'include(popfirst!(ARGS))' "$0" "$@" +=# + +imgs_base_path = joinpath(first(DEPOT_PATH), "sysimages", "v$VERSION") +mkpath(imgs_base_path) + +using Libdl + +cd(@__DIR__) + +# Create a copy of JuliaSyntax so we can change the project UUID. +# This allows us to use an older version of JuliaSyntax for developing +# JuliaSyntax itself. 
+rm("JuliaSyntax", force=true, recursive=true) +mkdir("JuliaSyntax") +cp("../src", "JuliaSyntax/src") +cp("../test", "JuliaSyntax/test") +projstr = replace(read("../Project.toml", String), + "70703baa-626e-46a2-a12c-08ffd08c73b4"=>"54354a4c-6cac-4c00-8566-e7c1beb8bfd8") +write("JuliaSyntax/Project.toml", projstr) + +using Pkg +rm("Project.toml", force=true) +rm("Manifest.toml", force=true) +Pkg.activate(".") +Pkg.develop(path="./JuliaSyntax") +Pkg.develop(path="./JuliaSyntaxCore") +Pkg.add("PackageCompiler") + +image_path = joinpath(imgs_base_path, "juliasyntax_sysimage."*Libdl.dlext) + +using PackageCompiler +PackageCompiler.create_sysimage( + ["JuliaSyntaxCore"], + project=".", + sysimage_path=image_path, + precompile_execution_file="precompile_exec.jl", + incremental=true, +) + +@info """## System image compiled! + + Use it with `julia -J "$image_path"` + """ diff --git a/JuliaSyntax/sysimage/precompile.jl b/JuliaSyntax/sysimage/precompile.jl new file mode 100644 index 0000000000000..a1ae9555f33a2 --- /dev/null +++ b/JuliaSyntax/sysimage/precompile.jl @@ -0,0 +1,6 @@ +function precompile_JuliaSyntax(mod, juliasyntax_path) + Base.include(mod, joinpath(juliasyntax_path, "test", "test_utils.jl")) + Base.include(mod, joinpath(juliasyntax_path, "test", "parser.jl")) + JuliaSyntax.enable_in_core!() + Meta.parse("x+y+z-w .+ [a b c]") +end diff --git a/JuliaSyntax/sysimage/precompile_exec.jl b/JuliaSyntax/sysimage/precompile_exec.jl new file mode 100644 index 0000000000000..99c8069341275 --- /dev/null +++ b/JuliaSyntax/sysimage/precompile_exec.jl @@ -0,0 +1,3 @@ +import JuliaSyntax +include("precompile.jl") +precompile_JuliaSyntax(@__MODULE__(), pkgdir(JuliaSyntax)) diff --git a/JuliaSyntax/test/benchmark.jl b/JuliaSyntax/test/benchmark.jl new file mode 100644 index 0000000000000..0a4e260e485e9 --- /dev/null +++ b/JuliaSyntax/test/benchmark.jl @@ -0,0 +1,35 @@ +using BenchmarkTools +using JuliaSyntax + +include("test_utils.jl") + +function concat_base() + basedir = 
joinpath(Sys.BINDIR, "..", "share", "julia", "base") + io = IOBuffer() + for f in find_source_in_path(basedir) + write(io, read(f, String)) + println(io) + end + return String(take!(io)) +end + +all_base_code = concat_base() + +b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all) +b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code, ignore_warnings=true) +b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code, ignore_warnings=true) +b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code, ignore_warnings=true) +b_flisp = @benchmark JuliaSyntax.fl_parseall(all_base_code) + +@info "Benchmarks" ParseStream=b_ParseStream GreenNode=b_GreenNode SyntaxNode=b_SyntaxNode Expr=b_Expr flisp=b_flisp + + +# Allocation profiling +# +# using Profile.Allocs +# using PProf +# Allocs.clear() +# stream = JuliaSyntax.ParseStream(text); +# JuliaSyntax.peek(stream); +# Allocs.@profile sample_rate=1 JuliaSyntax.parsestmt(stream) +# PProf.Allocs.pprof() diff --git a/JuliaSyntax/test/diagnostics.jl b/JuliaSyntax/test/diagnostics.jl new file mode 100644 index 0000000000000..151aad919c0ed --- /dev/null +++ b/JuliaSyntax/test/diagnostics.jl @@ -0,0 +1,251 @@ +function diagnostic(str; only_first=false, allow_multiple=false, rule=:all, version=v"1.6") + stream = ParseStream(str; version=version) + parse!(stream, rule=rule) + if allow_multiple + stream.diagnostics + else + if !only_first + @test length(stream.diagnostics) == 1 + end + return isempty(stream.diagnostics) ? 
nothing : stream.diagnostics[1] + end +end + +@testset "token errors" begin + @test diagnostic("a\xf5b") == Diagnostic(2, 2, :error, "invalid UTF-8 sequence \"\\xf5\"") + @test diagnostic("# a\xf5b") == Diagnostic(1, 5, :error, "invalid UTF-8 sequence \"# a\\xf5b\"") + for c in ['\u00ad', '\u200b', '\u200c', '\u200d', + '\u200e', '\u200f', '\u2060', '\u2061'] + @test diagnostic("a$(c)b") == + Diagnostic(2, 1+sizeof(string(c)), :error, "invisible character $(repr(c))") + end + @test diagnostic("₁") == Diagnostic(1, 3, :error, "identifier cannot begin with character '₁'") + @test diagnostic(":⥻") == Diagnostic(2, 4, :error, "unknown unicode character '⥻'") + + @test diagnostic("\"X \u202a X\"") == Diagnostic(2, 8, :error, "unbalanced bidirectional unicode formatting \"X \\u202a X\"") + @test diagnostic("#= \u202a =#") == Diagnostic(1, 9, :error, "unbalanced bidirectional unicode formatting \"#= \\u202a =#\"") + @test diagnostic("\"X \u202a \$xx\u202c\"", allow_multiple=true) == [ + Diagnostic(2, 7, :error, "unbalanced bidirectional unicode formatting \"X \\u202a \"") + Diagnostic(11, 13, :error, "unbalanced bidirectional unicode formatting \"\\u202c\"") + ] + + @test diagnostic("0x") == Diagnostic(1, 2, :error, "invalid numeric constant") + @test diagnostic("0x0.1") == Diagnostic(1, 5, :error, "hex float literal must contain `p` or `P`") +end + +@testset "parser errors" begin + @test diagnostic("+ #==# (a,b)") == + Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("1 -+ (a=1, b=2)") == + Diagnostic(5, 5, :error, "whitespace not allowed between prefix function call and argument list") + @test diagnostic("\n+ (x, y)") == + Diagnostic(3, 3, :error, "whitespace not allowed between prefix function call and argument list") + + @test diagnostic("function (\$f) body end") == + Diagnostic(10, 13, :error, "Ambiguous signature. 
Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") + + @test diagnostic("A.@B.x", only_first=true) == + Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component") + @test diagnostic("@M.(x)") == + Diagnostic(1, 3, :error, "dot call syntax not supported for macros") + + @test diagnostic("try x end") == + Diagnostic(1, 9, :error, "try without catch or finally") + # TODO: better range + @test diagnostic("@A.\$x a") == + Diagnostic(4, 5, :error, "invalid macro name") + + @test diagnostic("a, , b") == + Diagnostic(4, 4, :error, "unexpected `,`") + @test diagnostic(")", allow_multiple=true) == [ + Diagnostic(1, 1, :error, "unexpected `)`") + Diagnostic(1, 1, :error, "extra tokens after end of expression") + ] + + @test diagnostic("if\nfalse\nend") == + Diagnostic(3, 3, :error, "missing condition in `if`") + @test diagnostic("if false\nelseif\nend") == + Diagnostic(16, 16, :error, "missing condition in `elseif`") + + @test diagnostic("f(x::V) where {V) = x", allow_multiple=true) == [ + Diagnostic(17, 16, :error, "Expected `}` or `,`") + Diagnostic(17, 21, :error, "extra tokens after end of expression") + ] + @test diagnostic("[1)", allow_multiple=true) == [ + Diagnostic(3, 2, :error, "Expected `]` or `,`") + Diagnostic(3, 3, :error, "extra tokens after end of expression") + ] + @test diagnostic("f(x, y #=hi=#\ng(z)") == Diagnostic(7, 6, :error, "Expected `)` or `,`") + @test diagnostic("(x, y \nz") == Diagnostic(6, 5, :error, "Expected `)` or `,`") + @test diagnostic("function f(x, y \nz end") == Diagnostic(16, 15, :error, "Expected `)` or `,`") + + @test diagnostic("sin. 
(1)") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("x [i]") == + Diagnostic(2, 2, :error, "whitespace is not allowed here") + @test diagnostic("\nf() [i]") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() (i)") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() .i") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\nf() {i}") == + Diagnostic(5, 5, :error, "whitespace is not allowed here") + @test diagnostic("\n@ m") == + Diagnostic(3, 3, :error, "whitespace is not allowed here") + @test diagnostic("\nusing a .b") == + Diagnostic(9, 9, :error, "whitespace is not allowed here") + + @test diagnostic("const x") == + Diagnostic(1, 7, :error, "expected assignment after `const`") + @test diagnostic("global const x") == + Diagnostic(1, 14, :error, "expected assignment after `const`") + + @test diagnostic("(for i=1; println())") == + Diagnostic(20, 19, :error, "Expected `end`") + @test diagnostic("(try i=1; println())", allow_multiple=true) == [ + Diagnostic(2, 19, :error, "try without catch or finally") + Diagnostic(20, 19, :error, "Expected `end`") + ] + + @test diagnostic("\"\$(x,y)\"") == + Diagnostic(3, 7, :error, "invalid interpolation syntax") + + @test diagnostic("", rule=:statement) == + Diagnostic(1, 0, :error, "premature end of input") + @test diagnostic("", rule=:atom) == + Diagnostic(1, 0, :error, "premature end of input") +end + +@testset "parser warnings" begin + @test diagnostic("@(A)", only_first=true) == + Diagnostic(2, 4, :warning, "parenthesizing macro names is unnecessary") + @test diagnostic("try finally catch a ; b end") == + Diagnostic(13, 23, :warning, "`catch` after `finally` will execute out of order") + @test diagnostic("import . 
.A") == + Diagnostic(9, 10, :warning, "space between dots in import path") + @test diagnostic("import A .==") == + Diagnostic(9, 9, :warning, "space between dots in import path") + @test diagnostic("import A.:+") == + Diagnostic(10, 10, :warning, "quoting with `:` is not required here") + # No warnings for imports of `:` and parenthesized `(..)` + @test diagnostic("import A.:, :", allow_multiple=true) == [] + @test diagnostic("import A: (..)", allow_multiple=true) == [] + @test diagnostic("import A.(:+)") == + Diagnostic(10, 13, :warning, "parentheses are not required here") + @test diagnostic("export (x)") == + Diagnostic(8, 10, :warning, "parentheses are not required here") + @test diagnostic("import :A") == + Diagnostic(8, 9, :error, "expected identifier") + @test diagnostic("export :x") == + Diagnostic(8, 9, :error, "expected identifier") + @test diagnostic("public = 4", version=v"1.11") == + diagnostic("public[7] = 5", version=v"1.11") == + diagnostic("public() = 6", version=v"1.11") == + Diagnostic(1, 6, :warning, "using public as an identifier is deprecated") +end + +@testset "diagnostics for literal parsing" begin + # Float overflow/underflow + @test diagnostic("x = 10.0e1000;") == + Diagnostic(5, 13, :error, "overflow in floating point literal") + @test diagnostic("x = 10.0f1000;") == + Diagnostic(5, 13, :error, "overflow in floating point literal") + @test diagnostic("x = 10.0e-1000;") == + Diagnostic(5, 14, :warning, "underflow to zero in floating point literal") + @test diagnostic("x = 10.0f-1000;") == + Diagnostic(5, 14, :warning, "underflow to zero in floating point literal") + # Underflow boundary + @test diagnostic("5e-324", allow_multiple=true) == [] + @test diagnostic("2e-324") == + Diagnostic(1, 6, :warning, "underflow to zero in floating point literal") + + # Char + @test diagnostic("x = ''") == + Diagnostic(6, 5, :error, "empty character literal") + @test diagnostic("x = 'abc'") == + Diagnostic(6, 8, :error, "character literal contains multiple 
characters") + @test diagnostic("x = '\\xq'") == + Diagnostic(6, 7, :error, "invalid hex escape sequence") + @test diagnostic("x = '\\uq'") == + Diagnostic(6, 7, :error, "invalid unicode escape sequence") + @test diagnostic("x = '\\Uq'") == + Diagnostic(6, 7, :error, "invalid unicode escape sequence") + @test diagnostic("x = '\\777'") == + Diagnostic(6, 9, :error, "invalid octal escape sequence") + @test diagnostic("x = '\\k'") == + Diagnostic(6, 7, :error, "invalid escape sequence") + @test diagnostic("'\\", allow_multiple=true) == [ + Diagnostic(2, 2, :error, "invalid escape sequence"), + Diagnostic(3, 2, :error, "unterminated character literal") + ] + # Various cases from Base + @test diagnostic("'\\xff\\xff\\xff\\xff'") == + Diagnostic(2, 17, :error, "character literal contains multiple characters") + @test diagnostic("'\\100\\42'") == + Diagnostic(2, 8, :error, "character literal contains multiple characters") + @test diagnostic("'\\xff\\xff\\xff\\xff\\xff'") == + Diagnostic(2, 21, :error, "character literal contains multiple characters") + @test diagnostic("'abcd'") == + Diagnostic(2, 5, :error, "character literal contains multiple characters") + @test diagnostic("'\\uff\\xff'") == + Diagnostic(2, 9, :error, "character literal contains multiple characters") + @test diagnostic("'\\xffa'") == + Diagnostic(2, 6, :error, "character literal contains multiple characters") + @test diagnostic("'\\uffffa'") == + Diagnostic(2, 8, :error, "character literal contains multiple characters") + @test diagnostic("'\\U00002014a'") == + Diagnostic(2, 12, :error, "character literal contains multiple characters") + @test diagnostic("'\\1000'") == + Diagnostic(2, 6, :error, "character literal contains multiple characters") + + # String + @test diagnostic("x = \"abc\\xq\"") == + Diagnostic(9, 10, :error, "invalid hex escape sequence") + @test diagnostic("x = \"abc\\uq\"") == + Diagnostic(9, 10, :error, "invalid unicode escape sequence") + @test diagnostic("x = \"abc\\Uq\"") == + 
Diagnostic(9, 10, :error, "invalid unicode escape sequence") + @test diagnostic("x = \"abc\\777\"") == + Diagnostic(9, 12, :error, "invalid octal escape sequence") + @test diagnostic("x = \"abc\\k\"") == + Diagnostic(9, 10, :error, "invalid escape sequence") + @test diagnostic("x = \"abc\\k \\k\"", allow_multiple=true) == [ + Diagnostic(9, 10, :error, "invalid escape sequence"), + Diagnostic(12, 13, :error, "invalid escape sequence") + ] + @test diagnostic("\"\$x෴ \"") == + Diagnostic(4, 6, :error, "interpolated variable ends with invalid character; use `\$(...)` instead") +end + +@testset "diagnostic printing" begin + stream = JuliaSyntax.ParseStream("a -- b -- c") + JuliaSyntax.parse!(stream) + @test sprint(JuliaSyntax.show_diagnostics, stream) == """ + # Error @ line 1:3 + a -- b -- c + # └┘ ── invalid operator + # Error @ line 1:8 + a -- b -- c + # └┘ ── invalid operator""" + + stream = JuliaSyntax.ParseStream("a -- b") + JuliaSyntax.parse!(stream) + fname = "test.jl" + sf = SourceFile(stream, filename=fname) + url = JuliaSyntax._file_url(fname) + @test sprint(JuliaSyntax.show_diagnostics, stream.diagnostics, sf, + context=:color=>true) == """ + \e[90m# Error @ \e[0;0m\e]8;;$url#1:3\e\\\e[90mtest.jl:1:3\e[0;0m\e]8;;\e\\ + a \e[48;2;120;70;70m--\e[0;0m b + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" + + if Sys.isunix() + tempdirname = mktempdir() + cd(tempdirname) do + rm(tempdirname) + # Test _file_url doesn't fail with nonexistent directories + @test isnothing(JuliaSyntax._file_url(joinpath("__nonexistent__", "test.jl"))) + end + end +end diff --git a/JuliaSyntax/test/expr.jl b/JuliaSyntax/test/expr.jl new file mode 100644 index 0000000000000..d7547848bef09 --- /dev/null +++ b/JuliaSyntax/test/expr.jl @@ -0,0 +1,843 @@ +@testset "Expr parsing with $method" for method in ["build_tree", "SyntaxNode conversion"] + parseatom, parsestmt, parseall = + if method == "build_tree" + ((s; kws...) -> JuliaSyntax.parseatom(Expr, s; kws...), + (s; kws...) 
-> JuliaSyntax.parsestmt(Expr, s; kws...), + (s; kws...) -> JuliaSyntax.parseall(Expr, s; kws...)) + else + ((s; kws...) -> Expr(JuliaSyntax.parseatom(SyntaxNode, s; keep_parens=true, kws...)), + (s; kws...) -> Expr(JuliaSyntax.parsestmt(SyntaxNode, s; keep_parens=true, kws...)), + (s; kws...) -> Expr(JuliaSyntax.parseall(SyntaxNode, s; keep_parens=true, kws...))) + end + + @testset "Quote nodes" begin + @test parseatom(":(a)") == QuoteNode(:a) + @test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a)) + @test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) + # Compatibility hack for VERSION >= v"1.4" + # https://github.com/JuliaLang/julia/pull/34077 + @test parseatom(":true") == Expr(:quote, true) + end + + @testset "Line numbers" begin + @testset "Blocks" begin + @test parsestmt("begin a\nb\n\nc\nend") == + Expr(:block, + LineNumberNode(1), + :a, + LineNumberNode(2), + :b, + LineNumberNode(4), + :c, + ) + @test parsestmt("(a;b;c)") == + Expr(:block, + :a, + LineNumberNode(1), + :b, + LineNumberNode(1), + :c, + ) + @test parsestmt("begin end") == + Expr(:block, + LineNumberNode(1) + ) + @test parsestmt("(;;)") == + Expr(:block) + + @test parseall("a\n\nb") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + :b, + ) + @test parsestmt("a;b") == + Expr(:toplevel, :a, :b) + + @test parsestmt("module A\n\nbody\nend") == + Expr(:module, + true, + :A, + Expr(:block, + LineNumberNode(1), + LineNumberNode(3), + :body, + ), + ) + + @test parseall("a\n\nx") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + :x + ) + @test parseall("a\n\nx;y") == + Expr(:toplevel, + LineNumberNode(1), + :a, + LineNumberNode(3), + Expr(:toplevel, :x, :y) + ) + end + + @testset "Function definition lines" begin + @test parsestmt("function f()\na\n\nb\nend") == + Expr(:function, + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :a, + LineNumberNode(4), + :b, + ) + ) + @test parsestmt("f() = 1") == + Expr(:(=), + 
Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + 1 + ) + ) + @test parsestmt("macro f()\na\nend") == + Expr(:macro, + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :a, + ) + ) + + # function/macro without methods + @test parsestmt("function f end") == + Expr(:function, :f) + @test parsestmt("macro f end") == + Expr(:macro, :f) + + # weird cases with extra parens + @test parsestmt("function (f() where T) end") == + Expr(:function, Expr(:where, Expr(:call, :f), :T), + Expr(:block, LineNumberNode(1), LineNumberNode(1))) + @test parsestmt("function (f()::S) end") == + Expr(:function, Expr(:(::), Expr(:call, :f), :S), + Expr(:block, LineNumberNode(1), LineNumberNode(1))) + end + + @testset "->" begin + @test parsestmt("a -> b") == + Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("(a,) -> b") == + Expr(:->, Expr(:tuple, :a), Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("(a where T) -> b") == + Expr(:->, Expr(:where, :a, :T), Expr(:block, LineNumberNode(1), :b)) + # @test parsestmt("a -> (\nb;c)") == + # Expr(:->, :a, Expr(:block, LineNumberNode(1), :b)) + @test parsestmt("a -> begin\nb\nc\nend") == + Expr(:->, :a, Expr(:block, + LineNumberNode(1), + LineNumberNode(2), :b, + LineNumberNode(3), :c)) + @test parsestmt("(a;b=1) -> c") == + Expr(:->, + Expr(:block, :a, LineNumberNode(1), Expr(:(=), :b, 1)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("(a...;b...) 
-> c") == + Expr(:->, + Expr(:tuple, Expr(:parameters, Expr(:(...), :b)), Expr(:(...), :a)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("(;) -> c") == + Expr(:->, + Expr(:tuple, Expr(:parameters)), + Expr(:block, LineNumberNode(1), :c)) + @test parsestmt("a::T -> b") == + Expr(:->, Expr(:(::), :a, :T), Expr(:block, LineNumberNode(1), :b)) + end + + @testset "elseif" begin + @test parsestmt("if a\nb\nelseif c\n d\nend") == + Expr(:if, + :a, + Expr(:block, + LineNumberNode(2), + :b), + Expr(:elseif, + Expr(:block, + LineNumberNode(3), # Line number for elseif condition + :c), + Expr(:block, + LineNumberNode(4), + :d), + ) + ) + end + + @testset "No line numbers in let bindings" begin + @test parsestmt("let i=is, j=js\nbody\nend") == + Expr(:let, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body + ) + ) + end + + @testset "Loops" begin + @test parsestmt("for x=xs\n\nend") == + Expr(:for, + Expr(:(=), :x, :xs), + Expr(:block, + LineNumberNode(1), + LineNumberNode(3) + ) + ) + @test parsestmt("for x=xs\ny\nend") == + Expr(:for, + Expr(:(=), :x, :xs), + Expr(:block, + LineNumberNode(2), + :y, + LineNumberNode(3) + ) + ) + @test parsestmt("while cond\n\nend") == + Expr(:while, + :cond, + Expr(:block, + LineNumberNode(1), + LineNumberNode(3) + ) + ) + @test parsestmt("while cond\ny\nend") == + Expr(:while, + :cond, + Expr(:block, + LineNumberNode(2), + :y, + LineNumberNode(3) + ) + ) + end + end + + @testset "Short form function line numbers" begin + # A block is added to hold the line number node + @test parsestmt("f() = xs") == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + :xs)) + + @test parsestmt("f() =\n(a;b)") == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + :a, + LineNumberNode(2), + :b)) + + @test parsestmt("f() =\nbegin\na\nb\nend") == + Expr(:(=), + Expr(:call, :f), + Expr(:block, + LineNumberNode(1), + LineNumberNode(3), + :a, + 
LineNumberNode(4), + :b)) + + @test parsestmt("let f(x) =\ng(x)=1\nend") == + Expr(:let, + Expr(:(=), + Expr(:call, :f, :x), + Expr(:block, + LineNumberNode(1), + Expr(:(=), + Expr(:call, :g, :x), + Expr(:block, + LineNumberNode(2), + 1)))), + Expr(:block, + LineNumberNode(3))) + + # `.=` doesn't introduce short form functions + @test parsestmt("f() .= xs") == + Expr(:(.=), Expr(:call, :f), :xs) + end + + @testset "for" begin + @test parsestmt("for i=is body end") == + Expr(:for, + Expr(:(=), :i, :is), + Expr(:block, + LineNumberNode(1), + :body, + LineNumberNode(1) + ) + ) + @test parsestmt("for i=is, j=js\nbody\nend") == + Expr(:for, + Expr(:block, + Expr(:(=), :i, :is), + Expr(:(=), :j, :js), + ), + Expr(:block, + LineNumberNode(2), + :body, + LineNumberNode(3), + ) + ) + end + + @testset "Long form anonymous functions" begin + @test parsestmt("function (xs...)\nbody end") == + Expr(:function, + Expr(:..., :xs), + Expr(:block, + LineNumberNode(1), + LineNumberNode(2), + :body)) + end + + @testset "String conversions" begin + # String unwrapping / wrapping + @test parsestmt("\"str\"") == "str" + @test parsestmt("\"\$(\"str\")\"") == + Expr(:string, Expr(:string, "str")) + # Concatenation of string chunks in triple quoted cases + @test parsestmt("```\n a\n b```") == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + "a\nb") + @test parsestmt("\"\"\"\n a\n \$x\n b\n c\"\"\"") == + Expr(:string, "a\n", :x, "\nb\nc") + # Incomplete cases + @test parsestmt("`x", ignore_errors=true) == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + Expr(:string, "x", Expr(:error))) + @test parsestmt("`", ignore_errors=true) == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), + Expr(:string, Expr(:error))) + end + + @testset "Char conversions" begin + @test parsestmt("'a'") == 'a' + @test parsestmt("'α'") == 'α' + @test parsestmt("'\\xce\\xb1'") == 'α' + end + + @testset "do block conversion" begin + @test 
parsestmt("f(x) do y\n body end") == + Expr(:do, + Expr(:call, :f, :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body))) + + @test parsestmt("@f(x) do y body end") == + Expr(:do, + Expr(:macrocall, Symbol("@f"), LineNumberNode(1), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + @test parsestmt("f(x; a=1) do y body end") == + Expr(:do, + Expr(:call, :f, Expr(:parameters, Expr(:kw, :a, 1)), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + # Test calls with do inside them + @test parsestmt("g(f(x) do y\n body end)") == + Expr(:call, + :g, + Expr(:do, + Expr(:call, :f, :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body)))) + end + + @testset "= to Expr(:kw) conversion" begin + # Call + @test parsestmt("f(a=1)") == + Expr(:call, :f, Expr(:kw, :a, 1)) + @test parsestmt("f(; b=2)") == + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2))) + @test parsestmt("f(a=1; b=2)") == + Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)) + @test parsestmt("f(a; b; c)") == + Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a) + @test parsestmt("+(a=1,)") == + Expr(:call, :+, Expr(:kw, :a, 1)) + @test parsestmt("(a=1)()") == + Expr(:call, Expr(:(=), :a, 1)) + + # Operator calls: = is not :kw + @test parsestmt("(x=1) != 2") == + Expr(:call, :!=, Expr(:(=), :x, 1), 2) + @test parsestmt("+(a=1)") == + Expr(:call, :+, Expr(:(=), :a, 1)) + @test parsestmt("(a=1)'") == + Expr(Symbol("'"), Expr(:(=), :a, 1)) + @test parsestmt("(a=1)'ᵀ") == + Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1)) + + # Dotcall + @test parsestmt("f.(a=1; b=2)") == + Expr(:., :f, Expr(:tuple, + Expr(:parameters, Expr(:kw, :b, 2)), + Expr(:kw, :a, 1))) + + # Named tuples + @test parsestmt("(a=1,)") == + Expr(:tuple, Expr(:(=), :a, 1)) + @test parsestmt("(a=1,; b=2)") == + Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) + @test 
parsestmt("(a=1,; b=2; c=3)") == + Expr(:tuple, + Expr(:parameters, + Expr(:parameters, Expr(:kw, :c, 3)), + Expr(:kw, :b, 2)), + Expr(:(=), :a, 1)) + + # ref + @test parsestmt("x[i=j]") == + Expr(:ref, :x, Expr(:kw, :i, :j)) + @test parsestmt("(i=j)[x]") == + Expr(:ref, Expr(:(=), :i, :j), :x) + @test parsestmt("x[a, b; i=j]") == + Expr(:ref, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) + # curly + @test parsestmt("(i=j){x}") == + Expr(:curly, Expr(:(=), :i, :j), :x) + @test parsestmt("x{a, b; i=j}") == + Expr(:curly, :x, Expr(:parameters, Expr(:(=), :i, :j)), :a, :b) + + # vect + @test parsestmt("[a=1,; b=2]") == + Expr(:vect, + Expr(:parameters, Expr(:(=), :b, 2)), + Expr(:(=), :a, 1)) + # braces + @test parsestmt("{a=1,; b=2}") == + Expr(:braces, + Expr(:parameters, Expr(:(=), :b, 2)), + Expr(:(=), :a, 1)) + + # dotted = is not :kw + @test parsestmt("f(a .= 1)") == + Expr(:call, :f, Expr(:.=, :a, 1)) + + # = inside parens in calls and tuples + @test parsestmt("f(((a = 1)))") == + Expr(:call, :f, Expr(:kw, :a, 1)) + @test parsestmt("(((a = 1)),)") == + Expr(:tuple, Expr(:(=), :a, 1)) + @test parsestmt("(;((a = 1)),)") == + Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + end + + @testset "Field access syntax" begin + @test parsestmt("a.b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.\$b") == Expr(:., :a, QuoteNode(Expr(:$, :b))) + @test parsestmt("a.:b") == Expr(:., :a, QuoteNode(:b)) + @test parsestmt("a.@b x") == Expr(:macrocall, + Expr(:., :a, QuoteNode(Symbol("@b"))), + LineNumberNode(1), + :x) + end + + @testset "dotcall / dotted operators" begin + @test parsestmt("f.(x,y)") == Expr(:., :f, Expr(:tuple, :x, :y)) + @test parsestmt("f.(x=1)") == Expr(:., :f, Expr(:tuple, Expr(:kw, :x, 1))) + @test parsestmt("f.(a=1; b=2)") == + Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + @test parsestmt("(a=1).()") == Expr(:., Expr(:(=), :a, 1), Expr(:tuple)) + @test parsestmt("x .+ y") == Expr(:call, Symbol(".+"), :x, 
:y) + @test parsestmt("(x=1) .+ y") == Expr(:call, Symbol(".+"), Expr(:(=), :x, 1), :y) + @test parsestmt("a .< b .< c") == Expr(:comparison, :a, Symbol(".<"), + :b, Symbol(".<"), :c) + @test parsestmt("a .< (.<) .< c") == Expr(:comparison, :a, Symbol(".<"), + Expr(:., :<), Symbol(".<"), :c) + @test parsestmt(".*(x)") == Expr(:call, Symbol(".*"), :x) + @test parsestmt(".+(x)") == Expr(:call, Symbol(".+"), :x) + @test parsestmt(".+x") == Expr(:call, Symbol(".+"), :x) + @test parsestmt("(.+)(x)") == Expr(:call, Expr(:., :+), :x) + @test parsestmt("(.+).(x)") == Expr(:., Expr(:., :+), Expr(:tuple, :x)) + + @test parsestmt(".+") == Expr(:., :+) + @test parsestmt(":.+") == QuoteNode(Symbol(".+")) + @test parsestmt(":(.+)") == Expr(:quote, (Expr(:., :+))) + @test parsestmt("quote .+ end") == Expr(:quote, + Expr(:block, + LineNumberNode(1), + Expr(:., :+))) + @test parsestmt(".+{x}") == Expr(:curly, Symbol(".+"), :x) + + # Quoted syntactic ops act different when in parens + @test parsestmt(":.=") == QuoteNode(Symbol(".=")) + @test parsestmt(":(.=)") == QuoteNode(Symbol(".=")) + + # A few other cases of bare dotted ops + @test parsestmt("f(.+)") == Expr(:call, :f, Expr(:., :+)) + @test parsestmt("(a, .+)") == Expr(:tuple, :a, Expr(:., :+)) + @test parsestmt("A.:.+") == Expr(:., :A, QuoteNode(Symbol(".+"))) + + # Issue #341 + @test parsestmt("./x", ignore_errors=true) == Expr(:call, Expr(:error, Expr(:., :/)), :x) + end + + @testset "syntactic update-assignment operators" begin + @test parsestmt("x += y") == Expr(:(+=), :x, :y) + @test parsestmt("x .+= y") == Expr(:(.+=), :x, :y) + @test parsestmt(":+=") == QuoteNode(Symbol("+=")) + @test parsestmt(":(+=)") == QuoteNode(Symbol("+=")) + @test parsestmt(":.+=") == QuoteNode(Symbol(".+=")) + @test parsestmt(":(.+=)") == QuoteNode(Symbol(".+=")) + @test parsestmt("x \u2212= y") == Expr(:(-=), :x, :y) + end + + @testset "let" begin + @test parsestmt("let x=1\n end") == + Expr(:let, Expr(:(=), :x, 1), Expr(:block, 
LineNumberNode(2))) + @test parsestmt("let x=1 ; end") == + Expr(:let, Expr(:(=), :x, 1), Expr(:block, LineNumberNode(1))) + @test parsestmt("let x ; end") == + Expr(:let, :x, Expr(:block, LineNumberNode(1))) + @test parsestmt("let x::1 ; end") == + Expr(:let, Expr(:(::), :x, 1), Expr(:block, LineNumberNode(1))) + @test parsestmt("let x=1,y=2 end") == + Expr(:let, Expr(:block, Expr(:(=), :x, 1), Expr(:(=), :y, 2)), Expr(:block, LineNumberNode(1))) + @test parsestmt("let x+=1 ; end") == + Expr(:let, Expr(:block, Expr(:+=, :x, 1)), Expr(:block, LineNumberNode(1))) + @test parsestmt("let ; end") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1))) + @test parsestmt("let ; body end") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(1), :body)) + @test parsestmt("let\na\nb\nend") == + Expr(:let, Expr(:block), Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) + end + + @testset "where" begin + @test parsestmt("A where T") == Expr(:where, :A, :T) + @test parsestmt("A where {T}") == Expr(:where, :A, :T) + @test parsestmt("A where {S, T}") == Expr(:where, :A, :S, :T) + @test parsestmt("A where {X, Y; Z}") == Expr(:where, :A, Expr(:parameters, :Z), :X, :Y) + end + + @testset "macrocall" begin + # line numbers + @test parsestmt("@m\n") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1)) + @test parsestmt("\n@m") == Expr(:macrocall, Symbol("@m"), LineNumberNode(2)) + # parameters + @test parsestmt("@m(x; a)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + Expr(:parameters, :a), :x) + @test parsestmt("@m(a=1; b=2)") == Expr(:macrocall, Symbol("@m"), LineNumberNode(1), + Expr(:parameters, Expr(:kw, :b, 2)), Expr(:(=), :a, 1)) + # @__dot__ + @test parsestmt("@.") == Expr(:macrocall, Symbol("@__dot__"), LineNumberNode(1)) + @test parsestmt("using A: @.") == Expr(:using, Expr(Symbol(":"), Expr(:., :A), Expr(:., Symbol("@__dot__")))) + + # var"" + @test parsestmt("@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a) + @test 
parsestmt("@var\"\\\"\" a") == Expr(:macrocall, Symbol("@\""), LineNumberNode(1), :a) + @test parsestmt("A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a) + + # Square brackets + @test parsestmt("@S[a,b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vect, :a, :b)) + @test parsestmt("@S[a b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:hcat, :a, :b)) + @test parsestmt("@S[a; b]") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:vcat, :a, :b)) + @test parsestmt("@S[a ;; b]", version=v"1.7") == + Expr(:macrocall, Symbol("@S"), LineNumberNode(1), Expr(:ncat, 2, :a, :b)) + end + + @testset "var" begin + @test parsestmt("var\"x\"") == :x + @test parsestmt("var\"\"") == Symbol("") + @test parsestmt("var\"\\\"\"") == Symbol("\"") + @test parsestmt("var\"\\\\\\\"\"") == Symbol("\\\"") + @test parsestmt("var\"\\\\x\"") == Symbol("\\\\x") + @test parsestmt("var\"x\"+y") == Expr(:call, :+, :x, :y) + end + + @testset "vect" begin + @test parsestmt("[x,y ; z]") == Expr(:vect, Expr(:parameters, :z), :x, :y) + end + + @testset "concatenation" begin + @test parsestmt("[a ;;; b ;;;; c]", version=v"1.7") == + Expr(:ncat, 4, Expr(:nrow, 3, :a, :b), :c) + @test parsestmt("[a b ; c d]") == + Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d)) + @test parsestmt("[a\nb]") == Expr(:vcat, :a, :b) + @test parsestmt("[a b]") == Expr(:hcat, :a, :b) + @test parsestmt("[a b ; c d]") == + Expr(:vcat, Expr(:row, :a, :b), Expr(:row, :c, :d)) + + @test parsestmt("T[a ;;; b ;;;; c]", version=v"1.7") == + Expr(:typed_ncat, :T, 4, Expr(:nrow, 3, :a, :b), :c) + @test parsestmt("T[a b ; c d]") == + Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d)) + @test parsestmt("T[a\nb]") == Expr(:typed_vcat, :T, :a, :b) + @test parsestmt("T[a b]") == Expr(:typed_hcat, :T, :a, :b) + @test parsestmt("T[a b ; c d]") == + Expr(:typed_vcat, :T, Expr(:row, :a, :b), Expr(:row, :c, :d)) + end + + @testset "generators" 
begin + @test parsestmt("(x for a in as for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:(=), :a, :as))) + @test parsestmt("(x for a in as, b in bs)") == + Expr(:generator, :x, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)) + @test parsestmt("(x for a in as, b in bs if z)") == + Expr(:generator, :x, + Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt("(x for a in as, b in bs for c in cs, d in ds)") == + Expr(:flatten, + Expr(:generator, + Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)), + Expr(:(=), :a, :as), Expr(:(=), :b, :bs))) + @test parsestmt("(x for a in as for b in bs if z)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :b, :bs))), + Expr(:(=), :a, :as))) + @test parsestmt("(x for a in as if z for b in bs)") == + Expr(:flatten, Expr(:generator, + Expr(:generator, :x, Expr(:(=), :b, :bs)), + Expr(:filter, :z, Expr(:(=), :a, :as)))) + @test parsestmt("[x for a = as for b = bs if cond1 for c = cs if cond2]" ) == + Expr(:comprehension, + Expr(:flatten, + Expr(:generator, + Expr(:flatten, + Expr(:generator, + Expr(:generator, + :x, + Expr(:filter, + :cond2, + Expr(:(=), :c, :cs))), + Expr(:filter, + :cond1, + Expr(:(=), :b, :bs)))), + Expr(:(=), :a, :as)))) + @test parsestmt("[x for a = as if begin cond2 end]" ) == + Expr(:comprehension, Expr(:generator, :x, + Expr(:filter, + Expr(:block, LineNumberNode(1), :cond2), + Expr(:(=), :a, :as)))) + @test parsestmt("(x for a in as if z)") == + Expr(:generator, :x, Expr(:filter, :z, Expr(:(=), :a, :as))) + end + + @testset "try" begin + @test parsestmt("try x catch e; y end") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y)) + @test parsestmt("try x finally y end") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + false, + false, + Expr(:block, LineNumberNode(1), :y)) + @test parsestmt("try x catch e; y finally z end") == + 
Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + Expr(:block, LineNumberNode(1), :z)) + @test parsestmt("try x catch e; y else z end", version=v"1.8") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + false, + Expr(:block, LineNumberNode(1), :z)) + @test parsestmt("try x catch e; y else z finally w end", version=v"1.8") == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :y), + Expr(:block, LineNumberNode(1), :w), + Expr(:block, LineNumberNode(1), :z)) + # finally before catch + @test parsestmt("try x finally y catch e z end", ignore_warnings=true) == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + :e, + Expr(:block, LineNumberNode(1), :z), + Expr(:block, LineNumberNode(1), :y)) + # empty recovery + @test parsestmt("try x end", ignore_errors=true) == + Expr(:try, + Expr(:block, LineNumberNode(1), :x), + false, false, + Expr(:block, Expr(:error))) + end + + @testset "juxtapose" begin + @test parsestmt("2x") == Expr(:call, :*, 2, :x) + @test parsestmt("(2)(3)x") == Expr(:call, :*, 2, 3, :x) + end + + @testset "Core.@doc" begin + @test parsestmt("\"x\" f") == + Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(1), "x", :f) + @test parsestmt("\n\"x\" f") == + Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f) + end + + @testset "String and cmd macros" begin + # Custom string macros + @test parsestmt("foo\"str\"") == + Expr(:macrocall, Symbol("@foo_str"), LineNumberNode(1), "str") + # Bare @cmd + @test parsestmt("\n`str`") == + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(2), "str") + # Custom cmd macros + @test parsestmt("foo`str`") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str") + @test parsestmt("foo`str`flag") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str", "flag") + @test parsestmt("foo```\n a\n b```") == + 
Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "a\nb") + # Expr conversion distinguishes from explicit calls to a macro of the same name + @test parsestmt("@foo_cmd `str`") == + Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), + Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "str")) + end + + @testset "return" begin + @test parsestmt("return x") == Expr(:return, :x) + @test parsestmt("return") == Expr(:return, nothing) + end + + @testset "Large integer macros" begin + @test parsestmt("0x00000000000000001") == + Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")), + nothing, "0x00000000000000001") + + @test parsestmt("(0x00000000000000001)") == + Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")), + nothing, "0x00000000000000001") + end + + @testset "struct" begin + @test parsestmt("struct A end") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(1))) + @test parsestmt("mutable struct A end") == + Expr(:struct, true, :A, Expr(:block, LineNumberNode(1))) + + @test parsestmt("struct A <: B \n a::X \n end") == + Expr(:struct, false, Expr(:<:, :A, :B), + Expr(:block, LineNumberNode(2), Expr(:(::), :a, :X))) + @test parsestmt("struct A \n a \n b \n end") == + Expr(:struct, false, :A, + Expr(:block, LineNumberNode(2), :a, LineNumberNode(3), :b)) + @test parsestmt("struct A const a end", version=v"1.8") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(1), Expr(:const, :a))) + + @test parsestmt("struct A \n \"doc\" \n a end") == + Expr(:struct, false, :A, Expr(:block, LineNumberNode(2), "doc", :a)) + end + + @testset "export" begin + @test parsestmt("export a") == Expr(:export, :a) + @test parsestmt("export @a") == Expr(:export, Symbol("@a")) + @test parsestmt("export @var\"'\"") == Expr(:export, Symbol("@'")) + @test parsestmt("export a, \n @b") == Expr(:export, :a, Symbol("@b")) + @test parsestmt("export +, ==") == Expr(:export, :+, :(==)) + @test parsestmt("export \n a") == Expr(:export, :a) + end + + 
@testset "global/const/local" begin + @test parsestmt("global x") == Expr(:global, :x) + @test parsestmt("local x") == Expr(:local, :x) + @test parsestmt("global x,y") == Expr(:global, :x, :y) + @test parsestmt("global const x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt("local const x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt("const global x = 1") == Expr(:const, Expr(:global, Expr(:(=), :x, 1))) + @test parsestmt("const local x = 1") == Expr(:const, Expr(:local, Expr(:(=), :x, 1))) + @test parsestmt("const x,y = 1,2") == Expr(:const, Expr(:(=), Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) + @test parsestmt("const x = 1") == Expr(:const, Expr(:(=), :x, 1)) + @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1)) + @test parsestmt("global x += 1") == Expr(:global, Expr(:+=, :x, 1)) + + # Parsing of global/local with + @test parsestmt("global (x,y)") == Expr(:global, :x, :y) + @test parsestmt("local (x,y)") == Expr(:local, :x, :y) + end + + @testset "tuples" begin + @test parsestmt("(;)") == Expr(:tuple, Expr(:parameters)) + @test parsestmt("(; a=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :a, 1))) + @test parsestmt("(; a=1; b=2)") == + Expr(:tuple, Expr(:parameters, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))) + @test parsestmt("(a; b; c,d)") == + Expr(:tuple, Expr(:parameters, Expr(:parameters, :c, :d), :b), :a) + end + + @testset "module" begin + @test parsestmt("module A end") == + Expr(:module, true, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) + @test parsestmt("baremodule A end") == + Expr(:module, false, :A, Expr(:block, LineNumberNode(1), LineNumberNode(1))) + end + + @testset "errors" begin + @test parsestmt("--", ignore_errors=true) == + Expr(:error, "invalid operator: `--`") + @test parseall("a b", ignore_errors=true) == + Expr(:toplevel, LineNumberNode(1), :a, + LineNumberNode(1), Expr(:error, :b)) + @test parsestmt("(x", ignore_errors=true) == + 
Expr(:block, :x, Expr(:error)) + @test parsestmt("x do", ignore_errors=true) == + Expr(:block, :x, Expr(:error, :do)) + @test parsestmt("x var\"y\"", ignore_errors=true) == + Expr(:block, :x, Expr(:error, :var, ErrorVal(), "y", ErrorVal())) + @test parsestmt("var\"y", ignore_errors=true) == + Expr(:var, :y, Expr(:error)) + end + + @testset "import" begin + @test parsestmt("import A") == Expr(:import, Expr(:., :A)) + @test parsestmt("import A.(:b).:c: x.:z", ignore_warnings=true) == + Expr(:import, Expr(Symbol(":"), Expr(:., :A, :b, :c), Expr(:., :x, :z))) + # Stupid parens and quotes in import paths + @test parsestmt("import A.:+", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parsestmt("import A.(:+)", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parsestmt("import A.:(+)", ignore_warnings=true) == + Expr(:import, Expr(:., :A, :+)) + @test parsestmt("import A.:(+) as y", ignore_warnings=true, version=v"1.6") == + Expr(:import, Expr(:as, Expr(:., :A, :+), :y)) + end +end + +@testset "SyntaxNode->Expr conversion" begin + src = repeat('a', 1000) * '\n' * "@hi" + @test Expr(parsestmt(SyntaxNode, SubString(src, 1001:lastindex(src)))) == + Expr(:macrocall, Symbol("@hi"), LineNumberNode(2)) +end diff --git a/JuliaSyntax/test/fuzz_test.jl b/JuliaSyntax/test/fuzz_test.jl new file mode 100644 index 0000000000000..71c9ff77b1ab6 --- /dev/null +++ b/JuliaSyntax/test/fuzz_test.jl @@ -0,0 +1,1023 @@ +using .JuliaSyntax: tokenize +import Logging +import Test + +# Parser fuzz testing tools. 
+ +const all_tokens = [ + "#x\n" + "#==#" + " " + "\t" + "\n" + "x" + "@" + "," + ";" + + "baremodule" + "begin" + "break" + "const" + "continue" + "do" + "export" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "catch" + "finally" + "else" + "elseif" + "end" + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "type" + "var" + + "1" + "0b1" + "0x1" + "0o1" + "1.0" + "1.0f0" + "\"s\"" + "'c'" + "`s`" + "true" + "false" + + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + + "=" + "+=" + "-=" # Also used for "−=" + "−=" + "*=" + "/=" + "//=" + "|=" + "^=" + "÷=" + "%=" + "<<=" + ">>=" + ">>>=" + "\\=" + "&=" + ":=" + "~" + "\$=" + "⊻=" + "≔" + "⩴" + "≕" + + "=>" + + "?" + + "-->" + "<--" + "<-->" + "←" + "→" + "↔" + "↚" + "↛" + "↞" + "↠" + "↢" + "↣" + "↤" + "↦" + "↮" + "⇎" + "⇍" + "⇏" + "⇐" + "⇒" + "⇔" + "⇴" + "⇶" + "⇷" + "⇸" + "⇹" + "⇺" + "⇻" + "⇼" + "⇽" + "⇾" + "⇿" + "⟵" + "⟶" + "⟷" + "⟹" + "⟺" + "⟻" + "⟼" + "⟽" + "⟾" + "⟿" + "⤀" + "⤁" + "⤂" + "⤃" + "⤄" + "⤅" + "⤆" + "⤇" + "⤌" + "⤍" + "⤎" + "⤏" + "⤐" + "⤑" + "⤔" + "⤕" + "⤖" + "⤗" + "⤘" + "⤝" + "⤞" + "⤟" + "⤠" + "⥄" + "⥅" + "⥆" + "⥇" + "⥈" + "⥊" + "⥋" + "⥎" + "⥐" + "⥒" + "⥓" + "⥖" + "⥗" + "⥚" + "⥛" + "⥞" + "⥟" + "⥢" + "⥤" + "⥦" + "⥧" + "⥨" + "⥩" + "⥪" + "⥫" + "⥬" + "⥭" + "⥰" + "⧴" + "⬱" + "⬰" + "⬲" + "⬳" + "⬴" + "⬵" + "⬶" + "⬷" + "⬸" + "⬹" + "⬺" + "⬻" + "⬼" + "⬽" + "⬾" + "⬿" + "⭀" + "⭁" + "⭂" + "⭃" + "⭄" + "⭇" + "⭈" + "⭉" + "⭊" + "⭋" + "⭌" + "←" + "→" + "⇜" + "⇝" + "↜" + "↝" + "↩" + "↪" + "↫" + "↬" + "↼" + "↽" + "⇀" + "⇁" + "⇄" + "⇆" + "⇇" + "⇉" + "⇋" + "⇌" + "⇚" + "⇛" + "⇠" + "⇢" + "↷" + "↶" + "↺" + "↻" + + "||" + + "&&" + + "<:" + ">:" + ">" + "<" + ">=" + "≥" + "<=" + "≤" + "==" + "===" + "≡" + "!=" + "≠" + "!==" + "≢" + "∈" + "in" + "isa" + "∉" + "∋" + "∌" + "⊆" + "⊈" + "⊂" + "⊄" + "⊊" + "∝" + "∊" + "∍" + "∥" + "∦" + "∷" + "∺" + "∻" + "∽" + "∾" + "≁" + "≃" + "≂" + "≄" + "≅" + "≆" 
+ "≇" + "≈" + "≉" + "≊" + "≋" + "≌" + "≍" + "≎" + "≐" + "≑" + "≒" + "≓" + "≖" + "≗" + "≘" + "≙" + "≚" + "≛" + "≜" + "≝" + "≞" + "≟" + "≣" + "≦" + "≧" + "≨" + "≩" + "≪" + "≫" + "≬" + "≭" + "≮" + "≯" + "≰" + "≱" + "≲" + "≳" + "≴" + "≵" + "≶" + "≷" + "≸" + "≹" + "≺" + "≻" + "≼" + "≽" + "≾" + "≿" + "⊀" + "⊁" + "⊃" + "⊅" + "⊇" + "⊉" + "⊋" + "⊏" + "⊐" + "⊑" + "⊒" + "⊜" + "⊩" + "⊬" + "⊮" + "⊰" + "⊱" + "⊲" + "⊳" + "⊴" + "⊵" + "⊶" + "⊷" + "⋍" + "⋐" + "⋑" + "⋕" + "⋖" + "⋗" + "⋘" + "⋙" + "⋚" + "⋛" + "⋜" + "⋝" + "⋞" + "⋟" + "⋠" + "⋡" + "⋢" + "⋣" + "⋤" + "⋥" + "⋦" + "⋧" + "⋨" + "⋩" + "⋪" + "⋫" + "⋬" + "⋭" + "⋲" + "⋳" + "⋴" + "⋵" + "⋶" + "⋷" + "⋸" + "⋹" + "⋺" + "⋻" + "⋼" + "⋽" + "⋾" + "⋿" + "⟈" + "⟉" + "⟒" + "⦷" + "⧀" + "⧁" + "⧡" + "⧣" + "⧤" + "⧥" + "⩦" + "⩧" + "⩪" + "⩫" + "⩬" + "⩭" + "⩮" + "⩯" + "⩰" + "⩱" + "⩲" + "⩳" + "⩵" + "⩶" + "⩷" + "⩸" + "⩹" + "⩺" + "⩻" + "⩼" + "⩽" + "⩾" + "⩿" + "⪀" + "⪁" + "⪂" + "⪃" + "⪄" + "⪅" + "⪆" + "⪇" + "⪈" + "⪉" + "⪊" + "⪋" + "⪌" + "⪍" + "⪎" + "⪏" + "⪐" + "⪑" + "⪒" + "⪓" + "⪔" + "⪕" + "⪖" + "⪗" + "⪘" + "⪙" + "⪚" + "⪛" + "⪜" + "⪝" + "⪞" + "⪟" + "⪠" + "⪡" + "⪢" + "⪣" + "⪤" + "⪥" + "⪦" + "⪧" + "⪨" + "⪩" + "⪪" + "⪫" + "⪬" + "⪭" + "⪮" + "⪯" + "⪰" + "⪱" + "⪲" + "⪳" + "⪴" + "⪵" + "⪶" + "⪷" + "⪸" + "⪹" + "⪺" + "⪻" + "⪼" + "⪽" + "⪾" + "⪿" + "⫀" + "⫁" + "⫂" + "⫃" + "⫄" + "⫅" + "⫆" + "⫇" + "⫈" + "⫉" + "⫊" + "⫋" + "⫌" + "⫍" + "⫎" + "⫏" + "⫐" + "⫑" + "⫒" + "⫓" + "⫔" + "⫕" + "⫖" + "⫗" + "⫘" + "⫙" + "⫷" + "⫸" + "⫹" + "⫺" + "⊢" + "⊣" + "⟂" + "⫪" + "⫫" + + "<|" + "|>" + + ":" + ".." 
+ "…" + "⁝" + "⋮" + "⋱" + "⋰" + "⋯" + + "\$" + "+" + "-" # also used for "−" + "−" + "++" + "⊕" + "⊖" + "⊞" + "⊟" + "|" + "∪" + "∨" + "⊔" + "±" + "∓" + "∔" + "∸" + "≏" + "⊎" + "⊻" + "⊽" + "⋎" + "⋓" + "⧺" + "⧻" + "⨈" + "⨢" + "⨣" + "⨤" + "⨥" + "⨦" + "⨧" + "⨨" + "⨩" + "⨪" + "⨫" + "⨬" + "⨭" + "⨮" + "⨹" + "⨺" + "⩁" + "⩂" + "⩅" + "⩊" + "⩌" + "⩏" + "⩐" + "⩒" + "⩔" + "⩖" + "⩗" + "⩛" + "⩝" + "⩡" + "⩢" + "⩣" + "¦" + + "*" + "/" + "÷" + "%" + "⋅" # also used for lookalikes "·" and "·" + "·" + "·" + "∘" + "×" + "\\" + "&" + "∩" + "∧" + "⊗" + "⊘" + "⊙" + "⊚" + "⊛" + "⊠" + "⊡" + "⊓" + "∗" + "∙" + "∤" + "⅋" + "≀" + "⊼" + "⋄" + "⋆" + "⋇" + "⋉" + "⋊" + "⋋" + "⋌" + "⋏" + "⋒" + "⟑" + "⦸" + "⦼" + "⦾" + "⦿" + "⧶" + "⧷" + "⨇" + "⨰" + "⨱" + "⨲" + "⨳" + "⨴" + "⨵" + "⨶" + "⨷" + "⨸" + "⨻" + "⨼" + "⨽" + "⩀" + "⩃" + "⩄" + "⩋" + "⩍" + "⩎" + "⩑" + "⩓" + "⩕" + "⩘" + "⩚" + "⩜" + "⩞" + "⩟" + "⩠" + "⫛" + "⊍" + "▷" + "⨝" + "⟕" + "⟖" + "⟗" + "⌿" + "⨟" + + "//" + + "<<" + ">>" + ">>>" + + "^" + "↑" + "↓" + "⇵" + "⟰" + "⟱" + "⤈" + "⤉" + "⤊" + "⤋" + "⤒" + "⤓" + "⥉" + "⥌" + "⥍" + "⥏" + "⥑" + "⥔" + "⥕" + "⥘" + "⥙" + "⥜" + "⥝" + "⥠" + "⥡" + "⥣" + "⥥" + "⥮" + "⥯" + "↑" + "↓" + + "::" + + "where" + + "." + + "!" + "'" + ".'" + "->" + + "¬" + "√" + "∛" + "∜" +] + +const cutdown_tokens = [ + "#x\n" + "#==#" + " " + "\t" + "\n" + "x" + "β" + "@" + "," + ";" + + "baremodule" + "begin" + "break" + "const" + "continue" + "do" + "export" + "for" + "function" + "global" + "if" + "import" + "let" + "local" + "macro" + "module" + "quote" + "return" + "struct" + "try" + "using" + "while" + "catch" + "finally" + "else" + "elseif" + "end" + "abstract" + "as" + "doc" + "mutable" + "outer" + "primitive" + "type" + "var" + + "1" + "0b1" + "0x1" + "0o1" + "1.0" + "1.0f0" + "\"s\"" + "'c'" + "`s`" + "true" + "false" + + "[" + "]" + "{" + "}" + "(" + ")" + "\"" + "\"\"\"" + "`" + "```" + + "=" + "+=" + "~" + + "=>" + + "?" 
+ + "-->" + + "||" + + "&&" + + "<:" + ">:" + ">" + "<" + ">=" + "<=" + "==" + "===" + "!=" + + "<|" + "|>" + + ":" + ".." + "…" + + "\$" + "+" + "−" + "-" + "|" + + "*" + "/" + "⋅" # also used for lookalikes "·" and "·" + "·" + "\\" + + "//" + + "<<" + + "^" + + "::" + + "where" + + "." + + "!" + "'" + "->" + + "√" +] + +#------------------------------------------------------------------------------- +# Parsing functions for use with fuzz_test + +function try_parseall_failure(str) + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, str, ignore_errors=true); + return nothing + catch exc + !(exc isa InterruptException) || rethrow() + rstr = reduce_text(str, parser_throws_exception) + @error "Parser threw exception" rstr exception=current_exceptions() + return rstr + end +end + +function try_hook_failure(str) + try + test_logger = Test.TestLogger() + Logging.with_logger(test_logger) do + try + Meta_parseall(str) + catch exc + exc isa Meta.ParseError || exc isa JuliaSyntax.ParseError || rethrow() + end + end + if !isempty(test_logger.logs) + return str + end + catch exc + return str + end + return nothing +end + +#------------------------------------------------------------------------------- +"""Delete `nlines` adjacent lines from code, at `niters` randomly chosen positions""" +function delete_lines(lines, nlines, niters) + selection = trues(length(lines)) + for j=1:niters + i = rand(1:length(lines)-nlines) + selection[i:i+nlines] .= false + end + join(lines[selection], '\n') +end + +"""Delete `ntokens` adjacent tokens from code, at `niters` randomly chosen positions""" +function delete_tokens(code, tokens, ntokens, niters) + # [ aa bbbb cc d eeeeee ] + # | | | | | | + selection = trues(length(tokens)) + for j=1:niters + i = rand(1:length(tokens)-ntokens) + selection[i:i+ntokens] .= false + end + io = IOBuffer() + i = 1 + while true + while i <= length(selection) && !selection[i] + i += 1 + end + if i > length(selection) + break + end + first_ind = 
first(tokens[i].range)
+        while selection[i] && i < length(selection)
+            i += 1
+        end
+        last_ind = last(tokens[i].range)
+        write(io, @view code[first_ind:last_ind])
+        if i == length(selection)
+            break
+        end
+    end
+    return String(take!(io))
+end
+
+#-------------------------------------------------------------------------------
+# Generators for "potentially bad input"
+
+"""
+Fuzz test parser against all tuples of length `N` with elements taken from
+`tokens`.
+"""
+function product_token_fuzz(tokens, N)
+    (join(ts) for ts in Iterators.product([tokens for _ in 1:N]...))
+end
+
+function random_token_fuzz(tokens, ntokens, ntries)
+    (join(rand(tokens, ntokens)) for _ in 1:ntries)
+end
+
+"""
+Fuzz test parser against randomly generated binary strings
+"""
+function random_binary_fuzz(nbytes, N)
+    (String(rand(UInt8, nbytes)) for _ in 1:N)
+end
+
+"""
+Fuzz test by deleting random lines of some given source `code`
+"""
+function deleted_line_fuzz(code, N; nlines=10, niters=10)
+    lines = split(code, '\n')
+    (delete_lines(lines, nlines, niters) for _=1:N)
+end
+
+"""
+Fuzz test by deleting random tokens from given source `code`
+"""
+function deleted_token_fuzz(code, N; ntokens=10, niters=10)
+    ts = tokenize(code)
+    (delete_tokens(code, ts, ntokens, niters) for _=1:N)
+end
+
+"""
+Fuzz test a parsing function by trying it with many "bad" input strings.
+
+`try_parsefail` should return `nothing` when the parser succeeds, and return a
+string (or reduced string) when parsing fails.
+""" +function fuzz_test(try_parsefail::Function, bad_input_iter) + error_strings = [] + for str in bad_input_iter + res = try_parsefail(str) + if !isnothing(res) + push!(error_strings, res) + end + end + return error_strings +end + + +# Examples +# +# fuzz_test(try_hook_failure, product_token_fuzz(cutdown_tokens, 2)) +# fuzz_test(try_parseall_failure, product_token_fuzz(cutdown_tokens, 2)) diff --git a/JuliaSyntax/test/green_node.jl b/JuliaSyntax/test/green_node.jl new file mode 100644 index 0000000000000..0c3be65873c2e --- /dev/null +++ b/JuliaSyntax/test/green_node.jl @@ -0,0 +1,68 @@ +@testset "GreenNode" begin + t = parsestmt(GreenNode, "aa + b") + + @test span(t) == 6 + @test !is_leaf(t) + @test head(t) == SyntaxHead(K"call", 0x0088) + @test span.(children(t)) == [2,1,1,1,1] + @test head.(children(t)) == [ + SyntaxHead(K"Identifier", 0x0000) + SyntaxHead(K"Whitespace", 0x0001) + SyntaxHead(K"Identifier", 0x0000) + SyntaxHead(K"Whitespace", 0x0001) + SyntaxHead(K"Identifier", 0x0000) + ] + + @test numchildren(t) == 5 + @test !is_leaf(t) + @test is_leaf(t[1]) + + @test t[1] === children(t)[1] + @test t[2:4] == [t[2],t[3],t[4]] + @test firstindex(t) == 1 + @test lastindex(t) == 5 + + t2 = parsestmt(GreenNode, "aa + b") + @test t == t2 + @test t !== t2 + + text = "f(@x(y), z)" + @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text)) == + """ + 1:11 │[call] + 1:1 │ Identifier ✔ + 2:2 │ ( + 3:7 │ [macrocall] + 3:4 │ [macro_name] + 3:3 │ @ + 4:4 │ Identifier ✔ + 5:5 │ ( + 6:6 │ Identifier ✔ + 7:7 │ ) + 8:8 │ , + 9:9 │ Whitespace + 10:10 │ Identifier ✔ + 11:11 │ ) + """ + + @test sprint(show, MIME("text/plain"), parsestmt(GreenNode, text), text) == + """ + 1:11 │[call] + 1:1 │ Identifier ✔ "f" + 2:2 │ ( "(" + 3:7 │ [macrocall] + 3:4 │ [macro_name] + 3:3 │ @ "@" + 4:4 │ Identifier ✔ "x" + 5:5 │ ( "(" + 6:6 │ Identifier ✔ "y" + 7:7 │ ) ")" + 8:8 │ , "," + 9:9 │ Whitespace " " + 10:10 │ Identifier ✔ "z" + 11:11 │ ) ")" + """ + + @test sprint(show, 
parsestmt(GreenNode, "a + bb - f(ccc)")) == + "(call-i (call-i 1-1::Identifier 2-2::Whitespace-t 3-3::Identifier 4-4::Whitespace-t 5-6::Identifier) 7-7::Whitespace-t 8-8::Identifier 9-9::Whitespace-t (call 10-10::Identifier 11-11::(-t 12-14::Identifier 15-15::)-t))" +end diff --git a/JuliaSyntax/test/hooks.jl b/JuliaSyntax/test/hooks.jl new file mode 100644 index 0000000000000..333344d7c50a1 --- /dev/null +++ b/JuliaSyntax/test/hooks.jl @@ -0,0 +1,516 @@ +function _unwrap_parse_error(core_hook_result) + @test Meta.isexpr(core_hook_result[1], :error, 1) + err = core_hook_result[1].args[1] + if JuliaSyntax._has_v1_10_hooks + @test err isa Meta.ParseError + return err.detail + else + @test err isa JuliaSyntax.ParseError + return err + end +end + +@testset "Hooks for Core integration" begin + @testset "whitespace and comment parsing" begin + @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) + @test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0) + + @test JuliaSyntax.core_parser_hook(" ", "somefile", 1, 2, :statement) == Core.svec(nothing,2) + @test JuliaSyntax.core_parser_hook(" #==# ", "somefile", 1, 6, :statement) == Core.svec(nothing,6) + + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :statement) == Core.svec(:x,4) + @test JuliaSyntax.core_parser_hook(" x \n", "somefile", 1, 0, :atom) == Core.svec(:x,2) + + # https://github.com/JuliaLang/JuliaSyntax.jl/issues/316#issuecomment-1870294857 + stmtstr = + """ + plus(a, b) = a + b + + # Issue #81 + f() = nothing + """ + @test JuliaSyntax.core_parser_hook(stmtstr, "somefile", 1, 0, :statement)[2] == 19 + end + + @testset "filename and lineno" begin + ex = JuliaSyntax.core_parser_hook("@a", "somefile", 1, 0, :statement)[1] + @test Meta.isexpr(ex, :macrocall) + @test ex.args[2] == LineNumberNode(1, "somefile") + + ex = JuliaSyntax.core_parser_hook("@a", "otherfile", 2, 0, :statement)[1] + @test ex.args[2] == 
LineNumberNode(2, "otherfile") + + # Errors also propagate file & lineno + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement) + ) + @test err isa JuliaSyntax.ParseError + @test filename(err) == "f1" + @test err.source.first_line == 1 + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement) + ) + @test err isa JuliaSyntax.ParseError + @test filename(err) == "f2" + @test err.source.first_line == 2 + + # Errors including nontrivial offset indices + err = _unwrap_parse_error( + JuliaSyntax.core_parser_hook("a\nh{x)\nb", "test.jl", 1, 2, :statement) + ) + @test err isa JuliaSyntax.ParseError + @test err.source.first_line == 1 + @test err.diagnostics[1].first_byte == 6 + @test err.diagnostics[1].last_byte == 5 + @test err.diagnostics[1].message == "Expected `}` or `,`" + end + + @testset "toplevel errors" begin + ex = JuliaSyntax.core_parser_hook("a\nb\n[x,\ny)", "somefile", 1, 0, :all)[1] + @test ex.head == :toplevel + @test ex.args[1:5] == [ + LineNumberNode(1, "somefile"), + :a, + LineNumberNode(2, "somefile"), + :b, + LineNumberNode(4, "somefile"), + ] + @test Meta.isexpr(ex.args[6], :error) + + ex = JuliaSyntax.core_parser_hook("x.", "somefile", 0, 0, :all)[1] + @test ex.head == :toplevel + @test ex.args[2].head == :incomplete + end + + @testset "enable_in_core!" 
begin + JuliaSyntax.enable_in_core!() + + @test Meta.parse("x + 1") == :(x + 1) + @test Meta.parse("x + 1", 1) == (:(x + 1), 6) + + # Test that parsing statements incrementally works and stops after + # whitespace / comment trivia + @test Meta.parse("x + 1\n(y)\n", 1) == (:(x + 1), 7) + @test Meta.parse("x + 1\n(y)\n", 7) == (:y, 11) + @test Meta.parse(" x#==#", 1) == (:x, 7) + @test Meta.parse(" #==# ", 1) == (nothing, 7) + + # Check the exception type that Meta.parse throws + if JuliaSyntax._has_v1_10_hooks + @test_throws Meta.ParseError Meta.parse("[x)") + @test_throws Meta.ParseError eval(Meta.parse("[x)", raise=false)) + @test_throws Meta.ParseError eval(Meta.parse("(x")) # Expr(:incomplete) + else + @test_throws JuliaSyntax.ParseError Meta.parse("[x)") + end + + # Check custom string types defined in a world age later than + # enable_in_core!() can be passed to Meta.parse() + mystr = @eval begin + struct MyString <: AbstractString + x::String + end + Base.String(s::MyString) = s.x + Base.ncodeunits(s::MyString) = ncodeunits(s.x) + + MyString("hi") + end + @test Meta.parse(mystr) == :hi + + err = Meta.parse("\"") + @test Meta.isexpr(err, :incomplete) + if JuliaSyntax._has_v1_10_hooks + @test err.args[1] isa Meta.ParseError + exc = err.args[1] + @test exc.msg == "ParseError:\n# Error @ none:1:2\n\"\n#└ ── unterminated string literal" + @test exc.detail isa JuliaSyntax.ParseError + @test exc.detail.incomplete_tag === :string + else + @test err.args[1] isa String + end + + JuliaSyntax.enable_in_core!(false) + end + + @testset "Expr(:incomplete)" begin + for (str, tag) in [ + "\"" => :string + "\"\$foo" => :string + "#=" => :comment + "'" => :char + "'a" => :char + "`" => :cmd + "(" => :other + "[" => :other + "begin" => :block + "quote" => :block + "let" => :block + "let;" => :block + "for" => :other + "for x=xs" => :block + "function" => :other + "function f()" => :block + "macro" => :other + "macro f()" => :block + "f() do" => :other + "f() do x" => :block + 
"module" => :other + "module X" => :block + "baremodule" => :other + "baremodule X" => :block + "mutable struct" => :other + "mutable struct X" => :block + "struct" => :other + "struct X" => :block + "if" => :other + "if x" => :block + "while" => :other + "while x" => :block + "try" => :block + # could be `try x catch exc body end` or `try x catch ; body end` + "try x catch" => :block + "using" => :other + "import" => :other + "local" => :other + "global" => :other + + "1 == 2 ?" => :other + "1 == 2 ? 3 :" => :other + "1," => :other + "1, " => :other + "1,\n" => :other + "1, \n" => :other + "f(1, " => :other + "[x " => :other + "( " => :other + + # Reference parser fails to detect incomplete exprs in this case + "(x for y" => :other + + # Syntax which may be an error but is not incomplete + "" => :none + ")" => :none + "1))" => :none + "a b" => :none + "()x" => :none + "." => :none + + # Some error tokens which cannot be made complete by appending more characters + "1.e1." => :none + "\u200b" => :none + "x #=\xf5b\n=#" => :none + "₁" => :none + "0x1.0\n" => :none + "\"\$x෴\"" => :none + "10e1000" => :none + + # Multiline input with comments (#519) + "function f()\nbody #comment" => :block + "a = [\n1,\n2, #comment" => :other + + # Extended set of cases extracted from the REPL stdlib tests. + # There is some redundancy here, but we've mostly left these + # here because incomplete-detection is partly heuristic and + # it's good to have a wide variety of incomplete expressions. + # + # The "desired" incomplete tag here was generated from the + # flisp parser. + "Main.CompletionFoo." => :other + "Base.return_types(getin" => :other + "test7()." => :other + "(3,2)." => :other + "Base.print(\"lol" => :string + "run(`lol" => :cmd + "copy(A')." 
=> :other + "cd(\"path_to_an_empty_folder_should_not_complete_latex\\\\\\alpha" => :string + "\"C:\\\\ \\alpha" => :string + "cd(\"C:\\U" => :string + "max(" => :other + "!(" => :other + "!isnothing(" => :other + "!!isnothing(" => :other + "CompletionFoo.test(1, 1, " => :other + "CompletionFoo.test(CompletionFoo.array," => :other + "CompletionFoo.test(1,1,1," => :other + "CompletionFoo.test1(Int," => :other + "CompletionFoo.test1(Float64," => :other + "prevind(\"θ\",1," => :other + "(1, CompletionFoo.test2(\")\"," => :other + "(1, CompletionFoo.test2(')'," => :other + "(1, CompletionFoo.test2(`')'`," => :other + "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," => :other + "CompletionFoo.test3([1.,2.], 1.," => :other + "CompletionFoo.test4(\"e\",r\" \"," => :other + "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\")," => :other + "CompletionFoo.test5(Bool[x==1 for x=1:4]," => :other + "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], " => :other + "CompletionFoo.test4(\"\\\"\"," => :other + "convert(" => :other + "convert(" => :other + "CompletionFoo.test5(AbstractArray[Bool[]][1]," => :other + "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," => :other + "CompletionFoo.kwtest( " => :other + "CompletionFoo.kwtest(;" => :other + "CompletionFoo.kwtest(; x=1, " => :other + "CompletionFoo.kwtest(; kw=1, " => :other + "CompletionFoo.kwtest(x=1, " => :other + "CompletionFoo.kwtest(x=1; " => :other + "CompletionFoo.kwtest(x=kw=1, " => :other + "CompletionFoo.kwtest(; x=kw=1, " => :other + "CompletionFoo.kwtest2(1, x=1," => :other + "CompletionFoo.kwtest2(1; x=1, " => :other + "CompletionFoo.kwtest2(1, x=1; " => :other + "CompletionFoo.kwtest2(1, kw=1, " => :other + "CompletionFoo.kwtest2(1; kw=1, " => :other + "CompletionFoo.kwtest2(1, kw=1; " => :other + "CompletionFoo.kwtest2(y=3, 1, " => :other + "CompletionFoo.kwtest2(y=3, 1; " => :other + "CompletionFoo.kwtest2(kw=3, 
1, " => :other + "CompletionFoo.kwtest2(kw=3, 1; " => :other + "CompletionFoo.kwtest2(1; " => :other + "CompletionFoo.kwtest2(1, " => :other + "CompletionFoo.kwtest4(x23=18, x; " => :other + "CompletionFoo.kwtest4(x23=18, x, " => :other + "CompletionFoo.kwtest4(x23=18, " => :other + "CompletionFoo.kwtest5(3, somekwarg=6," => :other + "CompletionFoo.kwtest5(3, somekwarg=6, anything, " => :other + "CompletionFoo.?([1,2,3], 2.0" => :other + "CompletionFoo.?('c'" => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(false, \"a\", 3, " => :other + "CompletionFoo.?(\"a\", 3, " => :other + "CompletionFoo.?(; " => :other + "CompletionFoo.?(" => :other + "CompletionFoo.test10(z, Integer[]...," => :other + "CompletionFoo.test10(3, Integer[]...," => :other + "CompletionFoo.test10(3, 4," => :other + "CompletionFoo.test10(3, 4, 5," => :other + "CompletionFoo.test10(z, z, 0, " => :other + "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], " => :other + "CompletionFoo.test11(Integer[false][1], Integer[14][1], " => :other + "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6," => :other + "CompletionFoo.test11(3, 4," => :other + "CompletionFoo.test11(0x8, 5," => :other + "CompletionFoo.test11(0x8, 'c'," => :other + "CompletionFoo.test11('d', 3," => :other + "CompletionFoo.test!12(" => :other + "CompletionFoo.kwtest(; x=2, y=4; kw=3, " => :other + "CompletionFoo.kwtest(x=2; y=4; " => :other + "CompletionFoo.kwtest((x=y)=4, " => :other + "CompletionFoo.kwtest(; (x=y)=4, " => :other + "CompletionFoo.kwtest(; w...=16, " => :other + "CompletionFoo.kwtest(; 2, " => :other + "CompletionFoo.kwtest(; 2=3, " => :other + "CompletionFoo.kwtest3(im; (true ? length : length), " => :other + "CompletionFoo.kwtest.(x=2; y=4; " => :other + "CompletionFoo.kwtest.(; w...=16, " => :other + "(1+2im)." => :other + "((1+2im))." => :other + "CompletionFoo.test_y_array[1]." => :other + "CompletionFoo.named." 
=> :other + "#=\n\\alpha" => :comment + "#=\nmax" => :comment + "using " => :other + "(max" => :other + "@show \"/dev/nul" => :string + "@show \"/tm" => :string + "@show \"/dev/nul" => :string + "(Iter" => :other + "\"/tmp/jl_4sjOtz/tmpfoob" => :string + "\"~" => :string + "\"~user" => :string + "\"/tmp/jl_Mn9Rbz/selfsym" => :string + "\"~/ka8w5rsz" => :string + "\"foo~bar" => :string + "\"~/Zx6Wa0GkC" => :string + "\"~/Zx6Wa0GkC0" => :string + "\"~/Zx6Wa0GkC0/my_" => :string + "\"~/Zx6Wa0GkC0/my_file" => :string + "cd(\"folder_do_not_exist_77/file" => :string + "CompletionFoo.tuple." => :other + "CompletionFoo.test_dict[\"ab" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[ \"abcd" => :string + "CompletionFoo.test_dict[\"abcd" => :string + "CompletionFoo.test_dict[:b" => :other + "CompletionFoo.test_dict[:bar2" => :other + "CompletionFoo.test_dict[Ba" => :other + "CompletionFoo.test_dict[occ" => :other + "CompletionFoo.test_dict[`l" => :cmd + "CompletionFoo.test_dict[6" => :other + "CompletionFoo.test_dict[66" => :other + "CompletionFoo.test_dict[(" => :other + "CompletionFoo.test_dict[\"\\alp" => :string + "CompletionFoo.test_dict[\"\\alpha" => :string + "CompletionFoo.test_dict[\"α" => :string + "CompletionFoo.test_dict[:α" => :other + "CompletionFoo.test_dict[" => :other + "CompletionFoo.test_customdict[\"ab" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[ \"abcd" => :string + "CompletionFoo.test_customdict[\"abcd" => :string + "CompletionFoo.test_customdict[:b" => :other + "CompletionFoo.test_customdict[:bar2" => :other + "CompletionFoo.test_customdict[Ba" => :other + "CompletionFoo.test_customdict[occ" => :other + "CompletionFoo.test_customdict[`l" => :cmd + "CompletionFoo.test_customdict[6" => :other + "CompletionFoo.test_customdict[66" => :other + "CompletionFoo.test_customdict[(" => :other + "CompletionFoo.test_customdict[\"\\alp" => :string + 
"CompletionFoo.test_customdict[\"\\alpha" => :string + "CompletionFoo.test_customdict[\"α" => :string + "CompletionFoo.test_customdict[:α" => :other + "CompletionFoo.test_customdict[" => :other + "test_repl_comp_dict[\"ab" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[ \"abcd" => :string + "test_repl_comp_dict[\"abcd" => :string + "test_repl_comp_dict[:b" => :other + "test_repl_comp_dict[:bar2" => :other + "test_repl_comp_dict[Ba" => :other + "test_repl_comp_dict[occ" => :other + "test_repl_comp_dict[`l" => :cmd + "test_repl_comp_dict[6" => :other + "test_repl_comp_dict[66" => :other + "test_repl_comp_dict[(" => :other + "test_repl_comp_dict[\"\\alp" => :string + "test_repl_comp_dict[\"\\alpha" => :string + "test_repl_comp_dict[\"α" => :string + "test_repl_comp_dict[:α" => :other + "test_repl_comp_dict[" => :other + "test_repl_comp_customdict[\"ab" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[ \"abcd" => :string + "test_repl_comp_customdict[\"abcd" => :string + "test_repl_comp_customdict[:b" => :other + "test_repl_comp_customdict[:bar2" => :other + "test_repl_comp_customdict[Ba" => :other + "test_repl_comp_customdict[occ" => :other + "test_repl_comp_customdict[`l" => :cmd + "test_repl_comp_customdict[6" => :other + "test_repl_comp_customdict[66" => :other + "test_repl_comp_customdict[(" => :other + "test_repl_comp_customdict[\"\\alp" => :string + "test_repl_comp_customdict[\"\\alpha" => :string + "test_repl_comp_customdict[\"α" => :string + "test_repl_comp_customdict[:α" => :other + "test_repl_comp_customdict[" => :other + "CompletionFoo.kwtest3(a;foob" => :other + "CompletionFoo.kwtest3(a; le" => :other + "CompletionFoo.kwtest3.(a;\nlength" => :other + "CompletionFoo.kwtest3(a, length=4, l" => :other + "CompletionFoo.kwtest3(a; kwargs..., fo" => :other + "CompletionFoo.kwtest3(a; another!kwarg=0, le" => :other + "CompletionFoo.kwtest3(a; another!" 
=> :other + "CompletionFoo.kwtest3(a; another!kwarg=0, foob" => :other + "CompletionFoo.kwtest3(a; namedarg=0, foob" => :other + "kwtest3(blabla; unknown=4, namedar" => :other + "kwtest3(blabla; named" => :other + "kwtest3(blabla; named." => :other + "kwtest3(blabla; named..., another!" => :other + "kwtest3(blabla; named..., len" => :other + "kwtest3(1+3im; named" => :other + "kwtest3(1+3im; named." => :other + "CompletionFoo.kwtest4(a; x23=0, _" => :other + "CompletionFoo.kwtest4(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4.(a; xαβγ=1, _" => :other + "CompletionFoo.kwtest4(a; x23=0, x" => :other + "CompletionFoo.kwtest4.(a; x23=0, x" => :other + "CompletionFoo.kwtest4(a; _a1b=1, x" => :other + "CompletionFoo.kwtest5(3, 5; somek" => :other + "CompletionFoo.kwtest5(3, 5, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(3, 5, 7; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, Any[]...; somek" => :other + "CompletionFoo.kwtest5(unknownsplat...; somekw" => :other + "CompletionFoo.kwtest5(3, 5, 7, 9, somekwarg=4, somek" => :other + "CompletionFoo.kwtest5(String[]..., unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', unknownsplat...; xy" => :other + "CompletionFoo.kwtest5('a', 3, String[]...; xy" => :other + "CompletionFoo.kwtest3(" => :other + "CompletionFoo.kwtest3(a;" => :other + "CompletionFoo.kwtest3(a; len2=" => :other + "CompletionFoo.kwtest3(a; len2=le" => :other + "CompletionFoo.kwtest3(a; len2=3 " => :other + "CompletionFoo.kwtest3(a; [le" => :other + "CompletionFoo.kwtest3([length; le" => :other + "CompletionFoo.kwtest3(a; (le" => :other + "CompletionFoo.kwtest3(a; foo(le" => :other + "CompletionFoo.kwtest3(a; (; le" => :other + "CompletionFoo.kwtest3(a; length, " => :other + "CompletionFoo.kwtest3(a; kwargs..., " => :other + ":(function foo(::Int) end).args[1].args[2]." 
=> :other + "log(log.(varfloat)," => :other + "Base.return_types(getin" => :other + "test(1,1, " => :other + "test.(1,1, " => :other + "prevind(\"θ\",1," => :other + "typeof(+)." => :other + "test_dict[\"ab" => :string + "CompletionFoo.x." => :other + "@noexist." => :other + "Main.@noexist." => :none # <- Invalid syntax which adding a suffix can't fix + "@Main.noexist." => :other + "@show." => :other + "@macroexpand." => :other + "CompletionFoo.@foobar()." => :other + "CompletionFoo.@foobar(4)." => :other + "foo(#=#==#=##==#).rs[1]." => :other + "foo().r." => :other + "foo(#=#=# =#= =#).r." => :other + "test_47594." => :other + "Issue36437(42)." => :other + "Some(Issue36437(42)).value." => :other + "some_issue36437.value." => :other + "some_issue36437.value.a, some_issue36437.value." => :other + "@show some_issue36437.value.a; some_issue36437.value." => :other + "()." => :other + "Ref(Issue36437(42))[]." => :other + "global_dict[:r]." => :other + "global_dict_nested[:g][:r]." => :other + "global_dict_nested[" => :other + "global_dict_nested[:g][" => :other + "pop!(global_xs)." => :other + "tcd1." => :other + "tcd1.x." => :other + "tcd1.x.v." => :other + "getkeyelem(mutable_const_prop)." => :other + "getkeyelem(mutable_const_prop).value." => :other + "var\"complicated " => :string + "WeirdNames().var\"oh " => :string + "WeirdNames().var\"" => :string + "\"abc\"." => :other + "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." => :other + "union_somes(1, 1.0)." => :other + "union_some_ref(1, 1.0)." => :other + "Issue49892(fal" => :other + "-CompletionFoo.Test_y(3)." => :other + "99 ⨷⁻ᵨ⁷ CompletionFoo.type_test." => :other + "CompletionFoo.type_test + CompletionFoo.Test_y(2)." => :other + "(CompletionFoo.type_test + CompletionFoo.Test_y(2))." => :other + "CompletionFoo.type_test + CompletionFoo.unicode_αβγ." => :other + "(CompletionFoo.type_test + CompletionFoo.unicode_αβγ)." => :other + "using Base." 
=> :other + "@time(using .Iss" => :other + "using .Issue52922.Inner1." => :other + "Issue53126()." => :other + "using " => :other + "global xxx::Number = Base." => :other + "let x = 1 # comment" => :other + ] + @testset "$(repr(str))" begin + # Test :statement parsing + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :statement)[1] + @test Base.incomplete_tag(ex) == tag + # Test :all parsing - this is what the REPL uses to parse user input. + ex = JuliaSyntax.core_parser_hook(str, "somefile", 1, 0, :all)[1] + @test ex.head == :toplevel + @test Base.incomplete_tag(ex.args[end]) == tag + end + end + + # Should not throw + @test JuliaSyntax.core_parser_hook("+=", "somefile", 1, 0, :statement)[1] isa Expr + end +end diff --git a/JuliaSyntax/test/kinds.jl b/JuliaSyntax/test/kinds.jl new file mode 100644 index 0000000000000..5179544ec15d3 --- /dev/null +++ b/JuliaSyntax/test/kinds.jl @@ -0,0 +1,59 @@ +# Only test this once per session, as kind modules must be unique (ugh) +if !isdefined(@__MODULE__, :FooKinds) +@eval module FooKinds + +using ..JuliaSyntax + +function _init_kinds() + JuliaSyntax.register_kinds!(@__MODULE__, 42, [ + "BEGIN_FOO" + "foo_1" + "foo_2" + "BEGIN_FOOBAR" + "foobar_1" + "foobar_2" + "END_FOOBAR" + "END_FOO" + ]) +end + +_init_kinds() + +k_before_init = K"foo_1" + +function __init__() + _init_kinds() +end + +end + +@eval module BarKinds + # Intentionally empty +end + +end + +@testset "Kinds" begin + @test K"foo_1" != K"foo_2" + + @test FooKinds.k_before_init == K"foo_1" + + @test K"BEGIN_FOO" == K"foo_1" + @test K"foo_2" < K"BEGIN_FOOBAR" + @test K"BEGIN_FOOBAR" == K"foobar_1" + @test K"END_FOOBAR" == K"foobar_2" + @test K"END_FOO" == K"foobar_2" + + @test parentmodule(K"foo_1") == FooKinds + @test sprint(show, K"foo_1") == "K\"foo_1\"" + + # Too many kind modules + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 64, ["hoo?"]) + # Too many kind names per module + @test_throws ErrorException 
JuliaSyntax.register_kinds!(BarKinds, 42, string.(1:1024)) + # Re-registering or registering new kinds is not supported + @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_2", "foo_1"]) + @test_throws ErrorException JuliaSyntax.register_kinds!(FooKinds, 42, ["foo_3"]) + # Module ID already taken by FooKinds + @test_throws ErrorException JuliaSyntax.register_kinds!(BarKinds, 42, ["hii?"]) +end diff --git a/JuliaSyntax/test/literal_parsing.jl b/JuliaSyntax/test/literal_parsing.jl new file mode 100644 index 0000000000000..bfb8e932458ad --- /dev/null +++ b/JuliaSyntax/test/literal_parsing.jl @@ -0,0 +1,256 @@ +using .JuliaSyntax: + parse_int_literal, + parse_uint_literal, + parse_float_literal, + unescape_julia_string + +@testset "Float parsing" begin + # Float64 + @test parse_float_literal(Float64, "123", 1, 4) === (123.0, :ok) + @test parse_float_literal(Float64, "123", 2, 4) === (23.0, :ok) + @test parse_float_literal(Float64, "123", 2, 3) === (2.0, :ok) + @test parse_float_literal(Float64, "1.3", 1, 4) === (1.3, :ok) + @test parse_float_literal(Float64, "1.3e2", 1, 6) === (1.3e2, :ok) + @test parse_float_literal(Float64, "1.3E2", 1, 6) === (1.3e2, :ok) + @test parse_float_literal(Float64, "1.0e-1000", 1, 10) === (0.0, :underflow) + @test parse_float_literal(Float64, "1.0e+1000", 1, 10) === (Inf, :overflow) + # Slow path (exceeds static buffer size) + @test parse_float_literal(Float64, "0.000000000000000000000000000000000000000000000000000000000001", 1, 63) === (1e-60, :ok) + # hexfloat + @test parse_float_literal(Float64, "0x0ap-0", 1, 8) === (Float64(10), :ok) + @test parse_float_literal(Float64, "0xffp-0", 1, 8) === (Float64(255), :ok) + + # Float32 + @test parse_float_literal(Float32, "123", 1, 4) === (123.0f0, :ok) + @test parse_float_literal(Float32, "1.3f2", 1, 6) === (1.3f2, :ok) + if !Sys.iswindows() + @test parse_float_literal(Float32, "1.0f-50", 1, 8) === (0.0f0, :underflow) + end + @test parse_float_literal(Float32, "1.0f+50", 
1, 8) === (Inf32, :overflow) + + # Assertions + @test_throws ErrorException parse_float_literal(Float64, "x", 1, 2) + @test_throws ErrorException parse_float_literal(Float64, "1x", 1, 3) + + # Underscore and \minus allowed + @test parse_float_literal(Float64, "10_000.0_0", 1, 9) === (Float64(10000), :ok) + @test parse_float_literal(Float64, "−10.0", 1, 8) === (Float64(-10), :ok) + @test parse_float_literal(Float64, "10e\u22121", 1, 8) === (Float64(1), :ok) +end + +hexint(s) = parse_uint_literal(s, K"HexInt") +binint(s) = parse_uint_literal(s, K"BinInt") +octint(s) = parse_uint_literal(s, K"OctInt") + +@testset "Integer parsing" begin + # Integers + @testset "Signed Integers" begin + @test parse_int_literal("-1") isa Int + @test parse_int_literal("1") isa Int + @test parse_int_literal("2147483647") isa Int + @test parse_int_literal("9223372036854775807") isa Int64 + @test parse_int_literal("9223372036854775808") isa Int128 + @test parse_int_literal("170141183460469231731687303715884105727") isa Int128 + @test parse_int_literal("170141183460469231731687303715884105728") isa BigInt + end + + # HexInt + @testset "HexInt numeric limits for different types" begin + @test hexint("0xff") === UInt8(0xff) + @test hexint("0x100") === UInt16(0x100) + @test hexint("0xffff") === UInt16(0xffff) + @test hexint("0x10000") === UInt32(0x10000) + @test hexint("0xffffffff") === UInt32(0xffffffff) + @test hexint("0x100000000") === UInt64(0x100000000) + @test hexint("0xffffffffffffffff") === UInt64(0xffffffffffffffff) + @test hexint("0x10000000000000000") === UInt128(0x10000000000000000) + @test hexint("0xffffffffffffffffffffffffffffffff") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = hexint("0x100000000000000000000000000000000"); + n isa BigInt && n == big"0x100000000000000000000000000000000") + end + @testset "HexInt string length limits for different types" begin + @test hexint("0x00") === UInt8(0) + @test hexint("0x000") === UInt16(0) + @test hexint("0x0000") === 
UInt16(0) + @test hexint("0x00000") === UInt32(0) + @test hexint("0x00000000") === UInt32(0) + @test hexint("0x000000000") === UInt64(0) + @test hexint("0x0000000000000000") === UInt64(0) + @test hexint("0x00000000000000000") === UInt128(0) + @test hexint("0x00000000000000000000000000000000") === UInt128(0) + @test (n = hexint("0x000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + # BinInt + @testset "BinInt numeric limits for different types" begin + @test binint("0b11111111") === UInt8(0xff) + @test binint("0b100000000") === UInt16(0x100) + @test binint("0b1111111111111111") === UInt16(0xffff) + @test binint("0b10000000000000000") === UInt32(0x10000) + @test binint("0b11111111111111111111111111111111") === UInt32(0xffffffff) + @test binint("0b100000000000000000000000000000000") === UInt64(0x100000000) + @test binint("0b1111111111111111111111111111111111111111111111111111111111111111") === UInt64(0xffffffffffffffff) + @test binint("0b10000000000000000000000000000000000000000000000000000000000000000") === UInt128(0x10000000000000000) + @test binint("0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = binint("0b100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + n isa BigInt && n == big"0x100000000000000000000000000000000") + end + @testset "BinInt string length limits for different types" begin + @test binint("0b00000000") === UInt8(0) + @test binint("0b000000000") === UInt16(0) + @test binint("0b0000000000000000") === UInt16(0) + @test binint("0b00000000000000000") === UInt32(0) + @test binint("0b00000000000000000000000000000000") === UInt32(0) + @test binint("0b000000000000000000000000000000000") === UInt64(0) + @test binint("0b0000000000000000000000000000000000000000000000000000000000000000") === UInt64(0) + @test 
binint("0b00000000000000000000000000000000000000000000000000000000000000000") === UInt128(0) + @test binint("0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") === UInt128(0) + @test (n = binint("0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + # OctInt + @testset "OctInt numeric limits for different types" begin + @test octint("0o377") === UInt8(0xff) + @test octint("0o400") === UInt16(0x100) + @test octint("0o177777") === UInt16(0xffff) + @test octint("0o200000") === UInt32(0x10000) + @test octint("0o37777777777") === UInt32(0xffffffff) + @test octint("0o40000000000") === UInt64(0x100000000) + @test octint("0o1777777777777777777777") === UInt64(0xffffffffffffffff) + @test octint("0o2000000000000000000000") === UInt128(0x10000000000000000) + @test octint("0o3777777777777777777777777777777777777777777") === UInt128(0xffffffffffffffffffffffffffffffff) + @test (n = octint("0o4000000000000000000000000000000000000000000"); + n isa BigInt && n == big"0x100000000000000000000000000000000") + end + @testset "OctInt string length limits for different types" begin + @test octint("0o000") === UInt8(0) + @test octint("0o0000") === UInt16(0) + @test octint("0o000000") === UInt16(0) + @test octint("0o0000000") === UInt32(0) + @test octint("0o00000000000") === UInt32(0) + @test octint("0o000000000000") === UInt64(0) + @test octint("0o0000000000000000000000") === UInt64(0) + @test octint("0o00000000000000000000000") === UInt128(0) + @test octint("0o0000000000000000000000000000000000000000000") === UInt128(0) + @test (n = octint("0o00000000000000000000000000000000000000000000"); + n isa BigInt && n == 0) + end + + @testset "Underscore separators" begin + @test parse_int_literal("10_000") === 10000 + @test parse_uint_literal("0xff_ff", K"HexInt") === 0xffff + @test 
parse_uint_literal("0b1111_1111", K"BinInt") === 0xff + @test parse_uint_literal("0o177_777", K"OctInt") === 0xffff + end + + @testset "\\minus ('\\u2212' / '−') allowed in numbers" begin + @test parse_int_literal("−10") === -10 + end +end + +function unesc(str, firstind=firstindex(str), endind=lastindex(str)+1; diagnostics=false) + io = IOBuffer() + ds = JuliaSyntax.Diagnostic[] + unescape_julia_string(io, Vector{UInt8}(str), firstind, endind, ds) + if diagnostics + ds + else + @test isempty(ds) + String(take!(io)) + end +end + +@testset "String unescaping" begin + # offsets + @test unesc("abcd", 1, 3) == "ab" + @test unesc("abcd", 2, 4) == "bc" + @test unesc("abcd", 3, 5) == "cd" + + # Allowed escapes of delimiters and dollar sign + @test unesc("\\\\") == "\\" + @test unesc("\\\"") == "\"" + @test unesc("\\\$") == "\$" + @test unesc("\\'") == "\'" + @test unesc("\\`") == "`" + + # Newline normalization + @test unesc("a\nb\rc\r\nd") == "a\nb\nc\nd" + + # Invalid escapes + @test !isempty(unesc("\\.", diagnostics=true)) + @test !isempty(unesc("\\z", diagnostics=true)) + + # Standard C escape sequences + @test codeunits(unesc("\\n\\t\\r\\e\\b\\f\\v\\a")) == + UInt8[0x0a, 0x09, 0x0d, 0x1b, 0x08, 0x0c, 0x0b, 0x07] + + # Hex and unicode escapes; \x \u and \U + @test unesc("x\\x61x") == "xax" + @test unesc("x\\u03b1x") == "xαx" + @test unesc("x\\U001F604x") == "x😄x" + # Maximum unicode code point + @test unesc("x\\U10ffffx") == "x\U10ffffx" + @test !isempty(unesc("x\\U110000x", diagnostics=true)) + + # variable-length octal + @test unesc("x\\7x") == "x\ax" + @test unesc("x\\77x") == "x?x" + @test unesc("x\\141x") == "xax" + @test unesc("x\\377x") == "x\xffx" + @test !isempty(unesc("x\\400x", diagnostics=true)) +end + +function unesc_raw(str, is_cmd) + io = IOBuffer() + JuliaSyntax.unescape_raw_string(io, Vector{UInt8}(str), + firstindex(str), lastindex(str)+1, is_cmd) + return String(take!(io)) +end + +@testset "Raw string unescaping" begin + # " delimited + # x\"x ==> 
x"x + @test unesc_raw("x\\\"x", false) == "x\"x" + # x\`x ==> x\`x + @test unesc_raw("x\\`x", false) == "x\\`x" + # x\\\"x ==> x\"x + @test unesc_raw("x\\\\\\\"x", false) == "x\\\"x" + # x\\\`x ==> x\\\`x + @test unesc_raw("x\\\\\\`x", false) == "x\\\\\\`x" + # '\\ ' ==> '\\ ' + @test unesc_raw("\\\\ ", false) == "\\\\ " + # '\\' ==> '\' + @test unesc_raw("\\\\", false) == "\\" + # '\\\\' ==> '\\' + @test unesc_raw("\\\\\\\\", false) == "\\\\" + + # ` delimited + # x\"x ==> x\"x + @test unesc_raw("x\\\"x", true) == "x\\\"x" + # x\`x ==> x`x + @test unesc_raw("x\\`x", true) == "x`x" + # x\\\"x ==> x\"x + @test unesc_raw("x\\\\\\\"x", true) == "x\\\\\\\"x" + # x\\\`x ==> x\`x + @test unesc_raw("x\\\\\\`x", true) == "x\\`x" + # '\\ ' ==> '\\ ' + @test unesc_raw("\\\\ ", true) == "\\\\ " +end + +@testset "Normalization of identifiers" begin + # NFC normalization + # https://github.com/JuliaLang/julia/issues/5434 + # https://github.com/JuliaLang/julia/pull/19464 + @test JuliaSyntax.normalize_identifier("\u0069\u0302") == "\u00ee" + + # Special Julia normalization + # https://github.com/JuliaLang/julia/pull/42561 + @test JuliaSyntax.normalize_identifier("julia\u025B\u00B5\u00B7\u0387\u2212") == + "julia\u03B5\u03BC\u22C5\u22C5\u002D" + + # https://github.com/JuliaLang/julia/issues/48870 + # ℏ -> ħ + @test JuliaSyntax.normalize_identifier("\u210f") == "\u0127" +end diff --git a/JuliaSyntax/test/parse_packages.jl b/JuliaSyntax/test/parse_packages.jl new file mode 100644 index 0000000000000..b5b08f488a918 --- /dev/null +++ b/JuliaSyntax/test/parse_packages.jl @@ -0,0 +1,79 @@ +# Full-scale parsing tests of JuliaSyntax itself, Julia Base, etc. 
+ +juliasyntax_dir = joinpath(@__DIR__, "..") +@testset "Parse JuliaSyntax" begin + test_parse_all_in_path(joinpath(juliasyntax_dir, "src")) +end +@testset "Parse JuliaSyntax tests" begin + test_parse_all_in_path(joinpath(juliasyntax_dir, "test")) +end + +base_path = let + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base") + if !isdir(p) + # For julia 1.9 images. + p = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base") + if !isdir(p) + error("source for Julia base not found") + end + end + p +end +@testset "Parse Base at $base_path" begin + test_parse_all_in_path(base_path) do f + if endswith(f, "gmp.jl") + # Loose comparison due to `f(::g(w) = z) = a` syntax + return exprs_roughly_equal + end + return exprs_equal_no_linenum + end +end + +base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test") +@testset "Parse Base tests at $base_tests_path" begin + test_parse_all_in_path(base_tests_path) do f + # In julia-1.6, test/copy.jl had spurious syntax which became the + # multidimensional array syntax in 1.7. + if endswith(f, "copy.jl") && v"1.6" <= VERSION < v"1.7" + return nothing + end + + # syntax.jl has some intentionally weird syntax which we parse + # differently than the flisp parser, and some cases which we've + # decided are syntax errors. + if endswith(f, "syntax.jl") + return nothing + end + + if endswith(f, "core.jl") + # The test + # @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T}) + # depends on a JuliaSyntax bugfix and parses differently (wrong) using + # flisp. 
This was added in julia#52228 and backported in julia#52045 + if v"1.10.0-rc1.39" <= VERSION + return nothing + else + # Loose comparison due to `for f() = 1:3` syntax + return exprs_roughly_equal + end + end + + # subtype.jl also depends on the where precedence JuliaSyntax bugfix as of julia#53034 + if endswith(f, "subtype.jl") && v"1.11.0-DEV.1382" <= VERSION + return nothing + end + + return exprs_equal_no_linenum + end +end + +@testset "Parse Julia stdlib at $(Sys.STDLIB)" begin + for stdlib in readdir(Sys.STDLIB) + fulldir = joinpath(Sys.STDLIB, stdlib) + if isdir(fulldir) + @testset "Parse $stdlib" begin + test_parse_all_in_path(joinpath(Sys.STDLIB, fulldir)) + end + end + end +end diff --git a/JuliaSyntax/test/parse_stream.jl b/JuliaSyntax/test/parse_stream.jl new file mode 100644 index 0000000000000..3386ba59e435f --- /dev/null +++ b/JuliaSyntax/test/parse_stream.jl @@ -0,0 +1,158 @@ +# Prototype ParseStream interface +# +# Here we test the ParseStream interface, by taking input code and checking +# that the correct sequence of emit() and bump() produces a valid parse tree. 
+ +using .JuliaSyntax: ParseStream, + peek, peek_token, + bump, bump_trivia, bump_invisible, + emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG, + ParseStreamPosition, first_child_position, last_child_position, + parsestmt + +# Here we manually issue parse events in the order the Julia parser would issue +# them +@testset "ParseStream" begin + code = """ + for i = 1:10 + xx[i] + 2 + # hi + yy + end + """ + st = ParseStream(code) + + p1 = position(st) + @test peek(st) == K"for" + bump(st, TRIVIA_FLAG) + p2 = position(st) + @test peek(st) == K"Identifier" # 'i' + bump(st) + @test peek(st) == K"=" + bump(st, TRIVIA_FLAG) + p3 = position(st) + @test peek(st) == K"Integer" # 1 + bump(st) + @test peek(st) == K":" + bump(st) # : + @test peek(st) == K"Integer" # 10 + bump(st) # 10 + emit(st, p3, K"call", INFIX_FLAG) + emit(st, p2, K"=") + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + p4 = position(st) + p5 = position(st) # [call] + p6 = position(st) # [ref] + @test peek(st) == K"Identifier" # 'xx' + bump(st) + @test peek(st) == K"[" + bump(st, TRIVIA_FLAG) + @test peek(st) == K"Identifier" # 'i' + bump(st) + @test peek(st) == K"]" + bump(st, TRIVIA_FLAG) + emit(st, p6, K"ref") + @test peek(st) == K"+" + bump(st) + @test peek(st) == K"Integer" # 2 + bump(st) + emit(st, p5, K"call", INFIX_FLAG) + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + @test peek(st) == K"Identifier" # 'yy' + bump(st) + emit(st, p4, K"block") + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + bump(st, TRIVIA_FLAG) # end + emit(st, p1, K"for") + @test peek(st) == K"NewlineWs" + bump(st, TRIVIA_FLAG) + emit(st, p1, K"toplevel") +end + +@testset "ParseStream constructors" begin + @testset "Byte buffer inputs" begin + # Vector{UInt8} + let + st = ParseStream(Vector{UInt8}("x+y")) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + let + st = ParseStream(Vector{UInt8}("x+y"), 3) + 
bump(st) + @test build_tree(Expr, st) == :y + @test JuliaSyntax.last_byte(st) == 3 + end + # Ptr{UInt8}, len + code = "x+y" + GC.@preserve code begin + let + st = ParseStream(pointer(code), 3) + bump(st) + @test build_tree(Expr, st) == :x + @test JuliaSyntax.last_byte(st) == 1 + end + end + end +end + +@testset "ParseStream tree traversal" begin + # NB: ParseStreamPosition.node_index includes an initial sentinel token so + # indices here are one more than "might be expected". Additionally, note that + # the byte index points to the first byte after the token. + st = parse_sexpr("((a b) c)") + child1_pos = first_child_position(st, position(st)) + @test child1_pos == ParseStreamPosition(7, 8) + @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 4) + @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10) + @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 6) + + st = parse_sexpr("( (a b) c)") + child1_pos = first_child_position(st, position(st)) + @test child1_pos == ParseStreamPosition(8, 9) + @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(10, 11) + @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 7) + + st = parse_sexpr("(a (b c))") + @test first_child_position(st, position(st)) == ParseStreamPosition(3, 3) + child2_pos = last_child_position(st, position(st)) + @test child2_pos == ParseStreamPosition(9, 10) + @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 6) + @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 8) + + st = parse_sexpr("( a (b c))") + @test first_child_position(st, position(st)) == ParseStreamPosition(4, 4) + child2_pos = last_child_position(st, position(st)) + @test child2_pos == ParseStreamPosition(10, 11) + @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 7) + @test last_child_position(st, child2_pos) == 
ParseStreamPosition(9, 9) + + st = parse_sexpr("a (b c)") + @test first_child_position(st, position(st)) == ParseStreamPosition(5, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 7) + + st = parse_sexpr("(a) (b c)") + @test first_child_position(st, position(st)) == ParseStreamPosition(7, 8) + @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10) + + st = parse_sexpr("(() ())") + @test first_child_position(st, position(st)) == ParseStreamPosition(4, 5) + @test last_child_position(st, position(st)) == ParseStreamPosition(7, 9) +end + +@testset "SubString{GenericString} (issue #505)" begin + x = Test.GenericString("1 2") + @test x == "1 2" + y = split(x)[1] + @test y == "1" + @test y isa SubString{GenericString} + @test ParseStream(y) isa ParseStream + @test parsestmt(Expr, y) == parsestmt(Expr, "1") +end diff --git a/JuliaSyntax/test/parser.jl b/JuliaSyntax/test/parser.jl new file mode 100644 index 0000000000000..4aa8652858313 --- /dev/null +++ b/JuliaSyntax/test/parser.jl @@ -0,0 +1,1233 @@ +""" +Parse string to SyntaxNode tree and show as an sexpression +""" +function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws...) + stream = ParseStream(code, version=v) + production(ParseState(stream)) + JuliaSyntax.validate_tokens(stream) + s = build_tree(SyntaxNode, stream, keep_parens=true) + return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...)) +end + +function test_parse(production, input, expected) + if !(input isa AbstractString) + opts, input = input + else + opts = NamedTuple() + end + parsed = parse_to_sexpr_str(production, input; opts...) + if expected isa Regex # Could be AbstractPattern, but that type was added in Julia 1.6. + @test match(expected, parsed) !== nothing + else + @test parsed == expected + end +end + +function test_parse(inout::Pair) + test_parse(JuliaSyntax.parse_toplevel, inout...) 
+end + +PARSE_ERROR = r"\(error-t " + +with_version(v::VersionNumber, (i,o)::Pair) = ((;v=v), i) => o + +# TODO: +# * Extract the following test cases from the source itself. +# * Use only the green tree to generate the S-expressions +# (add flag annotations to heads) +tests = [ + JuliaSyntax.parse_toplevel => [ + "a \n b" => "(toplevel a b)" + "a;b \n c;d" => "(toplevel (toplevel-; a b) (toplevel-; c d))" + "a \n \n" => "(toplevel a)" + "" => "(toplevel)" + ], + JuliaSyntax.parse_block => [ + "a;b;c" => "(block a b c)" + "a;;;b;;" => "(block a b)" + ";a" => "(block a)" + "\n a" => "(block a)" + "a\nb" => "(block a b)" + ], + JuliaSyntax.parse_stmts => [ + "a;b;c" => "(toplevel-; a b c)" + "a;;;b;;" => "(toplevel-; a b)" + """ "x" a ; "y" b """ => + """(toplevel-; (doc (string "x") a) (doc (string "y") b))""" + "x y" => "(wrapper x (error-t y))" + ], + JuliaSyntax.parse_eq => [ + # parse_assignment + "a = b" => "(= a b)" + "a .= b" => "(.= a b)" + "a += b" => "(op= a + b)" + "a .+= b" => "(.op= a + b)" + "a, b = c, d" => "(= (tuple a b) (tuple c d))" + "x, = xs" => "(= (tuple x) xs)" + "[a ~b]" => "(hcat a (call-pre ~ b))" + "a ~ b" => "(call-i a ~ b)" + "a .~ b" => "(dotcall-i a ~ b)" + "[a ~ b c]" => "(hcat (call-i a ~ b) c)" + "[a~b]" => "(vect (call-i a ~ b))" + "f(x) .= 1" => "(.= (call f x) 1)" + "::g() = 1" => "(= (::-pre (call g)) 1)" + "f(x) = 1" => "(function-= (call f x) 1)" + "f(x)::T = 1" => "(function-= (::-i (call f x) T) 1)" + "f(x) where S where U = 1" => "(function-= (where (where (call f x) S) U) 1)" + "(f(x)::T) where S = 1" => "(function-= (where (parens (::-i (call f x) T)) S) 1)" + "f(x) = 1 = 2" => "(function-= (call f x) (= 1 2))" # Should be a warning! + ], + JuliaSyntax.parse_pair => [ + "a => b" => "(call-i a => b)" + "a .=> b" => "(dotcall-i a => b)" + ], + JuliaSyntax.parse_cond => [ + "a ? b : c" => "(? a b c)" + "a ?\nb : c" => "(? a b c)" + "a ? b :\nc" => "(? a b c)" + "a ? b : c:d" => "(? 
a b (call-i c : d))" + # Following are errors but should recover + "a? b : c" => "(? a (error-t) b c)" + "a ?b : c" => "(? a (error-t) b c)" + "a ? b: c" => "(? a b (error-t) c)" + "a ? b :c" => "(? a b (error-t) c)" + "a ? b c" => "(? a b (error-t) c)" + "A[x ? y : end]" => "(ref A (? x y end))" + ], + JuliaSyntax.parse_arrow => [ + "x → y" => "(call-i x → y)" + "x <--> y" => "(call-i x <--> y)" + "x --> y" => "(--> x y)" + "x .--> y" => "(dotcall-i x --> y)" + "x -->₁ y" => "(call-i x -->₁ y)" + ], + JuliaSyntax.parse_or => [ + "x || y || z" => "(|| x (|| y z))" + ((v=v"1.6",), "x .|| y") => "(error (.|| x y))" + ((v=v"1.7",), "x .|| y") => "(.|| x y)" + ], + JuliaSyntax.parse_and => [ + "x && y && z" => "(&& x (&& y z))" + ((v=v"1.6",), "x .&& y") => "(error (.&& x y))" + ((v=v"1.7",), "x .&& y") => "(.&& x y)" + ], + JuliaSyntax.parse_comparison => [ + # Type comparisons are syntactic + "x <: y" => "(<: x y)" + "x >: y" => "(>: x y)" + # Normal binary comparisons + "x < y" => "(call-i x < y)" + "x .< y" => "(dotcall-i x < y)" + "x .<: y" => "(dotcall-i x <: y)" + ":. == :." => "(call-i (quote-: .) == (quote-: .))" + # Comparison chains + "x < y < z" => "(comparison x < y < z)" + "x == y < z" => "(comparison x == y < z)" + "x .< y .< z" => "(comparison x (. <) y (. <) z)" + "x .< y < z" => "(comparison x (. <) y < z)" + ], + JuliaSyntax.parse_pipe_lt => [ + "x <| y <| z" => "(call-i x <| (call-i y <| z))" + ], + JuliaSyntax.parse_pipe_gt => [ + "x |> y |> z" => "(call-i (call-i x |> y) |> z)" + "x .|> y" => "(dotcall-i x |> y)" + ], + JuliaSyntax.parse_range => [ + "1:2" => "(call-i 1 : 2)" + "1:2:3" => "(call-i 1 : 2 3)" + "a:b:c:d:e" => "(call-i (call-i a : b c) : d e)" + "a :< b" => "(call-i a (error : <) b)" + "1:\n2" => "(call-i 1 : (error))" + ], + JuliaSyntax.parse_range => [ + "a..b" => "(call-i a .. 
b)" + "a … b" => "(call-i a … b)" + "a .… b" => "(dotcall-i a … b)" + "[1 :a]" => "(hcat 1 (quote-: a))" + "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote-: a))" + "x..." => "(... x)" + "x:y..." => "(... (call-i x : y))" + "x..y..." => "(... (call-i x .. y))" + ], + JuliaSyntax.parse_invalid_ops => [ + "a--b" => "(call-i a (ErrorInvalidOperator) b)" + ], + JuliaSyntax.parse_expr => [ + "a - b - c" => "(call-i (call-i a - b) - c)" + "a + b + c" => "(call-i a + b c)" + "a + b .+ c" => "(dotcall-i (call-i a + b) + c)" + # parse_with_chains: + # The following is two elements of a hcat + "[x +y]" => "(hcat x (call-pre + y))" + "[x+y +z]" => "(hcat (call-i x + y) (call-pre + z))" + # Conversely the following are infix calls + "[x +₁y]" => "(vect (call-i x +₁ y))" + "[x+y+z]" => "(vect (call-i x + y z))" + "[x+y + z]" => "(vect (call-i x + y z))" + # Dotted and normal operators + "a +₁ b +₁ c" => "(call-i (call-i a +₁ b) +₁ c)" + "a .+ b .+ c" => "(dotcall-i (dotcall-i a + b) + c)" + ], + JuliaSyntax.parse_term => [ + "a * b * c" => "(call-i a * b c)" + "a .* b" => "(dotcall-i a * b)" + "-2*x" => "(call-i -2 * x)" + ], + JuliaSyntax.parse_rational => [ + "x // y // z" => "(call-i (call-i x // y) // z)" + ], + JuliaSyntax.parse_shift => [ + "x >> y >> z" => "(call-i (call-i x >> y) >> z)" + ], + JuliaSyntax.parse_juxtapose => [ + "2x" => "(juxtapose 2 x)" + "2x" => "(juxtapose 2 x)" + "2(x)" => "(juxtapose 2 (parens x))" + "(2)(3)x" => "(juxtapose (parens 2) (parens 3) x)" + "(x-1)y" => "(juxtapose (parens (call-i x - 1)) y)" + "x'y" => "(juxtapose (call-post x ') y)" + "1√x" => "(juxtapose 1 (call-pre √ x))" + # errors + "\"a\"\"b\"" => "(juxtapose (string \"a\") (error-t) (string \"b\"))" + "\"a\"x" => "(juxtapose (string \"a\") (error-t) x)" + "\"\$y\"x" => "(juxtapose (string y) (error-t) x)" + "\"a\"begin end" => "(juxtapose (string \"a\") (error-t) (block))" + # Not juxtaposition - parse_juxtapose will consume only the first token. 
+ "x.3" => "x" + "f(2)2" => "(call f 2)" + "x' y" => "(call-post x ')" + "x 'y" => "x" + "x@y" => "x" + "(begin end)x" => "(parens (block))" + ], + JuliaSyntax.parse_unary => [ + ":T" => "(quote-: T)" + "in::T" => "(::-i in T)" + "isa::T" => "(::-i isa T)" + "-2^x" => "(call-pre - (call-i 2 ^ x))" + "-2[1, 3]" => "(call-pre - (ref 2 1 3))" + # signed literals + "-2" => "-2" + "+2.0" => "2.0" + "-1.0f0" => "-1.0f0" + "-0xf.0p0" => "-15.0" + "+0b10010" => "0x12" + "+0o22" => "0x12" + "+0x12" => "0x12" + "-0b10010" => "(call-pre - 0x12)" + "-0o22" => "(call-pre - 0x12)" + "-0x12" => "(call-pre - 0x12)" + "-1::T" => "(::-i -1 T)" + # Standalone dotted operators are parsed as (|.| op) + ".+" => "(. +)" + ".+\n" => "(. +)" + ".+ =" => "(. +)" + ".+)" => "(. +)" + ".&" => "(. &)" + # Standalone non-dotted operators + "+)" => "+" + # Call with type parameters or non-unary prefix call + "+{T}(x::T)" => "(call (curly + T) (::-i x T))" + "*(x)" => "(call * x)" + ".*(x)" => "(call (. *) x)" + # Prefix function calls for operators which are both binary and unary + "+(a,b)" => "(call + a b)" + "+(a,)" => "(call-, + a)" + ".+(a,)" => "(call-, (. +) a)" + "(.+)(a)" => "(call (parens (. +)) a)" + "(.~(a))" => "(parens (dotcall-pre ~ (parens a)))" + "+(a=1,)" => "(call-, + (= a 1))" + "+(a...)" => "(call + (... 
a))" + "+(a;b,c)" => "(call + a (parameters b c))" + "+(;a)" => "(call + (parameters a))" + "+(;;a)" => "(call + (parameters) (parameters a))" + "+()" => "(call +)" + "+(\n;a)" => "(call + (parameters a))" + "+(;)" => "(call + (parameters))" + "+(\n;\n)" => "(call + (parameters))" + "+(\n)" => "(call +)" + # Whitespace not allowed before prefix function call bracket + "+ (a,b)" => "(call + (error) a b)" + # Prefix calls have higher precedence than ^ + "+(a,b)^2" => "(call-i (call + a b) ^ 2)" + "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" + "<:(a,)" => "(<:-, a)" + # Unary function calls with brackets as grouping, not an arglist + ".+(a)" => "(dotcall-pre + (parens a))" + "+(a;b)" => "(call-pre + (block-p a b))" + "+(;;)" => "(call-pre + (block-p))" + "+(;;)" => "(call-pre + (block-p))" + "+(a;)" => "(call-pre + (block-p a))" + "+(a;;)" => "(call-pre + (block-p a))" + "+(\n;\n;\n)" => "(call-pre + (block-p))" + "+(a=1)" => "(call-pre + (parens (= a 1)))" + # Unary operators have lower precedence than ^ + "+(a)^2" => "(call-pre + (call-i (parens a) ^ 2))" + ".+(a)^2" => "(dotcall-pre + (call-i (parens a) ^ 2))" + "+(a)(x,y)^2" => "(call-pre + (call-i (call (parens a) x y) ^ 2))" + "<:(a)" => "(<:-pre (parens a))" + # Normal unary calls + "+x" => "(call-pre + x)" + "√x" => "(call-pre √ x)" + ".~x" => "(dotcall-pre ~ x)" + # Things which are not quite negative literals + "-0x1"=> "(call-pre - 0x01)" + "- 2" => "(call-pre - 2)" + ".-2" => "(dotcall-pre - 2)" + # Not a unary operator + "/x" => "(call-pre (error /) x)" + "+₁ x" => "(call-pre (error +₁) x)" + ".<: x" => "(dotcall-pre (error (. <:)) x)" + "?\"str\"" => """(call-pre (error ?) 
(string "str"))""" + ], + JuliaSyntax.parse_factor => [ + "x^y" => "(call-i x ^ y)" + "x^y^z" => "(call-i x ^ (call-i y ^ z))" + "x .^ y" => "(dotcall-i x ^ y)" + "begin x end::T" => "(::-i (block x) T)" + # parse_decl_with_initial_ex + "a::b" => "(::-i a b)" + "a::b::c" => "(::-i (::-i a b) c)" + "a->b" => "(-> (tuple a) b)" + "(a,b)->c" => "(-> (tuple-p a b) c)" + "(a;b=1)->c" => "(-> (tuple-p a (parameters (= b 1))) c)" + "x::T->c" => "(-> (tuple (::-i x T)) c)" + "\$a->b" => "(-> (tuple (\$ a)) b)" + "\$(a)->b" => "(-> (tuple (\$ (parens a))) b)" + # FIXME "&(a)->b" => "(-> (tuple-p (& (parens a))) b)" + # FIXME "::(a)->b" => "(-> (tuple-p (:: (parens a))) b)" + # `where` combined with `->` still parses strangely. However: + # * It's extra hard to add a tuple around the `x` in this syntax corner case. + # * The user already needs to add additional, ugly, parens to get this + # to parse correctly because the precedence of `where` is + # inconsistent with `::` and `->` in this case. + "(x where T)->c" => "(-> (parens (where x T)) c)" + "((x::T) where T)->c" => "(-> (parens (where (parens (::-i x T)) T)) c)" + ], + JuliaSyntax.parse_unary_subtype => [ + "<: )" => "<:" + "<: \n" => "<:" + "<: =" => "<:" + "<:{T}(x::T)" => "(call (curly <: T) (::-i x T))" + "<:(x::T)" => "(<:-pre (parens (::-i x T)))" + "<: x" => "(<:-pre x)" + "<: <: x" => "(<:-pre (<:-pre x))" + "<: A where B" => "(<:-pre (where A B))" + # FIXME: The following bizarre precedence seems broken, but is + # compatible with the reference parser (see #248) + "+ <: A where B" => "(where (call-pre + (<:-pre A)) B)" + # Really for parse_where + "x where \n {T}" => "(where x (braces T))" + "x where {T,S}" => "(where x (braces T S))" + "x where {T,S,}" => "(where x (braces-, T S))" + "x where {T S}" => "(where x (bracescat (row T S)))" + "x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y ys)))))" + "x where T" => "(where x T)" + "x where \n T" => "(where x T)" + "x where T<:S" => 
"(where x (<: T S))" + # nested unary and unary-syntactic ops + "<: + <: + A" => "(<:-pre (call-pre + (<:-pre (call-pre + A))))" + "* <: A" => "(call-pre (error *) (<:-pre A))" + ], + JuliaSyntax.parse_unary_prefix => [ + "&)" => "&" + "\$\n" => "\$" + "&a" => "(& a)" + "::a" => "(::-pre a)" + "\$a" => "(\$ a)" + "\$\$a" => "(\$ (\$ a))" + ], + JuliaSyntax.parse_call => [ + # parse_call + "f(x)" => "(call f x)" + "\$f(x)" => "(call (\$ f) x)" + ".&(x,y)" => "(call (. &) x y)" + # parse_call_chain + "f(a).g(b)" => "(call (. (call f a) g) b)" + "\$A.@x" => "(macrocall (. (\$ A) (macro_name x)))" + + # non-errors in space sensitive contexts + "[f (x)]" => "(hcat f (parens x))" + "[f x]" => "(hcat f x)" + # space separated macro calls + "@foo a b" => "(macrocall (macro_name foo) a b)" + "@foo (x)" => "(macrocall (macro_name foo) (parens x))" + "@foo (x,y)" => "(macrocall (macro_name foo) (tuple-p x y))" + "A.@foo a b" => "(macrocall (. A (macro_name foo)) a b)" + "@A.foo a b" => "(macrocall (macro_name (. A foo)) a b)" + "[@foo x]" => "(vect (macrocall (macro_name foo) x))" + "[@foo]" => "(vect (macrocall (macro_name foo)))" + "@var\"#\" a" => "(macrocall (macro_name (var #)) a)" + "@(A) x" => "(macrocall (macro_name (parens A)) x)" + "A.@x y" => "(macrocall (. A (macro_name x)) y)" + "A.@var\"#\" a"=> "(macrocall (. A (macro_name (var #))) a)" + "@+x y" => "(macrocall (macro_name +) x y)" + "A.@.x" => "(macrocall (. A (macro_name .)) x)" + # Macro names + "@! x" => "(macrocall (macro_name !) x)" + "@.. x" => "(macrocall (macro_name ..) x)" + "@\$ y" => "(macrocall (macro_name \$) y)" + "@[x] y z" => "(macrocall (macro_name (error (vect x))) y z)" + # Special @doc parsing rules + "@doc x\ny" => "(macrocall (macro_name doc) x y)" + "A.@doc x\ny" => "(macrocall (. A (macro_name doc)) x y)" + "@A.doc x\ny" => "(macrocall (macro_name (. 
A doc)) x y)" + "@doc x y\nz" => "(macrocall (macro_name doc) x y)" + "@doc x\n\ny" => "(macrocall (macro_name doc) x)" + "@doc x\nend" => "(macrocall (macro_name doc) x)" + + # calls with brackets + "f(a,b)" => "(call f a b)" + "f(a,)" => "(call-, f a)" + "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" + "f(a; b; c)" => "(call f a (parameters b) (parameters c))" + "(a=1)()" => "(call (parens (= a 1)))" + "f (a)" => "(call f (error-t) a)" + "@x(a, b)" => "(macrocall-p (macro_name x) a b)" + "@x(a, b,)" => "(macrocall-p-, (macro_name x) a b)" + "A.@x(y)" => "(macrocall-p (. A (macro_name x)) y)" + "A.@x(y).z" => "(. (macrocall-p (. A (macro_name x)) y) z)" + "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))" + # do + "f() do\nend" => "(call f (do (tuple) (block)))" + "f() do ; body end" => "(call f (do (tuple) (block body)))" + "f() do x, y\n body end" => "(call f (do (tuple x y) (block body)))" + "f(x) do y body end" => "(call f x (do (tuple y) (block body)))" + "@f(x) do y body end" => "(macrocall-p (macro_name f) x (do (tuple y) (block body)))" + + # square brackets + "@S[a,b]" => "(macrocall (macro_name S) (vect a b))" + "@S[a b]" => "(macrocall (macro_name S) (hcat a b))" + "@S[a; b]" => "(macrocall (macro_name S) (vcat a b))" + "A.@S[a]" => "(macrocall (. A (macro_name S)) (vect a))" + "@S[a].b" => "(. 
(macrocall (macro_name S) (vect a)) b)" + ((v=v"1.7",), "@S[a ;; b]") => "(macrocall (macro_name S) (ncat-2 a b))" + ((v=v"1.6",), "@S[a ;; b]") => "(macrocall (macro_name S) (error (ncat-2 a b)))" + "a[i]" => "(ref a i)" + "a [i]" => "(ref a (error-t) i)" + "a[i,j]" => "(ref a i j)" + "(a=1)[]" => "(ref (parens (= a 1)))" + "a[end]" => "(ref a end)" + "a[begin]" => "(ref a begin)" + "a[:(end)]" => "(typed_hcat a (quote-: (parens (error-t))) (error-t))" + "T[x y]" => "(typed_hcat T x y)" + "T[x ; y]" => "(typed_vcat T x y)" + "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" + "T[x for x in xs]" => "(typed_comprehension T (generator x (iteration (in x xs))))" + ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" + + # Dotted forms + # Allow `@` in macrocall only in first and last position + "A.B.@x" => "(macrocall (. (. A B) (macro_name x)))" + "@A.B.x" => "(macrocall (macro_name (. (. A B) x)))" + "A.@B.x" => "(macrocall (. (. A (error-t) B) (macro_name (error-t) x)))" + "@M.(x)" => "(macrocall (dotcall (macro_name M) (error-t) x))" + "f.(a,b)" => "(dotcall f a b)" + "f.(a,b,)" => "(dotcall-, f a b)" + "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" + "(a=1).()" => "(dotcall (parens (= a 1)))" + "f. (x)" => "(dotcall f (error-t) x)" + # Other dotted syntax + "A.:+" => "(. A (quote-: +))" + "A.:.+" => "(. A (quote-: (. +)))" + "A.: +" => "(. A (quote-: (error-t) +))" + "f.\$x" => "(. f (\$ x))" + "f.\$(x+y)" => "(. f (\$ (parens (call-i x + y))))" + "A.\$B.@x" => "(macrocall (. (. A (\$ B)) (macro_name x)))" + "@A.\$x a" => "(macrocall (macro_name (. A (error x))) a)" + "A.@x" => "(macrocall (. A (macro_name x)))" + "A.@x a" => "(macrocall (. A (macro_name x)) a)" + "@A.B.@x a" => "(macrocall (macro_name (. (. A B) (error-t) x)) a)" + # .' discontinued + "f.'" => "(dotcall-post f (error '))" + # Field/property syntax + "f.x.y" => "(. (. f x) y)" + "x .y" => "(. x (error-t) y)" + "x.?" => "(. x ?)" + "x.in" => "(. 
x in)" + # Adjoint + "f'" => "(call-post f ')" + "f'ᵀ" => "(call-post f 'ᵀ)" + # Curly calls + "S {a}" => "(curly S (error-t) a)" + "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))" + "@S{a,b}" => "(macrocall (macro_name S) (braces a b))" + "A.@S{a}" => "(macrocall (. A (macro_name S)) (braces a))" + "@S{a}.b" => "(. (macrocall (macro_name S) (braces a)) b)" + # Macro calls with chained operations + "@a[b][c]" => "(ref (macrocall (macro_name a) (vect b)) c)" + "@a{b}{c}" => "(curly (macrocall (macro_name a) (braces b)) c)" + "@a[b]{c}" => "(curly (macrocall (macro_name a) (vect b)) c)" + "@a{b}[c]" => "(ref (macrocall (macro_name a) (braces b)) c)" + "S{a,b}" => "(curly S a b)" + "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))" + # String macros + "x\"str\"" => """(macrocall @x_str (string-r "str"))""" + "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" + "x\"\"" => """(macrocall @x_str (string-r ""))""" + "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" + "in\"str\"" => """(macrocall @in_str (string-r "str"))""" + "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" + "A.x\"str\"" => """(macrocall (. A @x_str) (string-r "str"))""" + "A.x`str`" => """(macrocall (. 
A @x_cmd) (cmdstring-r "str"))""" + # Triple quoted processing for custom strings + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" + # Macro suffixes can include keywords and numbers + "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" + "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" + "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" + "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" + "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" + # Cmd macro suffixes + "x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")""" + "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" + "x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")""" + "x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)""" + "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)""" + ], + JuliaSyntax.parse_resword => [ + # In normal_context + "begin f() where T = x end" => "(block (function-= (where (call f) T) x))" + # block + "begin end" => "(block)" + "begin a ; b end" => "(block a b)" + "begin\na\nb\nend" => "(block a b)" + # quote + "quote end" => "(quote (block))" + "quote body end" => "(quote (block body))" + # while + "while cond body end" => "(while cond (block body))" + "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" + # for + "for x in xs end" => "(for (iteration (in x xs)) (block))" + "for x in xs, y in ys \n a \n end" => "(for (iteration (in x xs) (in y ys)) (block a))" + # let + "let x=1\n end" => "(let (block (= x 1)) (block))" + "let x=1 ; end" => "(let (block (= x 1)) (block))" + "let x ; end" => "(let (block x) (block))" + "let x::1 ; end" => "(let (block (::-i x 1)) (block))" + "let x=1,y=2 end" => "(let (block (= x 1) (= y 2)) (block))" + "let x+=1 ; end" => "(let (block (op= x + 1)) (block))" + "let ; end" => "(let 
(block) (block))" + "let ; body end" => "(let (block) (block body))" + "let\na\nb\nend" => "(let (block) (block a b))" + # abstract type + "abstract type A end" => "(abstract A)" + "abstract type A ; end" => "(abstract A)" + "abstract type \n\n A \n\n end" => "(abstract A)" + "abstract type A <: B end" => "(abstract (<: A B))" + "abstract type A <: B{T,S} end" => "(abstract (<: A (curly B T S)))" + "abstract type A < B end" => "(abstract (call-i A < B))" + # primitive type + "primitive type A 32 end" => "(primitive A 32)" + "primitive type A 32 ; end" => "(primitive A 32)" + "primitive type A \$N end" => "(primitive A (\$ N))" + "primitive type A <: B \n 8 \n end" => "(primitive (<: A B) 8)" + # struct + "struct A <: B \n a::X \n end" => "(struct (<: A B) (block (::-i a X)))" + "struct A \n a \n b \n end" => "(struct A (block a b))" + "struct A \n \"doca\" \n a \n \"docb\" \n b \n end" => "(struct A (block (doc (string \"doca\") a) (doc (string \"docb\") b)))" + "mutable struct A end" => "(struct-mut A (block))" + ((v=v"1.8",), "struct A const a end") => "(struct A (block (const a)))" + ((v=v"1.7",), "struct A const a end") => "(struct A (block (error (const a))))" + "struct A end" => "(struct A (block))" + "struct try end" => "(struct (error try) (block))" + # return + "return\nx" => "(return)" + "return)" => "(return)" + "return x" => "(return x)" + "return x,y" => "(return (tuple x y))" + # break/continue + "break" => "(break)" + "continue" => "(continue)" + # module/baremodule + "module A end" => "(module A (block))" + "baremodule A end" => "(module-bare A (block))" + "module do \n end" => "(module (error do) (block))" + "module \$A end" => "(module (\$ A) (block))" + "module A \n a \n b \n end" => "(module A (block a b))" + """module A \n "x"\na\n end""" => """(module A (block (doc (string "x") a)))""" + # export + "export a" => "(export a)" + "export @a" => "(export (macro_name a))" + "export @var\"'\"" => "(export (macro_name (var ')))" + "export a, \n @b" 
=> "(export a (macro_name b))" + "export +, ==" => "(export + ==)" + "export \n a" => "(export a)" + "export \$a, \$(a*b)" => "(export (\$ a) (\$ (parens (call-i a * b))))" + "export (x::T)" => "(export (error (parens (::-i x T))))" + "export outer" => "(export outer)" + "export (\$f)" => "(export (parens (\$ f)))" + ], + JuliaSyntax.parse_if_elseif => [ + "if a xx elseif b yy else zz end" => "(if a (block xx) (elseif b (block yy) (block zz)))" + "if end" => "(if (error) (block))" + "if \n end" => "(if (error) (block))" + "if a end" => "(if a (block))" + "if a xx end" => "(if a (block xx))" + "if a \n\n xx \n\n end" => "(if a (block xx))" + "if a xx elseif b yy end" => "(if a (block xx) (elseif b (block yy)))" + "if a xx else if b yy end" => "(if a (block xx) (error-t) (elseif b (block yy)))" + "if a xx else yy end" => "(if a (block xx) (block yy))" + "if true; x ? true elseif true end" => "(if true (block (if x true (error-t) (error-t))) (elseif true (block)))" + "if true; x ? true end" => "(if true (block (if x true (error-t) (error-t))))" + "if true; x ? true\nend" => "(if true (block (if x true (error-t) (error-t))))" + "if true; x ? 
true : elseif true end" => "(if true (block (if x true (error-t))) (elseif true (block)))" + ], + JuliaSyntax.parse_resword => [ + "global x" => "(global x)" + "local x" => "(local x)" + "global x,y" => "(global x y)" + "global const x = 1" => "(global (const (= x 1)))" + "local const x = 1" => "(local (const (= x 1)))" + "const global x = 1" => "(const (global (= x 1)))" + "const local x = 1" => "(const (local (= x 1)))" + "const x,y = 1,2" => "(const (= (tuple x y) (tuple 1 2)))" + "const x = 1" => "(const (= x 1))" + "const x .= 1" => "(error (const (.= x 1)))" + "global x ~ 1" => "(global (call-i x ~ 1))" + "global x += 1" => "(global (op= x + 1))" + "const x" => "(error (const x))" + "global const x" => "(global (error (const x)))" + "const global x" => "(error (const (global x)))" + ], + JuliaSyntax.parse_resword => [ + # Macros and functions + "macro while(ex) end" => "(macro (call (error while) ex) (block))" + "macro f() end" => "(macro (call f) (block))" + "macro (:)(ex) end" => "(macro (call (parens :) ex) (block))" + "macro (type)(ex) end" => "(macro (call (parens type) ex) (block))" + "macro \$f() end" => "(macro (call (\$ f)) (block))" + "macro (\$f)() end" => "(macro (call (parens (\$ f))) (block))" + "function (x) body end"=> "(function (tuple-p x) (block body))" + "function (x,y) end" => "(function (tuple-p x y) (block))" + "function (x,y,) end" => "(function (tuple-p-, x y) (block))" + "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" + "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" + "function (f(x),) end" => "(function (tuple-p-, (call f x)) (block))" + "function (@f(x);) end" => "(function (tuple-p (macrocall-p (macro_name f) x) (parameters)) (block))" + "function (@f(x)...) end" => "(function (tuple-p (... 
(macrocall-p (macro_name f) x))) (block))" + "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p (macro_name f) x))) (block))" + "function (\$f) end" => "(function (error (tuple-p (\$ f))) (block))" + "function ()(x) end" => "(function (call (tuple-p) x) (block))" + "function (A).f() end" => "(function (call (. (parens A) f)) (block))" + "function (:)() end" => "(function (call (parens :)) (block))" + "function (x::T)() end"=> "(function (call (parens (::-i x T))) (block))" + "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" + "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" + "function (::T)() end" => "(function (call (parens (::-pre T))) (block))" + "function (:*=(f))() end" => "(function (call (parens (call (quote-: *=) f))) (block))" + "function begin() end" => "(function (call (error begin)) (block))" + "function f() end" => "(function (call f) (block))" + "function type() end" => "(function (call type) (block))" + "function \n f() end" => "(function (call f) (block))" + "function \$f() end" => "(function (call (\$ f)) (block))" + "function (::Type{T})(x) end" => "(function (call (parens (::-pre (curly Type T))) x) (block))" + # Function/macro definition with no methods + "function f end" => "(function f)" + "function f \n\n end" => "(function f)" + "function \$f end" => "(function (\$ f))" + "function var\".\" end" => "(function (var .))" + "macro f end" => "(macro f)" + # Function argument list + "function f(x,y) end" => "(function (call f x y) (block))" + "function f{T}() end" => "(function (call (curly f T)) (block))" + "function A.f() end" => "(function (call (. 
A f)) (block))" + "function f body end" => "(function (error f) (block body))" + "function f()::T end" => "(function (::-i (call f) T) (block))" + "function f()::g(T) end" => "(function (::-i (call f) (call g T)) (block))" + "function f() where {T} end" => "(function (where (call f) (braces T)) (block))" + "function f() where T end" => "(function (where (call f) T) (block))" + "function f()::S where T end" => "(function (where (::-i (call f) S) T) (block))" + # Ugly cases for compat where extra parentheses existed and we've + # already parsed at least the call part of the signature + "function (f() where T) end" => "(function (parens (where (call f) T)) (block))" + "function (f()) where T end" => "(function (where (parens (call f)) T) (block))" + "function (f() where T) where U end" => "(function (where (parens (where (call f) T)) U) (block))" + "function (f()::S) end"=> "(function (parens (::-i (call f) S)) (block))" + "function ((f()::S) where T) end" => "(function (parens (where (parens (::-i (call f) S)) T)) (block))" + "function (x*y ) end" => "(function (parens (call-i x * y)) (block))" + # body + "function f() \n a \n b end" => "(function (call f) (block a b))" + "function f() end" => "(function (call f) (block))" + # Macrocall as sig + ((v=v"1.12",), "function @callmemacro(a::Int) \n 1 \n end") => "(function (macrocall-p (macro_name callmemacro) (::-i a Int)) (block 1))" + ((v=v"1.12",), "function @callmemacro(a::T, b::T) where T <: Int64\n3\nend") => "(function (where (macrocall-p (macro_name callmemacro) (::-i a T) (::-i b T)) (<: T Int64)) (block 3))" + ((v=v"1.12",), "function @callmemacro(a::Int, b::Int, c::Int)::Float64\n4\nend") => "(function (::-i (macrocall-p (macro_name callmemacro) (::-i a Int) (::-i b Int) (::-i c Int)) Float64) (block 4))" + ((v=v"1.12",), "function @f()() end") => "(function (call (macrocall-p (macro_name f))) (block))" + # Errors + "function" => "(function (error (error)) (block (error)) (error-t))" + ], + 
JuliaSyntax.parse_try => [ + "try \n x \n catch e \n y \n finally \n z end" => + "(try (block x) (catch e (block y)) (finally (block z)))" + ((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") => + "(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))" + "try x catch end" => "(try (block x) (catch □ (block)))" + "try x catch ; y end" => "(try (block x) (catch □ (block y)))" + "try x catch \n y end" => "(try (block x) (catch □ (block y)))" + "try x catch e y end" => "(try (block x) (catch e (block y)))" + "try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))" + "try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))" + "try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))" + "try x finally y end" => "(try (block x) (finally (block y)))" + # v1.8 only + ((v=v"1.8",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (block)))" + ((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))" + ((v=v"1.7",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (error (block))))" + # finally before catch :-( + "try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))" + "try x end" => "(try (block x) (error-t))" + ], + JuliaSyntax.parse_imports => [ + "import A as B: x" => "(import (: (error (as (importpath A) B)) (importpath x)))" + "import A, y" => "(import (importpath A) (importpath y))" + "import A: +, ==" => "(import (: (importpath A) (importpath +) (importpath ==)))" + "import A: x, y" => "(import (: (importpath A) (importpath x) (importpath y)))" + "import A: x, B: y" => "(import (: (importpath A) (importpath x) (importpath B) (error-t (importpath y))))" + "import A: x" => "(import (: (importpath A) (importpath x)))" + "using A" => "(using (importpath A))" + "import A" => "(import (importpath A))" + # parse_import + "import A: x, y" => "(import (: (importpath 
A) (importpath x) (importpath y)))" + "import A as B" => "(import (as (importpath A) B))" + "import A: x as y" => "(import (: (importpath A) (as (importpath x) y)))" + "using A: x as y" => "(using (: (importpath A) (as (importpath x) y)))" + ((v=v"1.5",), "import A as B") => "(import (error (as (importpath A) B)))" + "using A as B" => "(using (error (as (importpath A) B)))" + "using A, B as C" => "(using (importpath A) (error (as (importpath B) C)))" + # parse_import_path + # When parsing import we must split initial dots into nontrivial + # leading dots for relative paths + "import .A" => "(import (importpath . A))" + "import ..A" => "(import (importpath . . A))" + "import ...A" => "(import (importpath . . . A))" + "import ....A" => "(import (importpath . . . . A))" + # Dots with spaces are allowed (a misfeature?) + "import . .A" => "(import (importpath . . A))" + # Modules with operator symbol names + "import .⋆" => "(import (importpath . ⋆))" + # Expressions allowed in import paths + "import @x" => "(import (importpath (macro_name x)))" + "import \$A" => "(import (importpath (\$ A)))" + "import \$A.@x" => "(import (importpath (\$ A) (macro_name x)))" + "import A.B" => "(import (importpath A B))" + "import A.B.C" => "(import (importpath A B C))" + "import A.:+" => "(import (importpath A (quote-: +)))" + "import A.(:+)" => "(import (importpath A (parens (quote-: +))))" + "import A.:(+)" => "(import (importpath A (quote-: (parens +))))" + "import A.==" => "(import (importpath A ==))" + "import A.⋆.f" => "(import (importpath A ⋆ f))" + "import A..." 
=> "(import (importpath A ..))" + "import A; B" => "(import (importpath A))" + # Colons not allowed first in import paths + # but are allowed in trailing components (#473) + "using :A" => "(using (importpath (error (quote-: A))))" + "using A: :b" => "(using (: (importpath A) (importpath (error (quote-: b)))))" + "using A: b.:c" => "(using (: (importpath A) (importpath b (quote-: c))))" + # Syntactic operators not allowed in import + ], + JuliaSyntax.parse_iteration_specs => [ + "i = rhs" => "(iteration (in i rhs))" + "i in rhs" => "(iteration (in i rhs))" + "i ∈ rhs" => "(iteration (in i rhs))" + "i = 1:10" => "(iteration (in i (call-i 1 : 10)))" + "(i,j) in iter" => "(iteration (in (tuple-p i j) iter))" + "outer = rhs" => "(iteration (in outer rhs))" + "outer <| x = rhs" => "(iteration (in (call-i outer <| x) rhs))" + "outer i = rhs" => "(iteration (in (outer i) rhs))" + "outer (x,y) = rhs" => "(iteration (in (outer (tuple-p x y)) rhs))" + ], + JuliaSyntax.parse_paren => [ + # Tuple syntax with commas + "()" => "(tuple-p)" + "(x,)" => "(tuple-p-, x)" + "(x,y)" => "(tuple-p x y)" + "(x=1, y=2)" => "(tuple-p (= x 1) (= y 2))" + # Named tuples with initial semicolon + "(;)" => "(tuple-p (parameters))" + "(; a=1)" => "(tuple-p (parameters (= a 1)))" + # Extra credit: nested parameters and frankentuples + "(x...; y)" => "(tuple-p (... x) (parameters y))" + "(x...;)" => "(tuple-p (... x) (parameters))" + "(; a=1; b=2)" => "(tuple-p (parameters (= a 1)) (parameters (= b 2)))" + "(a; b; c,d)" => "(tuple-p a (parameters b) (parameters c d))" + "(a=1, b=2; c=3)" => "(tuple-p (= a 1) (= b 2) (parameters (= c 3)))" + # Block syntax + "(;;)" => "(block-p)" + "(a=1;)" => "(block-p (= a 1))" + "(a;b;;c)" => "(block-p a b c)" + "(a=1; b=2)" => "(block-p (= a 1) (= b 2))" + # Following is an error for flisp compatibility. But it could be + # allowed as valid block syntax in the future? 
+ "(y for x = xs; a)" => "(parens (generator y (iteration (in x xs))) (error-t ✘ a))" + # Parentheses used for grouping + "(a * b)" => "(parens (call-i a * b))" + "(a=1)" => "(parens (= a 1))" + "(x)" => "(parens x)" + "(a...)" => "(parens (... a))" + # Generators + "(x for a in as)" => "(parens (generator x (iteration (in a as))))" + "(x \n\n for a in as)" => "(parens (generator x (iteration (in a as))))" + # Range parsing in parens + "(1:\n2)" => "(parens (call-i 1 : 2))" + "(1:2)" => "(parens (call-i 1 : 2))" + ], + JuliaSyntax.parse_atom => [ + # char literal + "'a'" => "(char 'a')" + "'α'" => "(char 'α')" + "'\\xce\\xb1'" => "(char 'α')" + "'\\u03b1'" => "(char 'α')" + "'\\U1D7DA'" => "(char '𝟚')" + "'a" => "(char 'a' (error-t))" + "''" => "(char (error))" + "'" => "(char (error))" + # symbol/expression quote + ":foo" => "(quote-: foo)" + # Literal colons + ":)" => ":" + ": end" => ":" + # Whitespace after quoting colon + ": foo" => "(quote-: (error-t) foo)" + ":\nfoo" => "(quote-: (error-t) foo)" + # plain equals + "=" => "(error =)" + # Identifiers + "xx" => "xx" + "x₁" => "x₁" + # var syntax + """var"x" """ => "(var x)" + # var syntax raw string unescaping + "var\"\"" => "(var )" + "var\"\\\"\"" => "(var \")" + "var\"\\\\\\\"\"" => "(var \\\")" + "var\"\\\\x\"" => "(var \\\\x)" + # trailing syntax after var + """var"x"+""" => "(var x)" + """var"x")""" => "(var x)" + """var"x"(""" => "(var x)" + """var"x"end""" => "(var x (error-t))" + """var"x"1""" => "(var x (error-t))" + """var"x"y""" => "(var x (error-t))" + # Standalone syntactic operators are errors + "?" => "(error ?)" + "&&" => "(error &&)" + "||" => "(error ||)" + "." => "(error .)" + "..." 
=> "(error ...)" + "+=" => "(error +=)" + "-=" => "(error -=)" + "*=" => "(error *=)" + "/=" => "(error /=)" + "//=" => "(error //=)" + "|=" => "(error |=)" + "^=" => "(error ^=)" + "÷=" => "(error ÷=)" + "%=" => "(error %=)" + "<<=" => "(error <<=)" + ">>=" => "(error >>=)" + ">>>="=> "(error >>>=)" + "\\=" => "(error \\=)" + "&=" => "(error &=)" + ":=" => "(error :=)" + "\$=" => "(error \$=)" + "⊻=" => "(error ⊻=)" + ".+=" => "(error (. +=))" + # Normal operators + "+" => "+" + # Assignment-precedence operators which can be used as identifiers + "~" => "~" + "≔" => "≔" + "⩴" => "⩴" + "≕" => "≕" + # Quoted syntactic operators allowed + ":+=" => "(quote-: +=)" + ":.+=" => "(quote-: (. +=))" + ":.=" => "(quote-: (. =))" + ":.&&" => "(quote-: (. &&))" + # Special symbols quoted + ":end" => "(quote-: end)" + ":(end)" => "(quote-: (parens (error-t)))" + ":<:" => "(quote-: <:)" + # unexpected = + "=" => "(error =)" + # parse_cat + "[]" => "(vect)" + "[x,]" => "(vect-, x)" + "[x,y,]" => "(vect-, x y)" + "[x\n,,]" => "(vect-, x (error-t ✘))" + "[x]" => "(vect x)" + "[x \n ]" => "(vect x)" + "[x \n, ]" => "(vect-, x)" + "[x" => "(vect x (error-t))" + "[x \n\n ]" => "(vect x)" + "[x for a in as]" => "(comprehension (generator x (iteration (in a as))))" + "[x \n\n for a in as]" => "(comprehension (generator x (iteration (in a as))))" + # parse_generator + "(x for a in as for b in bs)" => "(parens (generator x (iteration (in a as)) (iteration (in b bs))))" + "(x for a in as, b in bs)" => "(parens (generator x (iteration (in a as) (in b bs))))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration (in a as) (in b bs)) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration (in a as) (in b bs)) (iteration (in c cs) (in d ds))))" + "(x for a in as for b in bs if z)" => "(parens (generator x (iteration (in a as)) (filter (iteration (in b bs)) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter 
(iteration (in a as)) z) (iteration (in b bs))))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (iteration (in a as)) (filter (iteration (in b bs)) cond1) (filter (iteration (in c cs)) cond2)))" + "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (iteration (in a as)) (block cond2))))" + "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (iteration (in x xs))))" + "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))" + # parse_vect + "[x, y]" => "(vect x y)" + "[x, y,]" => "(vect-, x y)" + "[x,\n y]" => "(vect x y)" + "[x\n, y]" => "(vect x y)" + "[x\n,, y]" => "(vect-, x (error-t ✘ y))" + "[x,y ; z]" => "(vect x y (parameters z))" + "[x=1, y=2]" => "(vect (= x 1) (= y 2))" + "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" + # parse_paren + ":(=)" => "(quote-: (parens =))" + ":(::)" => "(quote-: (parens ::))" + ":(::\n)" => "(quote-: (parens ::))" + "(function f \n end)" => "(parens (function f))" + # braces + "{x,y}" => "(braces x y)" + "{x,y,}" => "(braces-, x y)" + "{x y}" => "(bracescat (row x y))" + ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" + ((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))" + ((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))" + # Macro names can be keywords + "@end x" => "(macrocall (macro_name end) x)" + # __dot__ macro + "@. x" => "(macrocall (macro_name .) 
x)" + # cmd strings + "``" => "(cmdstring-r \"\")" + "`cmd`" => "(cmdstring-r \"cmd\")" + "```cmd```" => "(cmdstring-s-r \"cmd\")" + # literals + "true" => "true" + "42" => "42" + "1.0e-1000" => "0.0" + "0x123456789abcdefp+0" => "8.19855292164869e16" + # closing tokens + ")" => "(error)" + ], + JuliaSyntax.parse_atom => [ + # Actually parse_array + # Normal matrix construction syntax + "[x y ; z w]" => "(vcat (row x y) (row z w))" + "[x y ; z w ; a b]" => "(vcat (row x y) (row z w) (row a b))" + "[x ; y ; z]" => "(vcat x y z)" + "[x;]" => "(vcat x)" + "[x y]" => "(hcat x y)" + # Early abort in array parsing + "[x@y" => "(hcat x (error-t ✘ y))" + "[x@y]" => "(hcat x (error-t ✘ y))" + # Mismatched rows + "[x y ; z]" => "(vcat (row x y) z)" + # Single elements in rows + ((v=v"1.7",), "[x ; y ;; z ]") => "(ncat-2 (nrow-1 x y) z)" + ((v=v"1.7",), "[x y ;;; z ]") => "(ncat-3 (row x y) z)" + # Higher dimensional ncat + # Row major + ((v=v"1.7",), "[x y ; z w ;;; a b ; c d]") => + "(ncat-3 (nrow-1 (row x y) (row z w)) (nrow-1 (row a b) (row c d)))" + # Column major + ((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") => + "(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))" + # Dimension 4 ncat + ((v=v"1.7",), "[x ;;;; y]") => "(ncat-4 x y)" + ((v=v"1.7",), "[a ; b ;;;; c ; d]") => "(ncat-4 (nrow-1 a b) (nrow-1 c d))" + ((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]") => + "(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))" + # Array separators + # Newlines before semicolons are not significant + "[a \n ;]" => "(vcat a)" + # Newlines after semicolons are not significant + "[a ; \n]" => "(vcat a)" + "[a ; \n\n b]" => "(vcat a b)" + ((v=v"1.7",), "[a ;; \n b]") => "(ncat-2 a b)" + # In hcat with spaces as separators, `;;` is a line + # continuation character + ((v=v"1.7",), "[a b ;; \n c]") => "(hcat a b c)" + ((v=v"1.7",), "[a b \n ;; c]") => "(ncat-2 (row a b (error-t)) c)" + # Can't mix spaces and multiple ;'s + ((v=v"1.7",), "[a 
b ;; c]") => "(ncat-2 (row a b (error-t)) c)" + # Linebreaks not significant before closing `]` + "[a b\n\n]" => "(hcat a b)" + # Treat a linebreak prior to a value as a semicolon (ie, separator for + # the first dimension) if no previous semicolons observed + "[a \n b]" => "(vcat a b)" + # Can't mix multiple ;'s and spaces + ((v=v"1.7",), "[a ;; b c]") => "(ncat-2 a (row b (error-t) c))" + # Empty N-dimensional arrays + ((v=v"1.8",), "[;]") => "(ncat-1)" + ((v=v"1.8",), "[;;]") => "(ncat-2)" + ((v=v"1.8",), "[\n ;; \n ]") => "(ncat-2)" + ((v=v"1.7",), "[;;]") => "(ncat-2 (error))" + # parse_string + "\"\"\"\n\$x\n a\"\"\"" => "(string-s x \"\\n\" \" a\")" + "\"a \$(x + y) b\"" => "(string \"a \" (parens (call-i x + y)) \" b\")" + "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" + "\"\$(x,y)\"" => "(string (parens (error x y)))" + "\"\$(x;y)\"" => "(string (parens (error x y)))" + "\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration (in y z))))))" + "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration (in y z))))))" + "\"\$(xs...)\"" => "(string (parens (... 
xs)))" + "\"a \$foo b\"" => "(string \"a \" foo \" b\")" + "\"\$var\"" => "(string var)" + "\"\$outer\"" => "(string outer)" + "\"\$in\"" => "(string in)" + # Triple-quoted dedenting: + "\"\"\"\nx\"\"\"" => raw"""(string-s "x")""" + "\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")""" + "```\n x\n y```" => raw"""(cmdstring-s-r "x\n" "y")""" + # Various newlines (\n \r \r\n) and whitespace (' ' \t) + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\r\n x\r\n y\"\"\"" => raw"""(string-s "x\n" "y")""" + # Spaces or tabs or mixtures acceptable + "\"\"\"\n\tx\n\ty\"\"\"" => raw"""(string-s "x\n" "y")""" + "\"\"\"\n \tx\n \ty\"\"\"" => raw"""(string-s "x\n" "y")""" + # Mismatched tab vs space not deindented + # Find minimum common prefix in mismatched whitespace + "\"\"\"\n\tx\n y\"\"\"" => raw"""(string-s "\tx\n" " y")""" + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" " y")""" + "\"\"\"\n x\n y\"\"\"" => raw"""(string-s " x\n" "y")""" + "\"\"\"\n \tx\n y\"\"\"" => raw"""(string-s "\tx\n" " y")""" + "\"\"\"\n x\n \ty\"\"\"" => raw"""(string-s " x\n" "\ty")""" + # Empty lines don't affect dedenting + "\"\"\"\n x\n\n y\"\"\"" => raw"""(string-s "x\n" "\n" "y")""" + # Non-empty first line doesn't participate in deindentation + "\"\"\" x\n y\"\"\"" => raw"""(string-s " x\n" "y")""" + # Dedenting and interpolations + "\"\"\"\n \$a\n \$b\"\"\"" => raw"""(string-s a "\n" b)""" + "\"\"\"\n \$a \n \$b\"\"\"" => raw"""(string-s a " \n" b)""" + "\"\"\"\n \$a\n \$b\n\"\"\"" => raw"""(string-s " " a "\n" " " b "\n")""" + # Empty chunks after dedent are removed + "\"\"\"\n \n \"\"\"" => "(string-s \"\\n\")" + # Newline at end of string + "\"\"\"\n x\n y\n\"\"\"" => raw"""(string-s " x\n" " y\n")""" + # Empty strings, or empty after triple quoted processing + "\"\"" => "(string \"\")" + "\"\"\"\n \"\"\"" => "(string-s \"\")" + # Missing delimiter + "\"str" => "(string \"str\" (error-t))" + # String 
interpolations + "\"\$x\$y\$z\"" => "(string x y z)" + "\"\$(x)\"" => "(string (parens x))" + "\"\$x\"" => "(string x)" + # Strings with embedded whitespace trivia + "\"a\\\nb\"" => raw"""(string "a" "b")""" + "\"a\\\rb\"" => raw"""(string "a" "b")""" + "\"a\\\r\nb\"" => raw"""(string "a" "b")""" + "\"a\\\n \tb\"" => raw"""(string "a" "b")""" + # Strings with only a single valid string chunk + "\"str\"" => "(string \"str\")" + "\"a\\\n\"" => "(string \"a\")" + "\"a\\\r\"" => "(string \"a\")" + "\"a\\\r\n\"" => "(string \"a\")" + ], + JuliaSyntax.parse_atom => [ + # errors in literals + "\"\\xqqq\"" => "(string (ErrorInvalidEscapeSequence))" + "'\\xq'" => "(char (ErrorInvalidEscapeSequence))" + "'ab'" => "(char (ErrorOverLongCharacter))" + "\"\xf5\"" => "(string (ErrorInvalidUTF8))" + "'\xf5'" => "(char (ErrorInvalidUTF8))" + "`\xf5`" => "(cmdstring-r (ErrorInvalidUTF8))" + "10.0e1000'" => "(ErrorNumericOverflow)" + "10.0f100'" => "(ErrorNumericOverflow)" + ], + JuliaSyntax.parse_stmts => with_version.(v"1.11", [ + "function f(public)\n public + 3\nend" => "(function (call f public) (block (call-i public + 3)))" + "public A, B" => "(public A B)" + "if true \n public *= 4 \n end" => "(if true (block (op= public * 4)))" + "module Mod\n public A, B \n end" => "(module Mod (block (public A B)))" + "module Mod2\n a = 3; b = 6; public a, b\n end" => "(module Mod2 (block (= a 3) (= b 6) (public a b)))" + "a = 3; b = 6; public a, b" => "(toplevel-; (= a 3) (= b 6) (public a b))" + "begin \n public A, B \n end" => PARSE_ERROR + "if true \n public A, B \n end" => PARSE_ERROR + "public export=true foo, bar" => PARSE_ERROR # but these may be + "public experimental=true foo, bar" => PARSE_ERROR # supported soon ;) + "public(x::String) = false" => "(function-= (call public (::-i x String)) false)" + "module M; export @a; end" => "(module M (block (export (macro_name a))))" + "module M; public @a; end" => "(module M (block (public (macro_name a))))" + "module M; export ⤈; end" => 
"(module M (block (export ⤈)))" + "module M; public ⤈; end" => "(module M (block (public ⤈)))" + "public = 4" => "(= public 4)" + "public[7] = 5" => "(= (ref public 7) 5)" + "public() = 6" => "(function-= (call public) 6)" + ]), + JuliaSyntax.parse_stmts => [ + ((v = v"1.12",), "@callmemacro(b::Float64) = 2") => "(= (macrocall-p (macro_name callmemacro) (::-i b Float64)) 2)" + ], + JuliaSyntax.parse_docstring => [ + """ "notdoc" ] """ => "(string \"notdoc\")" + """ "notdoc" \n] """ => "(string \"notdoc\")" + """ "notdoc" \n\n foo """ => "(string \"notdoc\")" + """ "doc" \n foo """ => """(doc (string "doc") foo)""" + """ "doc" foo """ => """(doc (string "doc") foo)""" + """ "doc \$x" foo """ => """(doc (string "doc " x) foo)""" + # Allow docstrings with embedded trailing whitespace trivia + "\"\"\"\n doc\n \"\"\" foo" => """(doc (string-s "doc\\n") foo)""" + ], +] + +@testset "Inline test cases" begin + @testset "$production" for (production, test_specs) in tests + @testset "$(repr(input))" for (input, output) in test_specs + test_parse(production, input, output) + end + end +end + +parsestmt_test_specs = [ + # whitespace before keywords in space-insensitive mode + "(y::\nif x z end)" => "(parens (::-i y (if x (block z))))" + # Contextual keyword pairs inside parentheses + "(abstract type X end)" => "(parens (abstract X))" + "(mutable struct X end)" => "(parens (struct-mut X (block)))" + # parsing of tricky primes + "x in'c'" => "(call-i x in (char 'c'))" + "1where'c'" => "(where 1 (char 'c'))" + ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" + # unary subtype ops and newlines + "a +\n\n<:" => "(call-i a + <:)" + "for\n\n<:" => "(for (iteration (in <: (error (error-t)))) (block (error)) (error-t))" + # Empty character consumes trailing ' delimiter (ideally this could be + # tested above but we don't require the input stream to be consumed in the + # unit tests there. + "''" => "(char (error))" + + # The following may not be ideal error recovery! 
But at least the parser + # shouldn't crash + "@(x y)" => "(macrocall (macro_name (parens x (error-t y))))" + "|(&\nfunction" => "(call | (& (function (error (error)) (block (error)) (error-t))) (error-t))" + "@(" => "(macrocall (macro_name (parens (error-t))))" + "x = @(" => "(= x (macrocall (macro_name (parens (error-t)))))" + "function(where" => "(function (tuple-p where (error-t)) (block (error)) (error-t))" + # Contextual keyword pairs must not be separated by newlines even within parens + "(abstract\ntype X end)" => "(wrapper (parens abstract (error-t type X)) (error-t end ✘))" + "(mutable\nstruct X end)" => "(wrapper (parens mutable (error-t struct X)) (error-t end ✘))" + + # Lexer vs parser: issues detecting which tokens are string delimiters and + # detecting raw vs non-raw strings. The old parser was tightly coupled to + # the lexer and the parser state was used to disambiguate these cases. + "x in' '" => "(call-i x in (char (error)))" + "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))" + "var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))" + "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))" + + # trailing junk in generators (issue #407) + "(x for x = xs a)" => "(parens (generator x (iteration (in x xs))) (error-t a))" + "(x for x = xs a, b)" => "(parens (generator x (iteration (in x xs))) (error-t a ✘ b))" + "f(x for x = xs a)" => "(call f (generator x (iteration (in x xs))) (error-t a))" +] + +@testset "Parser does not crash on broken code" begin + @testset "$(repr(input))" for (input, output) in parsestmt_test_specs + test_parse(JuliaSyntax.parse_stmts, input, output) + end +end + +parsestmt_with_kind_tests = [ + # Most operators are semantically just normal identifiers after parsing so + # get the Kind K"Identifier" + "+" => "+::Identifier" + "a + b" => "(call-i a::Identifier +::Identifier b::Identifier)" + "a .+ b" => "(dotcall-i a::Identifier 
+::Identifier b::Identifier)" + "a |> b" => "(call-i a::Identifier |>::Identifier b::Identifier)" + "a => b" => "(call-i a::Identifier =>::Identifier b::Identifier)" + "a → b" => "(call-i a::Identifier →::Identifier b::Identifier)" + "a < b < c" => "(comparison a::Identifier <::Identifier b::Identifier <::Identifier c::Identifier)" + "a .<: b"=> "(dotcall-i a::Identifier <:::Identifier b::Identifier)" + "a .. b" => "(call-i a::Identifier ..::Identifier b::Identifier)" + "a : b" => "(call-i a::Identifier :::Identifier b::Identifier)" + "-2^x" => "(call-pre -::Identifier (call-i 2::Integer ^::Identifier x::Identifier))" + "-(2)" => "(call-pre -::Identifier (parens 2::Integer))" + "<:(a,)" => "(<:-, a::Identifier)" + "- 2" => "(call-pre -::Identifier 2::Integer)" + "/x" => "(call-pre (error /::Identifier) x::Identifier)" + "a^b" => "(call-i a::Identifier ^::Identifier b::Identifier)" + "f.'" => "(dotcall-post f::Identifier (error '::Identifier))" + "f'" => "(call-post f::Identifier '::Identifier)" + # Standalone syntactic ops which keep their kind - they can't really be + # used in a sane way as identifiers or interpolated into expressions + # because they have their own syntactic forms. + ":(::)" => "(quote-: (parens ::::::))" + ":(\$)" => "(quote-: (parens \$::\$))" + ":(<:)" => "(quote-: (parens <:::<:))" + ":(&&)" => "(quote-: (parens &&::&&))" + ":(=)" => "(quote-: (parens =::=))" + "a := b" => "(:= a::Identifier b::Identifier)" + "a += b" => "(op= a::Identifier +::Identifier b::Identifier)" + "a .+= b" => "(.op= a::Identifier +::Identifier b::Identifier)" + "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)" + ":+=" => "(quote-: +=::op=)" + ":.+=" => "(quote-: (. 
+=::op=))" + # str/cmd macro name kinds + "x\"str\"" => """(macrocall x::StrMacroName (string-r "str"::String))""" + "x`str`" => """(macrocall x::CmdMacroName (cmdstring-r "str"::CmdString))""" +] + +@testset "parser `Kind` remapping" begin + @testset "$(repr(input))" for (input, output) in parsestmt_with_kind_tests + input = ((show_kind=true,), input) + test_parse(JuliaSyntax.parse_stmts, input, output) + end +end + +@testset "Trivia attachment" begin + # TODO: Need to expand this greatly to cover as many forms as possible! + @test show_green_tree("f(a;b)") == """ + 1:6 │[toplevel] + 1:6 │ [call] + 1:1 │ Identifier ✔ "f" + 2:2 │ ( "(" + 3:3 │ Identifier ✔ "a" + 4:5 │ [parameters] + 4:4 │ ; ";" + 5:5 │ Identifier ✔ "b" + 6:6 │ ) ")" + """ +end + +@testset "Unicode normalization in tree conversion" begin + # ɛµ normalizes to εμ + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" + # · and · normalize to ⋅ + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)" + # − ('\u2212') normalizes to - ('\u002d') + @test parse_to_sexpr_str(JuliaSyntax.parse_expr, "a \u2212 b") == "(call-i a - b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u2212= b") == "(op= a - b)" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a .\u2212= b") == "(.op= a - b)" +end + +@testset "Unbalanced bidirectional unicode" begin + # https://trojansource.codes + @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """ + function checkUserAccess(u::User) + if u.accessLevel != "user\u202e 
\u2066# users are not allowed\u2069\u2066" + return true + end + return false + end + """) + + @test_throws JuliaSyntax.ParseError parsestmt(GreenNode, """ + function checkUserAccess(u::User) + #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# + return true + #= end admin only \u202e \u2066end\u2069 \u2066=# + return false + end + """) +end diff --git a/JuliaSyntax/test/parser_api.jl b/JuliaSyntax/test/parser_api.jl new file mode 100644 index 0000000000000..10a09d3ace585 --- /dev/null +++ b/JuliaSyntax/test/parser_api.jl @@ -0,0 +1,237 @@ +@testset "parser API" begin + @testset "parse with String input" begin + @test parsestmt(Expr, " x ") == :x + @test JuliaSyntax.remove_linenums!(parseall(Expr, " x ")) == Expr(:toplevel, :x) + @test parseatom(Expr, " x ") == :x + @test parseatom(Expr, "(x)") == :x + + # SubString + @test parsestmt(Expr, SubString("x+y")) == :(x+y) + @test parsestmt(Expr, SubString("α+x")) == :(α+x) + @test parseatom(Expr, SubString("x+y",3,3)) == :y + + # Exceptions due to extra trailing syntax + @test_throws JuliaSyntax.ParseError parseatom(Expr, "x+y") + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "x+y\nz") + + # ignore_warnings flag + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "import . .A") + @test parsestmt(Expr, "import . 
.A", ignore_warnings=true) == :(import ..A) + + # version selection + @test_throws JuliaSyntax.ParseError parsestmt(Expr, "[a ;; b]", version=v"1.6") + @test parsestmt(Expr, "[a ;; b]", version=v"1.7") == Expr(:ncat, 2, :a, :b) + + # filename + @test parsestmt(Expr, "begin\na\nend", filename="foo.jl", first_line=55) == + Expr(:block, LineNumberNode(56, Symbol("foo.jl")), :a) + + # ignore_trivia + @test parseatom(Expr, " x ", ignore_trivia=true) == :x + @test_throws JuliaSyntax.ParseError parseatom(Expr, " x ", ignore_trivia=false) + + # Top level parsing + @test parseall(Expr, "a\nb") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) + @test parseall(Expr, "a\nb #==#") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) + @test parseall(Expr, "#==#\na\nb") == + Expr(:toplevel, LineNumberNode(2), :a, LineNumberNode(3), :b) + @test parseall(Expr, "a\nb\n#==#") == + Expr(:toplevel, LineNumberNode(1), :a, LineNumberNode(2), :b) + end + + @testset "IO input" begin + # IOBuffer + io = IOBuffer("x+y") + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + io = IOBuffer("x+y") + seek(io, 2) + @test parse!(Expr, io, rule=:atom) == (:y, []) + @test position(io) == 3 + # A GenericIOBuffer, not actually IOBuffer + io = IOBuffer(SubString("x+y")) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + # Another type of GenericIOBuffer + io = IOBuffer(codeunits("x+y")) + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + # IOStream + mktemp() do path, io + write(io, "x+y") + close(io) + + open(path, "r") do io + @test parse!(Expr, io, rule=:statement) == (:(x+y), []) + @test position(io) == 3 + end + end + end + + @testset "parse with String and index input" begin + # String + let + ex,pos = parseall(Expr, "x+y\nz", 1) + @test JuliaSyntax.remove_linenums!(ex) == Expr(:toplevel, :(x+y), :z) + @test pos == 6 + end + @test parsestmt(Expr, "x+y\nz", 1) == 
(:(x+y), 4) + @test parseatom(Expr, "x+y\nz", 1) == (:x, 2) + @test parseatom(Expr, "x+y\nz", 5) == (:z, 6) + + # SubString + @test parsestmt(Expr, SubString("α+x\ny"), 1) == (:(α+x), 5) + @test parseatom(Expr, SubString("x+y"), 1) == (:x, 2) + @test parseatom(Expr, SubString("x+y"), 3) == (:y, 4) + + @test parseatom(Expr, SubString("x+1.0"), 3) == (1.0, 6) + @test parseatom(Expr, SubString("x+\"\n\""), 3) == ("\n", 6) + + # Line numbers are relative to the start of the string we're currently + # parsing + @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 1) == + (Expr(:block, LineNumberNode(2), :a), 12) + @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 12) == + (Expr(:block, LineNumberNode(3), :b), 24) + end + + @testset "error/warning handling" begin + parseshow(s;kws...) = sprint(show, MIME("text/x.sexpression"), parsestmt(SyntaxNode, s; kws...)) + @test_throws JuliaSyntax.ParseError parseshow("try finally catch ex end") + @test parseshow("try finally catch ex end", ignore_warnings=true) == + "(try (block) (finally (block)) (catch ex (block)))" + # ignore_errors + @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]") + @test_throws JuliaSyntax.ParseError parseshow("[a; b, c]", ignore_warnings=true) + @test parseshow("[a; b, c]", ignore_errors=true) == "(vcat a b (error-t) c)" + # errors in literals + @test parseshow("\"\\z\"", ignore_errors=true) == "(string (ErrorInvalidEscapeSequence))" + @test parseshow("'\\z'", ignore_errors=true) == "(char (ErrorInvalidEscapeSequence))" + @test parseshow("'abc'", ignore_errors=true) == "(char (ErrorOverLongCharacter))" + @test parseshow("1e1000", ignore_errors=true) == "(ErrorNumericOverflow)" + @test parseshow("1f1000", ignore_errors=true) == "(ErrorNumericOverflow)" + end +end + +@testset "ParseError printing" begin + try + parsestmt(SyntaxNode, "a -- b -- c", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test 
sprint(showerror, exc) == """ + ParseError: + # Error @ somefile.jl:1:3 + a -- b -- c + # └┘ ── invalid operator""" + @test occursin("Stacktrace:\n", sprint(showerror, exc, catch_backtrace())) + file_url = JuliaSyntax._file_url("somefile.jl") + @test sprint(showerror, exc, context=:color=>true) == """ + ParseError: + \e[90m# Error @ \e[0;0m\e]8;;$file_url#1:3\e\\\e[90msomefile.jl:1:3\e[0;0m\e]8;;\e\\ + a \e[48;2;120;70;70m--\e[0;0m b -- c + \e[90m# └┘ ── \e[0;0m\e[91minvalid operator\e[0;0m""" + end + + try + # Test that warnings are printed first followed by only the first error + parsestmt(SyntaxNode, """ + @(a) + x -- y + z -- y""", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test sprint(showerror, exc) == """ + ParseError: + # Warning @ somefile.jl:1:2 + @(a) + #└─┘ ── parenthesizing macro names is unnecessary + # Error @ somefile.jl:2:1 + @(a) + x + ╙ ── unexpected text after parsing statement""" + end + + try + # Test that initial warnings are always printed + parsestmt(SyntaxNode, """ + @(a)""", filename="somefile.jl") + @assert false "error should be thrown" + catch exc + @test exc isa JuliaSyntax.ParseError + @test sprint(showerror, exc) == """ + ParseError: some warnings detected: + # Warning @ somefile.jl:1:2 + @(a) + #└─┘ ── parenthesizing macro names is unnecessary""" + end +end + +tokensplit(str; kws...) = [kind(tok) => untokenize(tok, str) for tok in tokenize(str; kws...)] + +@testset "tokenize() API" begin + # tokenize() is eager + @test tokenize("aba") isa Vector{JuliaSyntax.Token} + + # . is a separate token from + in `.+` + @test tokensplit("a .+ β") == [ + K"Identifier" => "a", + K"Whitespace" => " ", + K"." => ".", + K"Identifier" => "+", + K"Whitespace" => " ", + K"Identifier" => "β", + ] + + # + is kind K"+" when operators in identifier position are emitted as + # operator kinds. 
+ @test tokensplit("a .+ β"; operators_as_identifiers=false) == [ + K"Identifier" => "a", + K"Whitespace" => " ", + K"." => ".", + K"+" => "+", + K"Whitespace" => " ", + K"Identifier" => "β", + ] + + # Contextual keywords become identifiers where necessary + @test tokensplit("outer = 1") == [ + K"Identifier" => "outer", + K"Whitespace" => " ", + K"=" => "=", + K"Whitespace" => " ", + K"Integer" => "1", + ] + # Including word operators + @test tokensplit("where = 1"; operators_as_identifiers=false) == [ + K"Identifier" => "where", + K"Whitespace" => " ", + K"=" => "=", + K"Whitespace" => " ", + K"Integer" => "1", + ] + + # A predicate based on flags() + @test JuliaSyntax.is_suffixed(tokenize("+₁")[1]) + + # Buffer interface + @test tokenize(Vector{UInt8}("a + b")) == tokenize("a + b") + + buf = Vector{UInt8}("a-β") + @test untokenize.(tokenize(buf), Ref(buf,)) == [ + Vector{UInt8}("a"), + Vector{UInt8}("-"), + Vector{UInt8}("β") + ] + + @test kind(JuliaSyntax.Token()) == K"None" + + @test tokensplit("'\\") == [ + K"'" => "'", + K"ErrorInvalidEscapeSequence" => "\\", + K"error" => "" + ] +end diff --git a/JuliaSyntax/test/runtests.jl b/JuliaSyntax/test/runtests.jl new file mode 100644 index 0000000000000..644f073124982 --- /dev/null +++ b/JuliaSyntax/test/runtests.jl @@ -0,0 +1,38 @@ +if !(@isdefined JuliaSyntax) + using JuliaSyntax +end + +using Test + +include("test_utils.jl") +include("test_utils_tests.jl") +include("fuzz_test.jl") + +include("utils.jl") +include("kinds.jl") + +@testset "Tokenize" begin + include("tokenize.jl") +end + +include("parse_stream.jl") +include("parser.jl") +include("green_node.jl") +include("syntax_tree.jl") +include("diagnostics.jl") +include("parser_api.jl") +include("expr.jl") +@testset "Parsing literals from strings" begin + include("literal_parsing.jl") +end +include("source_files.jl") + +if VERSION >= v"1.6" + # Tests restricted to 1.6+ due to + # * Core._parse hook doesn't exist on v1.5 and lower + # * Reference parser bugs which 
would need workarounds for package parse comparisons + include("hooks.jl") + include("parse_packages.jl") +end + +include("serialization.jl") diff --git a/JuliaSyntax/test/runtests_vendored.jl b/JuliaSyntax/test/runtests_vendored.jl new file mode 100644 index 0000000000000..52980e4917dcf --- /dev/null +++ b/JuliaSyntax/test/runtests_vendored.jl @@ -0,0 +1,4 @@ +# Test copy of JuliaSyntax vendored into Base +using Base.JuliaSyntax: JuliaSyntax + +include("runtests.jl") diff --git a/JuliaSyntax/test/serialization.jl b/JuliaSyntax/test/serialization.jl new file mode 100644 index 0000000000000..abdc5fa61e72f --- /dev/null +++ b/JuliaSyntax/test/serialization.jl @@ -0,0 +1,29 @@ +using Serialization + +@testset "Equality $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + y = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + z = JuliaSyntax.parsestmt(T, "f(x) = 2 + x") + @test x == y + @test x != z + @test y != z +end + +@testset "Hashing $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + y = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + z = hash(JuliaSyntax.parsestmt(T, "f(x) = 2 + x"))::UInt + @test x == y # Correctness + @test x != z # Collision + @test y != z # Collision +end + +@testset "Serialization $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parsestmt(T, "f(x) = x ⋅ 2") + f = tempname() + open(f, "w") do io + serialize(io, x) + end + y = open(deserialize, f, "r") + @test x == y +end diff --git a/JuliaSyntax/test/source_files.jl b/JuliaSyntax/test/source_files.jl new file mode 100644 index 0000000000000..d518124f1e6e1 --- /dev/null +++ b/JuliaSyntax/test/source_files.jl @@ -0,0 +1,230 @@ +@testset "SourceFile lines and column indexing" begin + @test source_location(SourceFile("a"), 1) == (1,1) + @test source_location(SourceFile("a"), 2) == (1,2) + + @test source_location(SourceFile("a\n"), 2) == (1,2) + @test 
source_location(SourceFile("a\n"), 3) == (2,1) + + @test source_location(SourceFile("a\nb\n"), 2) == (1,2) + @test source_location(SourceFile("a\nb\n"), 3) == (2,1) + @test source_location(SourceFile("a\nb\n"), 4) == (2,2) + @test source_location(SourceFile("a\nb\n"), 5) == (3,1) + + @test source_location(SourceFile("\n\n"), 1) == (1,1) + @test source_location(SourceFile("\n\n"), 2) == (2,1) + @test source_location(SourceFile("\n\n"), 3) == (3,1) + + @test source_location(SourceFile("a"; first_line=7), 1) == (7,1) + @test source_location(SourceFile("a"; first_line=7), 2) == (7,2) + + @test source_location(SourceFile("a\n"; first_line=7), 2) == (7,2) + @test source_location(SourceFile("a\n"; first_line=7), 3) == (8,1) + + @test source_location(SourceFile("a\nb\n"; first_line=7), 2) == (7,2) + @test source_location(SourceFile("a\nb\n"; first_line=7), 3) == (8,1) + @test source_location(SourceFile("a\nb\n"; first_line=7), 4) == (8,2) + @test source_location(SourceFile("a\nb\n"; first_line=7), 5) == (9,1) + + + mktemp() do path, io + write(io, "a\n") + @test source_location(SourceFile(; filename=path), 1) == (1,1) + @test source_location(SourceFile(; filename=path, first_line=7), 1) == (7,1) + end + + # byte offset + sf = SourceFile("a\nbb\nccc\ndddd", first_index=10) + @test source_location(sf, 13) == (2,2) + @test source_line(sf, 15) == 3 + @test source_line_range(sf, 10) == (10,11) + @test source_line_range(sf, 11) == (10,11) + @test source_line_range(sf, 12) == (12,14) + @test source_line_range(sf, 14) == (12,14) + @test source_line_range(sf, 15) == (15,18) + + # source_line convenience function + @test source_line(SourceFile("a\nb\n"), 2) == 1 + @test source_line(SourceFile("a\nb\n"), 3) == 2 +end + +@testset "SourceFile position indexing" begin + @test SourceFile("a\nb\n")[1:2] == "a\n" + @test SourceFile("a\nb\n")[3:end] == "b\n" + + # unicode + @test SourceFile("αβ")[1:2] == "α" + @test SourceFile("αβ")[3] == 'β' + + # offsets + sf = SourceFile("abcd", 
first_index=10) + @test firstindex(sf) == 10 + @test lastindex(sf) == 13 + @test sf[10] == 'a' + @test sf[10:11] == "ab" + @test view(sf, 10:11) == "ab" + + @test thisind(SourceFile("xαx", first_index=10), 10) == 10 + @test thisind(SourceFile("xαx", first_index=10), 11) == 11 + @test thisind(SourceFile("xαx", first_index=10), 12) == 11 + @test thisind(SourceFile("xαx", first_index=10), 13) == 13 + + if Base.VERSION >= v"1.4" + # Protect the `[begin` from being viewed by the parser on older Julia versions + @test eval(Meta.parse("SourceFile(\"a\nb\n\")[begin:end]")) == "a\nb\n" + @test eval(Meta.parse("SourceFile(\"abcd\", first_index=10)[begin+1:end-1]")) == "bc" + end +end + +@testset "SourceFile printing and text extraction" begin + srcf = SourceFile("module Foo\nend") + @test sprint(show, MIME("text/plain"), srcf) == """ + ## SourceFile ## + module Foo + end""" + @test sourcetext(srcf) == "module Foo\nend" +end + + +@testset "highlight()" begin + src = SourceFile(""" + abcd + αβγδ + +-*/""") + + # Empty ranges + @test sprint(highlight, src, 1:0) == "abcd\n└\nαβγδ\n+-*/" + @test sprint(highlight, src, 2:1) == "abcd\n#└\nαβγδ\n+-*/" + @test sprint(highlight, src, 3:2) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 4:3) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 5:4) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 6:5) == "abcd\nαβγδ\n└\n+-*/" + @test sprint(highlight, src, 19:18) == "abcd\nαβγδ\n+-*/\n# └" + @test sprint(io->highlight(io, src, 1:0, context_lines_after=0, note="hi")) == + "abcd\n└ ── hi" + + # Single line ranges + @test sprint(highlight, src, 1:4) == "abcd\n└──┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 2:4) == "abcd\n#└─┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 3:4) == "abcd\n# └┘\nαβγδ\n+-*/" + @test sprint(highlight, src, 4:4) == "abcd\n# ╙\nαβγδ\n+-*/" + @test sprint(highlight, src, 5:5) == "abcd\n# └\nαβγδ\n+-*/" + @test sprint(highlight, src, 6:6) == "abcd\nαβγδ\n╙\n+-*/" + @test sprint(highlight, 
src, 6:9) == "abcd\nαβγδ\n└┘\n+-*/" + @test sprint(highlight, src, 8:8) == "abcd\nαβγδ\n#╙\n+-*/" + + # multi-byte chars + @test sprint(highlight, src, 8:13) == """ + abcd + αβγδ + #└─┘ + +-*/""" + # multi-byte char at eof + @test sprint(highlight, SourceFile("a α"), 3:4) == "a α\n# ╙" + @test sprint(highlight, SourceFile("a\nα"), 1:4) == "┌\na\nα\n┘" + @test sprint(highlight, SourceFile("a\nb\nα"), 3:3) == "a\nb\n╙\nα" + + # empty files + @test sprint(highlight, SourceFile(""), 1:0) == "└" + + # Multi-line ranges + @test sprint(highlight, src, 1:7) == """ + ┌─── + abcd + αβγδ + ┘ + +-*/""" + @test sprint(highlight, src, 2:7) == """ + #┌── + abcd + αβγδ + ┘ + +-*/""" + @test sprint(highlight, src, 2:9) == """ + #┌── + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 4:9) == """ + # ┌ + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 5:9) == """ + # ┌ + abcd + αβγδ + #┘ + +-*/""" + @test sprint(highlight, src, 6:15) == """ + abcd + ┌─── + αβγδ + +-*/ + ┘""" + @test sprint(highlight, src, 8:15) == """ + abcd + #┌── + αβγδ + +-*/ + ┘""" + @test sprint(highlight, src, 1:18) == """ + ┌─── + abcd + αβγδ + +-*/ + #──┘""" + + # context lines + @test sprint(io->highlight(io, src, 8:13; + context_lines_before=0, + context_lines_after=0)) == """ + αβγδ + #└─┘""" + @test sprint(io->highlight(io, src, 8:13; context_lines_after=0)) == """ + abcd + αβγδ + #└─┘""" + @test sprint(io->highlight(io, src, 8:13; context_lines_before=0)) == """ + αβγδ + #└─┘ + +-*/""" + @test sprint(io->highlight(io, src, 1:18; context_lines_inner=0)) == """ + ┌─── + abcd + ⋮ + +-*/ + #──┘""" + + # annotations + @test sprint(io->highlight(io, src, 8:13; note="hello")) == """ + abcd + αβγδ + #└─┘ ── hello + +-*/""" + @test sprint(io->highlight(io, src, 1:13; note="hello")) == """ + ┌─── + abcd + αβγδ + #──┘ ── hello + +-*/""" + @test sprint(io->highlight(io, src, 8:13; + note=(io,indent,w)->print(io, "\n$indent$('!'^w) hello"))) == """ + abcd + αβγδ + #└─┘ + #!!! 
hello + +-*/""" + + # colored output + @test sprint(io->highlight(io, src, 8:13; context_lines_after=0, note="hello", notecolor=:light_red), + context=:color=>true) == + "abcd\nα\e[48;2;120;70;70mβγδ\e[0;0m\n\e[90m#└─┘ ── \e[0;0m\e[91mhello\e[0;0m" + @test sprint(io->highlight(io, src, 1:13; context_lines_after=0, note="hello", notecolor=(255,0,0)), + context=:color=>true) == + "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70mαβγδ\e[0;0m\n\e[90m#──┘ ── \e[0;0m\e[38;2;255;0;0mhello\e[0;0m" + @test sprint(io->highlight(io, src, 1:18, context_lines_inner=0), + context=:color=>true) == + "\e[90m┌───\e[0;0m\n\e[48;2;120;70;70mabcd\e[0;0m\n\e[48;2;120;70;70m\e[0;0m⋮\n\e[48;2;120;70;70m+-*/\e[0;0m\n\e[90m#──┘\e[0;0m" +end diff --git a/JuliaSyntax/test/syntax_tree.jl b/JuliaSyntax/test/syntax_tree.jl new file mode 100644 index 0000000000000..3e2361ca56b2f --- /dev/null +++ b/JuliaSyntax/test/syntax_tree.jl @@ -0,0 +1,117 @@ +@testset "SyntaxNode" begin + # Child access + tt = "a*b + c" + t = parsestmt(SyntaxNode, tt) + + @test sourcetext(t[1]) == "a*b" + @test sourcetext(t[1][1]) == "a" + @test sourcetext(t[1][2]) == "*" + @test sourcetext(t[1][3]) == "b" + @test sourcetext(t[2]) == "+" + @test sourcetext(t[3]) == "c" + + @test JuliaSyntax.first_byte(t[2]) == findfirst(==('+'), tt) + @test JuliaSyntax.source_line(t[3]) == 1 + @test source_location(t[3]) == (1, 7) + + # Child indexing + @test t[end] === t[3] + @test sourcetext.(t[2:3]) == ["+", "c"] + @test sourcetext.(t[2:end]) == ["+", "c"] + @test firstindex(t) == 1 + @test lastindex(t) == 3 + @test !is_leaf(t) + @test is_leaf(t[3]) + + @test sprint(show, t) == "(call-i (call-i a * b) + c)" + @test sprint(io->show(io, MIME("text/x.sexpression"), t, show_kind=true)) == + "(call-i (call-i a::Identifier *::Identifier b::Identifier) +::Identifier c::Identifier)" + + @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" + + # Pass-through field access + node = t[1][1] + @test node.val === :a + # The specific 
error text has evolved over Julia versions. Check that it involves `SyntaxData` and immutability + e = try node.val = :q catch e e end + @test occursin("immutable", e.msg) && occursin("SyntaxData", e.msg) + + # Newline-terminated source + t = parsestmt(SyntaxNode, "a*b + c\n") + @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙" + + # copy + t = parsestmt(SyntaxNode, "a*b + c") + ct = copy(t) + ct.data = nothing + @test ct.data === nothing && t.data !== nothing + @test ct[1].parent === ct + @test ct[1] !== t[1] + + node = parsestmt(SyntaxNode, "f()") + push!(node, parsestmt(SyntaxNode, "x")) + @test length(children(node)) == 2 + node[2] = parsestmt(SyntaxNode, "y") + @test sourcetext(node[2]) == "y" + + # SyntaxNode with offsets + t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13) + @test first(byte_range(t)) == 13 + @test first(byte_range(t[1])) == 19 + @test t[1].val == :b + + # Unicode character ranges + src = "ab + αβ" + t = parsestmt(SyntaxNode, src) + @test char_range(t[1]) == 1:2 + @test char_range(t[2]) == 4:4 + @test char_range(t[3]) == 6:8 + # conversely, β takes two bytes so char_range(t[3]) != byte_range(t[3]) + @test byte_range(t[3]) == 6:9 +end + +@testset "SyntaxNode pretty printing" begin + t = parsestmt(SyntaxNode, "f(a*b,\n c)", filename="foo.jl") + @test sprint(show, MIME("text/plain"), t) == """ + SyntaxNode: + [call] + f :: Identifier + [call-i] + a :: Identifier + * :: Identifier + b :: Identifier + c :: Identifier + """ + + @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """ + SyntaxNode: + line:col│ byte_range │ tree + -file- │ "foo.jl" + 1:1 │ 1:11 │[call] + 1:1 │ 1:1 │ f :: Identifier + 1:3 │ 3:5 │ [call-i] + 1:3 │ 3:3 │ a :: Identifier + 1:4 │ 4:4 │ * :: Identifier + 1:5 │ 5:5 │ b :: Identifier + 2:3 │ 10:10 │ c :: Identifier + """ + + @test sprint(io->show(io, MIME("text/plain"), t, show_kind=false)) == """ + SyntaxNode: + [call] + f + [call-i] + a + * + b + c + """ + + t,_ = parsestmt(SyntaxNode, "begin 
a end\nbegin b end", 13, first_line=100) + @test sprint(io->show(io, MIME("text/plain"), t, show_location=true)) == """ + SyntaxNode: + line:col│ byte_range │ tree + 100:1 │ 13:23 │[block] + 100:7 │ 19:19 │ b :: Identifier + """ +end diff --git a/JuliaSyntax/test/test_utils.jl b/JuliaSyntax/test/test_utils.jl new file mode 100644 index 0000000000000..ed3d11e2f966f --- /dev/null +++ b/JuliaSyntax/test/test_utils.jl @@ -0,0 +1,484 @@ +using Test + +# We need a relative include here as JuliaSyntax may come from Base. +using .JuliaSyntax: + # Parsing + ParseStream, + ParseState, + Diagnostic, + SourceFile, + source_location, + source_line, + source_line_range, + parse!, + parsestmt, + parseall, + parseatom, + build_tree, + @K_str, + # Nodes + GreenNode, + RedTreeCursor, + SyntaxNode, + ErrorVal, + # Node inspection + kind, + flags, + EMPTY_FLAGS, TRIVIA_FLAG, INFIX_FLAG, + head, + span, + SyntaxHead, + is_trivia, + sourcetext, + is_leaf, + numchildren, + children, + fl_parseall, + fl_parse, + highlight, + tokenize, + untokenize, + filename, + byte_range, + char_range + +if VERSION < v"1.6" + # Compat stuff which might not be in Base for older versions + using JuliaSyntax: isnothing, only, peek +end + +function toks(str) + ts = [JuliaSyntax.Tokenize.untokenize(t, str)=>kind(t) + for t in JuliaSyntax.Tokenize.tokenize(str)] + @test ts[end] == (""=>K"EndMarker") + pop!(ts) + ts +end + +function remove_macro_linenums!(ex) + if Meta.isexpr(ex, :macrocall) + ex.args[2] = nothing + end + if ex isa Expr + map!(remove_macro_linenums!, ex.args, ex.args) + end + return ex +end + +function remove_all_linenums!(ex) + JuliaSyntax.remove_linenums!(ex) + remove_macro_linenums!(ex) +end + +function kw_to_eq(ex) + return Meta.isexpr(ex, :kw) ? Expr(:(=), ex.args...) 
: ex +end + +function triple_string_roughly_equal(fl_str, str) + # Allow some leeway for a bug in the reference parser with + # triple quoted strings + lines = split(str, '\n') + fl_lines = split(fl_str, '\n') + if length(lines) != length(fl_lines) + return false + end + has_whitespace_only_line = + any(!isempty(fl_line) && all(c in " \t" for c in fl_line) + for fl_line in fl_lines) + if !has_whitespace_only_line + return str == fl_str + end + for (line, fl_line) in zip(lines, fl_lines) + if !all(c in " \t" for c in fl_line) && !endswith(line, fl_line) + return false + end + end + return true +end + +function exprs_equal_no_linenum(fl_ex, ex) + remove_all_linenums!(deepcopy(ex)) == remove_all_linenums!(deepcopy(fl_ex)) +end + +function is_eventually_call(ex) + return ex isa Expr && (ex.head === :call || + (ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1])) +end + +# Compare Expr from reference parser expression to JuliaSyntax parser, ignoring +# differences due to bugs in the reference parser. +function exprs_roughly_equal(fl_ex, ex) + if fl_ex isa Float64 && Meta.isexpr(ex, :call, 3) && + ex.args[1] == :* && + ex.args[2] == fl_ex && + (ex.args[3] == :f || ex.args[3] == :f0) + # 0x1p0f + return true + elseif !(fl_ex isa Expr) || !(ex isa Expr) + if fl_ex isa String && ex isa String + if fl_ex == ex + return true + else + return triple_string_roughly_equal(fl_ex, ex) + end + else + return fl_ex == ex + end + end + # Ignore differences in line number nodes within block-like constructs + fl_args = fl_ex.head in (:block, :quote, :toplevel) ? + filter(x->!(x isa LineNumberNode), fl_ex.args) : + fl_ex.args + args = ex.head in (:block, :quote, :toplevel) ? 
+ filter(x->!(x isa LineNumberNode), ex.args) : + ex.args + if (fl_ex.head == :block && ex.head == :tuple && + length(fl_args) == 2 && length(args) == 2 && + Meta.isexpr(args[1], :parameters, 1) && + exprs_roughly_equal(fl_args[2], args[1].args[1]) && + exprs_roughly_equal(fl_args[1], args[2])) + # Allow `(a; b,)`: + # * Reference parser produces a block + # * New parser produces a frankentuple + return true + end + if fl_ex.head != ex.head + return false + end + h = ex.head + if h == :function && Meta.isexpr(fl_args[1], :block) + blockargs = filter(x->!(x isa LineNumberNode), fl_args[1].args) + posargs = blockargs[1:max(0, length(blockargs))] + kwargs = blockargs[2:end] + for i = 1:length(kwargs) + if Meta.isexpr(kwargs[i], :(=)) + kwargs[i] = Expr(:kw, kwargs[i].args...) + end + end + fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...) + elseif h == :for + iterspec = args[1] + if is_eventually_call(iterspec.args[1]) && + Meta.isexpr(iterspec.args[2], :block) + blk = iterspec.args[2] + if length(blk.args) == 2 && blk.args[1] isa LineNumberNode + # Ignore short form function location differences in + # `for f() = 1:3 end` + iterspec.args[2] = blk.args[2] + end + end + elseif (h == :(=) || h == :kw) && Meta.isexpr(fl_args[1], :(::), 1) && + Meta.isexpr(fl_args[2], :block, 2) && fl_args[2].args[1] isa LineNumberNode + # The flisp parser adds an extra block around `w` in the following case + # f(::g(z) = w) = 1 + fl_args[2] = fl_args[2].args[2] + end + if length(fl_args) != length(args) + return false + end + if h == :do && length(args) >= 1 && Meta.isexpr(fl_args[1], :macrocall) + # Macrocalls with do, as in `@f(a=1) do\nend` use :kw in the + # reference parser for the `a=1`, but we regard this as a bug. + fl_args = copy(fl_args) + fl_args[1] = Expr(:macrocall, map(kw_to_eq, args[1].args)...) 
+ end + for i = 1:length(args) + if !exprs_roughly_equal(fl_args[i], args[i]) + return false + end + end + return true +end + +function parsers_agree_on_file(filename; kws...) + text = try + read(filename, String) + catch + # Something went wrong reading the file. This isn't a parser failure so + # ignore this case. + return true + end + parsers_agree_on_file(text, filename; kws...) +end + +function parsers_agree_on_file(text, filename; exprs_equal=exprs_equal_no_linenum) + fl_ex = fl_parseall(text, filename=filename) + if Meta.isexpr(fl_ex, :toplevel) && !isempty(fl_ex.args) && + Meta.isexpr(fl_ex.args[end], (:error, :incomplete)) + # Reference parser failed. This generally indicates a broken file not a + # parser problem, so ignore this case. + return true + end + try + stream = ParseStream(text) + parse!(stream) + ex = build_tree(Expr, stream, filename=filename) + return !JuliaSyntax.any_error(stream) && exprs_equal(fl_ex, ex) + catch exc + @error "Parsing failed" filename exception=current_exceptions() + return false + end +end + +function find_source_in_path(basedir) + src_list = String[] + for (root, dirs, files) in walkdir(basedir) + append!(src_list, (joinpath(root, f) for f in files + if endswith(f, ".jl") && (p = joinpath(root,f); !islink(p) && isfile(p)))) + end + src_list +end + +test_parse_all_in_path(basedir) = + test_parse_all_in_path(path->exprs_equal_no_linenum, basedir) + +function test_parse_all_in_path(compare_for_path::Function, basedir) + for filepath in find_source_in_path(basedir) + cmp = compare_for_path(filepath) + if isnothing(cmp) + continue + end + @testset "Parse $(relpath(filepath, basedir))" begin + text = try + read(filepath, String) + catch + # Something went wrong reading the file. This isn't a parser failure so + # ignore this case. 
+ continue + end + parsers_agree = parsers_agree_on_file(text, filepath, exprs_equal=cmp) + @test parsers_agree + if !parsers_agree + reduced_failures = reduce_text.(reduce_tree(text), + parsers_fuzzy_disagree) + @test reduced_failures == [] + end + end + end +end + +#------------------------------------------------------------------------------- +# Test case reduction + +# Check whether a given SyntaxNode converts to the same Expr as the flisp +# parser produces from the source text of the node. +function equals_flisp_parse(exprs_equal, tree) + node_text = sourcetext(tree) + # Reparse with JuliaSyntax. This is a crude way to ensure we're not missing + # some context from the parent node. + fl_ex = fl_parseall(node_text, filename="none") + if Meta.isexpr(fl_ex, :error) || (Meta.isexpr(fl_ex, :toplevel) && + length(fl_ex.args) >= 1 && + Meta.isexpr(fl_ex.args[end], :error)) + return true # Something went wrong in reduction; ignore these cases 😬 + end + ex = parseall(Expr, node_text, filename="none", ignore_errors=true) + exprs_equal(fl_ex, ex) +end + +function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) + if equals_flisp_parse(exprs_equal, tree) + return false + end + if is_leaf(tree) + push!(failing_subtrees, tree) + return true + end + had_failing_subtrees = false + if !is_leaf(tree) + for child in children(tree) + if is_trivia(child) || is_leaf(child) + continue + end + had_failing_subtrees |= _reduce_tree(failing_subtrees, child; exprs_equal=exprs_equal) + end + end + if !had_failing_subtrees + push!(failing_subtrees, tree) + end + return true +end + +""" + reduce_tree(tree::SyntaxNode; exprs_equal=exprs_equal_no_linenum) + +Select minimal subtrees of `tree` which are inconsistent between flisp and +JuliaSyntax parsers. +""" +function reduce_tree(tree::SyntaxNode; kws...) + subtrees = Vector{typeof(tree)}() + _reduce_tree(subtrees, tree; kws...) 
+ subtrees +end + +""" + reduce_tree(text::AbstractString; exprs_equal=exprs_equal_no_linenum) + +Find the minimal subtrees of the parsed form of `text` which are inconsistent +between flisp and JuliaSyntax parsers and return the source text of those +subtrees. +""" +function reduce_tree(text::AbstractString; kws...) + tree = parseall(SyntaxNode, text, ignore_warnings=true) + sourcetext.(reduce_tree(tree; kws...)) +end + + +#------------------------------------------------------------------------------- +# Text-based test case reduction +function parser_throws_exception(text) + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, text, ignore_errors=true) + false + catch + true + end +end + +function parsers_fuzzy_disagree(text::AbstractString) + fl_ex = fl_parseall(text, filename="none") + if Meta.isexpr(fl_ex, (:error,:incomplete)) || + (Meta.isexpr(fl_ex, :toplevel) && length(fl_ex.args) >= 1 && + Meta.isexpr(fl_ex.args[end], (:error,:incomplete))) + return false + end + try + ex = parseall(Expr, text, filename="none", ignore_errors=true) + return !exprs_roughly_equal(fl_ex, ex) + catch + @error "Reduction failed" text + return false + end +end + + +""" +Reduce text of a test case via combination of bisection and random deletion. + +This is suited to randomly generated strings, but it's surprisingly effective +for code-like strings as well. 
+""" +function reduce_text(str, parse_differs) + while true + if length(str) <= 1 + return str + end + m1 = thisind(str, length(str)÷2) + m2 = nextind(str, m1) + if parse_differs(str[1:m1]) + str = str[1:m1] + elseif parse_differs(str[m2:end]) + str = str[m2:end] + else + chunklen = clamp(length(str)÷10, 1, 10) + reduced = false + for i = 1:100 + m = thisind(str, rand(1:length(str)-chunklen)) + m3 = nextind(str, m+chunklen) + if m3 == nextind(str, m) + continue + end + s = str[1:m]*str[m3:end] + if parse_differs(s) + str = s + reduced = true + break + end + end + if !reduced + return str + end + end + end +end + +function show_green_tree(code; version::VersionNumber=v"1.6") + t = JuliaSyntax.parseall(GreenNode, code, version=version) + sprint(show, MIME"text/plain"(), t, code) +end + +#------------------------------------------------------------------------------- +# Parse s-expressions +function parse_sexpr(code) + st = ParseStream(code) + pos_stack = ParseStreamPosition[] + while true + k = peek(st) + if k == K"(" + push!(pos_stack, position(st)) + bump(st, TRIVIA_FLAG) + elseif k == K")" + if isempty(pos_stack) + bump(st, error="Mismatched `)` with no opening `(`") + break + else + bump(st, TRIVIA_FLAG) + end + emit(st, pop!(pos_stack), K"parens") + elseif k == K"Identifier" || k == K"Integer" + bump(st) + elseif k == K"NewlineWs" + bump(st, TRIVIA_FLAG) + elseif k == K"EndMarker" + if !isempty(pos_stack) + bump_invisible(st, K"error", error="Mismatched `)`") + end + break + else + bump(st, error="Unexpected token") + end + end + if JuliaSyntax.any_error(st) + throw(JuliaSyntax.ParseError(st)) + end + st +end + + +#------------------------------------------------------------------------------- +# Tools copied from Base.Meta which call core_parser_hook as if called by +# Meta.parse(), but without installing the global hook. 
+ +function _Meta_parse_string(text::AbstractString, filename::AbstractString, + lineno::Integer, index::Integer, options) + if index < 1 || index > ncodeunits(text) + 1 + throw(BoundsError(text, index)) + end + ex, offset::Int = JuliaSyntax.core_parser_hook(text, filename, lineno, index-1, options) + ex, offset+1 +end + +function Meta_parse(str::AbstractString, pos::Integer; + filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true) + ex, pos = _Meta_parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom) + if raise && Meta.isexpr(ex, :error) + err = ex.args[1] + if err isa String + err = Meta.ParseError(err) # For flisp parser + end + throw(err) + end + return ex, pos +end + +function Meta_parse(str::AbstractString; + filename="none", raise::Bool=true, depwarn::Bool=true) + ex, pos = Meta_parse(str, 1; filename=filename, greedy=true, raise=raise, depwarn=depwarn) + if Meta.isexpr(ex, :error) + return ex + end + if pos <= ncodeunits(str) + raise && throw(Meta.ParseError("extra token after end of expression")) + return Expr(:error, "extra token after end of expression") + end + return ex +end + +function Meta_parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1) + return _Meta_parse_string(text, String(filename), lineno, pos, :atom) +end + +function Meta_parseall(text::AbstractString; filename="none", lineno=1) + ex,_ = _Meta_parse_string(text, String(filename), lineno, 1, :all) + return ex +end diff --git a/JuliaSyntax/test/test_utils_tests.jl b/JuliaSyntax/test/test_utils_tests.jl new file mode 100644 index 0000000000000..51515515a83f5 --- /dev/null +++ b/JuliaSyntax/test/test_utils_tests.jl @@ -0,0 +1,37 @@ +# Tests for the test_utils go here to allow the utils to be included on their +# own without invoking the tests. 
+@testset "Reference parser bugs" begin + # `0x1.8p0f` + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f)) + @test exprs_roughly_equal(1.5, + Expr(:call, :*, 1.5, :f0)) + # `@f(a=1) do \n end` + @test exprs_roughly_equal(Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:kw, :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1)))), + Expr(:do, Expr(:macrocall, Symbol("@f"), LineNumberNode(1), Expr(:(=), :a, 1)), + Expr(:->, Expr(:tuple), Expr(:block, LineNumberNode(1))))) + # `"""\n a\n \n b"""` + @test exprs_roughly_equal("a\n \nb", " a\n\n b") + @test !exprs_roughly_equal("a\n x\nb", " a\n x\n b") + @test exprs_roughly_equal("a\n x\nb", "a\n x\nb") + # `(a; b,)` + @test exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :c), :a)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b), + Expr(:tuple, Expr(:parameters, :b), :c)) + @test !exprs_roughly_equal(Expr(:block, :a, LineNumberNode(1), :b, :c), + Expr(:tuple, Expr(:parameters, :b), :a)) + + # Line numbers for short form function defs in `for` :-( + @test exprs_roughly_equal(Expr(:for, Expr(:(=), + Expr(:call, :f), + 1), + Expr(:block, LineNumberNode(1))), + Expr(:for, Expr(:(=), + Expr(:call, :f), + Expr(:block, LineNumberNode(1), 1)), + Expr(:block, LineNumberNode(1)))) +end diff --git a/JuliaSyntax/test/tokenize.jl b/JuliaSyntax/test/tokenize.jl new file mode 100644 index 0000000000000..fe5bba6ac073e --- /dev/null +++ b/JuliaSyntax/test/tokenize.jl @@ -0,0 +1,1190 @@ +# Hack: Introduce a module here to isolate some Tokenize internals from JuliaSyntax +module TokenizeTests + +using Test + +using ..JuliaSyntax: + JuliaSyntax, + @K_str, + Kind, + kind, + is_error, + is_operator + +using ..JuliaSyntax.Tokenize: + Tokenize, + tokenize, + untokenize, + RawToken + +import ..toks + +tok(str, i = 1) = 
collect(tokenize(str))[i] + +strtok(str) = untokenize.(collect(tokenize(str)), str) + +function onlytok(str) + ts = collect(tokenize(str)) + (length(ts) == 2 && ts[2].kind == K"EndMarker") || + error("Expected one token got $(length(ts)-1)") + return ts[1].kind +end + +@testset "tokens" begin + for s in ["a", IOBuffer("a")] + l = tokenize(s) + @test Tokenize.readchar(l) == 'a' + + l_old = l + @test l == l_old + @test Tokenize.eof(l) + @test Tokenize.readchar(l) == Tokenize.EOF_CHAR + + end +end # testset + +@testset "tokenize unicode" begin + # FIXME: rm VERSION check once we implement our own is_identifier_char + emoji = VERSION < v"1.5" ? "😄" : "\U1F3F3\UFE0F\U200D\U1F308" # 🏳️‍🌈 requires newer Unicode + str = "𝘋 =2"*emoji + for s in [str, IOBuffer(str)] + l = tokenize(s) + kinds = [K"Identifier", K"Whitespace", K"=", + K"Integer", K"Identifier", K"EndMarker"] + token_strs = ["𝘋", " ", "=", "2", emoji, ""] + for (i, n) in enumerate(l) + @test kind(n) == kinds[i] + @test untokenize(n, str) == token_strs[i] + end + end +end # testset + +@testset "tokenize complex piece of code" begin + + str = """ + function foo!{T<:Bar}(x::{T}=12) + @time (x+x, x+x); + end + try + foo + catch + bar + end + @time x+x + y[[1 2 3]] + [1*2,2;3,4] + "string"; 'c' + (a&&b)||(a||b) + # comment + #= comment + is done here =# + 2%5 + a'/b' + a.'\\b.' + `command` + 12_sin(12) + {} + ' + """ + + # Generate the following with + # ``` + # for t in kind.(collect(tokenize(str))) + # print(kind(t), ",") + # end + # ``` + # and *check* it afterwards. 
+ + kinds = [K"function",K"Whitespace",K"Identifier",K"{",K"Identifier", + K"<:",K"Identifier",K"}",K"(",K"Identifier",K"::", + K"{",K"Identifier",K"}",K"=",K"Integer",K")", + + K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"(", + K"Identifier",K"+",K"Identifier",K",",K"Whitespace", + K"Identifier",K"+",K"Identifier",K")",K";", + + K"NewlineWs",K"end", + + K"NewlineWs",K"try", + K"NewlineWs",K"Identifier", + K"NewlineWs",K"catch", + K"NewlineWs",K"Identifier", + K"NewlineWs",K"end", + + K"NewlineWs",K"@",K"Identifier",K"Whitespace",K"Identifier", + K"+",K"Identifier", + + K"NewlineWs",K"Identifier",K"[",K"[",K"Integer",K"Whitespace", + K"Integer",K"Whitespace",K"Integer",K"]",K"]", + + K"NewlineWs",K"[",K"Integer",K"*",K"Integer",K",",K"Integer", + K";",K"Integer",K",",K"Integer",K"]", + + K"NewlineWs",K"\"",K"String",K"\"",K";",K"Whitespace",K"'",K"Char",K"'", + + K"NewlineWs",K"(",K"Identifier",K"&&",K"Identifier",K")",K"||", + K"(",K"Identifier",K"||",K"Identifier",K")", + + K"NewlineWs",K"Comment", + + K"NewlineWs",K"Comment", + + K"NewlineWs",K"Integer",K"%",K"Integer", + + K"NewlineWs",K"Identifier",K"'",K"/",K"Identifier",K"'", + + K"NewlineWs",K"Identifier",K".",K"'",K"\\",K"Identifier",K".",K"'", + + K"NewlineWs",K"`",K"CmdString",K"`", + + K"NewlineWs",K"Integer",K"Identifier",K"(",K"Integer",K")", + + K"NewlineWs",K"{",K"}", + + K"NewlineWs",K"'",K"Char",K"EndMarker"] + + for (i, n) in enumerate(tokenize(str)) + @test kind(n) == kinds[i] + end + + @testset "roundtrippability" begin + @test join(untokenize.(collect(tokenize(str)), str)) == str + end + + @test all((t.endbyte - t.startbyte + 1)==sizeof(untokenize(t, str)) for t in tokenize(str)) +end # testset + +@testset "issue 5, '..'" begin + @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K"..",K"Float",K"EndMarker"] +end + +@testset "issue 17, >>" begin + str = ">> " + @test untokenize(tok(str), str)==">>" +end + +@testset "tokenize newlines" begin + n = "\n" + rn = "\r\n" + nl = 
K"NewlineWs" + for i in 0:5 + j = 5 - i + @test toks(n^i * rn^j) == vcat(fill(n => nl, i), fill(rn => nl, j)) + @test toks(rn^i * n^j) == vcat(fill(rn => nl, i), fill(n => nl, j)) + end +end + +@testset "test added operators" begin + @test tok("1+=2", 2).kind == K"op=" + @test tok("1-=2", 2).kind == K"op=" + @test tok("1*=2", 2).kind == K"op=" + @test tok("1^=2", 2).kind == K"op=" + @test tok("1÷=2", 2).kind == K"op=" + @test tok("1\\=2", 2).kind == K"op=" + @test tok("1\$=2", 2).kind == K"op=" + @test tok("1⊻=2", 2).kind == K"op=" + @test tok("1:=2", 2).kind == K":=" + @test tok("1-->2", 2).kind == K"-->" + @test tok("1<--2", 2).kind == K"<--" + @test tok("1<-->2", 2).kind == K"<-->" + @test tok("1>:2", 2).kind == K">:" +end + +@testset "infix" begin + @test tok("1 in 2", 3).kind == K"in" + @test tok("1 in[1]", 3).kind == K"in" + + @test tok("1 isa 2", 3).kind == K"isa" + @test tok("1 isa[2]", 3).kind == K"isa" +end + +@testset "tokenizing true/false literals" begin + @test tok("somtext true", 3).kind == K"Bool" + @test tok("somtext false", 3).kind == K"Bool" + @test tok("somtext tr", 3).kind == K"Identifier" + @test tok("somtext truething", 3).kind == K"Identifier" + @test tok("somtext falsething", 3).kind == K"Identifier" +end + + +roundtrip(str) = join(untokenize.(collect(tokenize(str)), str)) + +@testset "lexing anon functions '->' " begin + @test tok("a->b", 2).kind==K"->" +end + +@testset "comments" begin + ts = collect(tokenize(""" + # + \"\"\" + f + \"\"\" + 1 + """)) + + kinds = [K"Comment", K"NewlineWs", + K"\"\"\"", K"String", K"String", K"\"\"\"", K"NewlineWs", + K"Integer", K"NewlineWs", + K"EndMarker"] + @test kind.(ts) == kinds + + @test toks("#=# text=#") == ["#=# text=#"=>K"Comment"] + + @test toks("#= #= =#") == ["#= #= =#"=>K"ErrorEofMultiComment"] + @test toks("#=#==#=#") == ["#=#==#=#"=>K"Comment"] + @test toks("#=#==#=") == ["#=#==#="=>K"ErrorEofMultiComment"] + # comment terminated by \r\n + @test toks("#\r\n") == ["#" => K"Comment", "\r\n" 
=> K"NewlineWs"] +end + + +@testset "invalid UTF-8" begin + @test toks("#=\xf5b\n=#") == [ + "#=\xf5b\n=#" => K"ErrorInvalidUTF8", + ] + @test toks("#\xf5b\n") == [ + "#\xf5b" => K"ErrorInvalidUTF8", + "\n" => K"NewlineWs" + ] + @test toks("\"\xf5\"") == [ + "\"" => K"\"" + "\xf5" => K"ErrorInvalidUTF8" + "\"" => K"\"" + ] + @test toks("'\xf5'") == [ + "'" => K"'" + "\xf5" => K"ErrorInvalidUTF8" + "'" => K"'" + ] + @test toks("`\xf5`") == [ + "`" => K"`" + "\xf5" => K"ErrorInvalidUTF8" + "`" => K"`" + ] +end + +@testset "primes" begin + str = """ + ImageMagick.save(fn, reinterpret(ARGB32, [0xf0884422]'')) + D = ImageMagick.load(fn) + """ + tokens = collect(tokenize(str)) + @test string(untokenize(tokens[16], str)) == string(untokenize(tokens[17], str))=="'" + + @test roundtrip("'a'") == "'a'" + @test kind.(collect(tokenize("'a'"))) == [K"'", K"Char", K"'", K"EndMarker"] + + # ' is not an operator here, so doesn't consume the suffix ᵀ + @test roundtrip("'ᵀ'") == "'ᵀ'" + @test kind.(collect(tokenize("'₁'"))) == [K"'", K"Char", K"'", K"EndMarker"] + + @test roundtrip("''") == "''" + @test kind.(collect(tokenize("''"))) == [K"'", K"'", K"EndMarker"] + + @test roundtrip("'''") == "'''" + @test kind.(collect(tokenize("'''"))) == [K"'", K"Char", K"'", K"EndMarker"] + + @test roundtrip("''''") == "''''" + @test kind.(collect(tokenize("''''"))) == [K"'", K"Char", K"'", K"'", K"EndMarker"] + + @test tok("()'", 3).kind == K"'" + @test tok("{}'", 3).kind == K"'" + @test tok("[]'", 3).kind == K"'" + @test tok("outer'", 2).kind == K"'" + @test tok("mutable'", 2).kind == K"'" + @test tok("as'", 2).kind == K"'" + @test tok("isa'", 2).kind == K"'" + @test untokenize.(collect(tokenize("a'ᵀ")), "a'ᵀ") == ["a", "'ᵀ", ""] +end + +@testset "keywords" begin + for kw in ["baremodule", + "begin", + "break", + "catch", + "const", + "continue", + "do", + "else", + "elseif", + "end", + "export", + "finally", + "for", + "function", + "global", + "if", + "import", + "let", + "local", + "macro", 
+ "module", + "quote", + "return", + "struct", + "try", + "using", + "while", + + "abstract", + "as", + "doc", + "mutable", + "outer", + "primitive", + "type", + "var"] + + @test kind(tok(kw)) == Kind(kw) + end +end + +@testset "issue in PR #45" begin + @test length(collect(tokenize("x)"))) == 3 +end + +@testset "lex binary" begin + @test tok("0b0101").kind==K"BinInt" +end + +@testset "show" begin + io = IOBuffer() + show(io, collect(tokenize("\"abc\nd\"ef"))[2]) + @test String(take!(io)) == "1-5 String " +end + +~(tok::RawToken, t::Tuple) = tok.kind == t[1] && untokenize(tok, t[3]) == t[2] + +@testset "raw strings" begin + str = raw""" str"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[1] ~ (K"Whitespace" , " " , str) + @test ts[2] ~ (K"Identifier" , "str" , str) + @test ts[3] ~ (K"\"" , "\"" , str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) + @test ts[5] ~ (K"\"" , "\"" , str) + @test ts[6] ~ (K"Whitespace" , " " , str) + @test ts[7] ~ (K"EndMarker" , "" , str) + + str = raw"""`x $ \ y`""" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"`" , "`" , str) + @test ts[2] ~ (K"CmdString" , "x \$ \\ y" , str) + @test ts[3] ~ (K"`" , "`" , str) + @test ts[4] ~ (K"EndMarker" , "" , str) + + # str"\\" + str = "str\"\\\\\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"Identifier" , "str" , str) + @test ts[2] ~ (K"\"" , "\"" , str) + @test ts[3] ~ (K"String" , "\\\\" , str) + @test ts[4] ~ (K"\"" , "\"" , str) + @test ts[5] ~ (K"EndMarker" , "" , str) + + # str"\\\"" + str = "str\"\\\\\\\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"Identifier" , "str" , str) + @test ts[2] ~ (K"\"" , "\"" , str) + @test ts[3] ~ (K"String" , "\\\\\\\"" , str) + @test ts[4] ~ (K"\"" , "\"" , str) + @test ts[5] ~ (K"EndMarker" , "" , str) + + # Contextual keywords and operators allowed as raw string prefixes + str = raw""" var"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (K"var" , "var", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) + + str = raw""" 
outer"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (K"outer" , "outer", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) + + str = raw""" isa"x $ \ y" """ + ts = collect(tokenize(str)) + @test ts[2] ~ (K"isa" , "isa", str) + @test ts[4] ~ (K"String" , "x \$ \\ y", str) +end + +@testset "string escaped newline whitespace" begin + str = "\"x\\\n \ty\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"\"", "\"", str) + @test ts[2] ~ (K"String", "x", str) + @test ts[3] ~ (K"Whitespace", "\\\n \t", str) + @test ts[4] ~ (K"String", "y", str) + @test ts[5] ~ (K"\"", "\"", str) + + # No newline escape for raw strings + str = "r\"x\\\ny\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"Identifier", "r", str) + @test ts[2] ~ (K"\"", "\"", str) + @test ts[3] ~ (K"String", "x\\\ny", str) + @test ts[4] ~ (K"\"", "\"", str) +end + +@testset "triple quoted string line splitting" begin + str = "\"\"\"\nx\r\ny\rz\n\r\"\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"\"\"\"" , "\"\"\"", str) + @test ts[2] ~ (K"String" , "\n", str) + @test ts[3] ~ (K"String" , "x\r\n", str) + @test ts[4] ~ (K"String" , "y\r", str) + @test ts[5] ~ (K"String" , "z\n", str) + @test ts[6] ~ (K"String" , "\r", str) + @test ts[7] ~ (K"\"\"\"" , "\"\"\"", str) + + # Also for raw strings + str = "r\"\"\"\nx\ny\"\"\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"Identifier" , "r", str) + @test ts[2] ~ (K"\"\"\"" , "\"\"\"", str) + @test ts[3] ~ (K"String" , "\n", str) + @test ts[4] ~ (K"String" , "x\n", str) + @test ts[5] ~ (K"String" , "y", str) + @test ts[6] ~ (K"\"\"\"" , "\"\"\"", str) +end + +@testset "interpolation" begin + @testset "basic" begin + str = "\"\$x \$y\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"\"" , "\"", str) + @test ts[2] ~ (K"$" , "\$", str) + @test ts[3] ~ (K"Identifier" , "x" , str) + @test ts[4] ~ (K"String" , " " , str) + @test ts[5] ~ (K"$" , "\$", str) + @test ts[6] ~ (K"Identifier" , "y" , str) + @test ts[7] ~ (K"\"" , "\"", str) + @test 
ts[8] ~ (K"EndMarker" , "" , str) + end + + @testset "nested" begin + str = """"str: \$(g("str: \$(h("str"))"))" """ + ts = collect(tokenize(str)) + @test length(ts) == 23 + @test ts[1] ~ (K"\"" , "\"" , str) + @test ts[2] ~ (K"String" , "str: ", str) + @test ts[3] ~ (K"$" , "\$" , str) + @test ts[4] ~ (K"(" , "(" , str) + @test ts[5] ~ (K"Identifier", "g" , str) + @test ts[6] ~ (K"(" , "(" , str) + @test ts[7] ~ (K"\"" , "\"" , str) + @test ts[8] ~ (K"String" , "str: ", str) + @test ts[9] ~ (K"$" , "\$" , str) + @test ts[10] ~ (K"(" , "(" , str) + @test ts[11] ~ (K"Identifier", "h" , str) + @test ts[12] ~ (K"(" , "(" , str) + @test ts[13] ~ (K"\"" , "\"" , str) + @test ts[14] ~ (K"String" , "str" , str) + @test ts[15] ~ (K"\"" , "\"" , str) + @test ts[16] ~ (K")" , ")" , str) + @test ts[17] ~ (K")" , ")" , str) + @test ts[18] ~ (K"\"" , "\"" , str) + @test ts[19] ~ (K")" , ")" , str) + @test ts[20] ~ (K")" , ")" , str) + @test ts[21] ~ (K"\"" , "\"" , str) + @test ts[22] ~ (K"Whitespace", " " , str) + @test ts[23] ~ (K"EndMarker" , "" , str) + end + + @testset "duplicate \$" begin + str = "\"\$\$\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"\"" , "\"", str) + @test ts[2] ~ (K"$" , "\$", str) + @test ts[3] ~ (K"$" , "\$", str) + @test ts[4] ~ (K"\"" , "\"", str) + @test ts[5] ~ (K"EndMarker" , "" , str) + end + + @testset "Unmatched parens" begin + # issue 73: https://github.com/JuliaLang/Tokenize.jl/issues/73 + str = "\"\$(fdsf\"" + ts = collect(tokenize(str)) + @test ts[1] ~ (K"\"" , "\"" , str) + @test ts[2] ~ (K"$" , "\$" , str) + @test ts[3] ~ (K"(" , "(" , str) + @test ts[4] ~ (K"Identifier" , "fdsf" , str) + @test ts[5] ~ (K"\"" , "\"" , str) + @test ts[6] ~ (K"EndMarker" , "" , str) + end + + @testset "Unicode" begin + # issue 178: https://github.com/JuliaLang/Tokenize.jl/issues/178 + str = """ "\$uₕx \$(uₕx - ux)" """ + ts = collect(tokenize(str)) + @test ts[ 1] ~ (K"Whitespace" , " " , str) + @test ts[ 2] ~ (K"\"" , "\"" , str) + @test ts[ 3] ~ 
(K"$" , "\$" , str) + @test ts[ 4] ~ (K"Identifier" , "uₕx" , str) + @test ts[ 5] ~ (K"String" , " " , str) + @test ts[ 6] ~ (K"$" , "\$" , str) + @test ts[ 7] ~ (K"(" , "(" , str) + @test ts[ 8] ~ (K"Identifier" , "uₕx" , str) + @test ts[ 9] ~ (K"Whitespace" , " " , str) + @test ts[10] ~ (K"-" , "-" , str) + @test ts[11] ~ (K"Whitespace" , " " , str) + @test ts[12] ~ (K"Identifier" , "ux" , str) + @test ts[13] ~ (K")" , ")" , str) + @test ts[14] ~ (K"\"" , "\"" , str) + @test ts[15] ~ (K"Whitespace" , " " , str) + @test ts[16] ~ (K"EndMarker" , "" , str) + end + + @testset "var\"...\" disabled in interpolations" begin + str = """ "\$var"x" " """ + ts = collect(tokenize(str)) + @test ts[ 1] ~ (K"Whitespace" , " " , str) + @test ts[ 2] ~ (K"\"" , "\"" , str) + @test ts[ 3] ~ (K"$" , "\$" , str) + @test ts[ 4] ~ (K"var" , "var" , str) + @test ts[ 5] ~ (K"\"" , "\"" , str) + @test ts[ 6] ~ (K"Identifier" , "x" , str) + @test ts[ 7] ~ (K"\"" , "\"" , str) + @test ts[ 8] ~ (K"String" , " " , str) + @test ts[ 9] ~ (K"\"" , "\"" , str) + @test ts[10] ~ (K"Whitespace" , " " , str) + @test ts[11] ~ (K"EndMarker" , "" , str) + end + + @testset "chars after interpolation identifier" begin + # Operators allowed + @test toks("\"\$x?\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "?"=>K"String" + "\""=>K"\"" + ] + @test toks("\"\$x⫪\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "⫪"=>K"String" + "\""=>K"\"" + ] + # Some chars disallowed (eg, U+0DF4) + @test toks("\"\$x෴\"") == [ + "\""=>K"\"" + "\$"=>K"$" + "x"=>K"Identifier" + "෴"=>K"ErrorInvalidInterpolationTerminator" + "\""=>K"\"" + ] + end +end + +@testset "inferred" begin + l = tokenize("abc") + @inferred Tokenize.next_token(l) +end + +@testset "modifying function names (!) 
followed by operator" begin + @test toks("a!=b") == ["a"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] + @test toks("a!!=b") == ["a!"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] + @test toks("!=b") == ["!="=>K"!=", "b"=>K"Identifier"] +end + +@testset "integer literals" begin + @test onlytok("1234") == K"Integer" + @test onlytok("12_34") == K"Integer" + + @test toks("1234_") == ["1234"=>K"Integer", "_"=>K"Identifier"] + @test toks("1234x") == ["1234"=>K"Integer", "x"=>K"Identifier"] + + @test onlytok("_1234") == K"Identifier" + + @test toks("1__2") == ["1"=>K"Integer", "__2"=>K"Identifier"] +end + +@testset "hex integer literals" begin + @test onlytok("0x0167_032") == K"HexInt" + @test onlytok("0x2_0_2") == K"HexInt" + # trailing junk + # https://github.com/JuliaLang/julia/issues/16356 + @test onlytok("0xenomorph") == K"ErrorInvalidNumericConstant" + @test onlytok("0xaα") == K"ErrorInvalidNumericConstant" + @test toks("0x ") == ["0x"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] + @test onlytok("0x") == K"ErrorInvalidNumericConstant" + @test onlytok("0xg") == K"ErrorInvalidNumericConstant" + @test onlytok("0x_") == K"ErrorInvalidNumericConstant" + @test toks("0x-") == ["0x"=>K"ErrorInvalidNumericConstant", "-"=>K"-"] +end + +@testset "hexfloat literals" begin + @test onlytok("0x.1p1") == K"Float" + @test onlytok("0x00p2") == K"Float" + @test onlytok("0x00P2") == K"Float" + @test onlytok("0x0.00p23") == K"Float" + @test onlytok("0x0.0ap23") == K"Float" + @test onlytok("0x0.0_0p2") == K"Float" + @test onlytok("0x0_0_0.0_0p2") == K"Float" + @test onlytok("0x0p+2") == K"Float" + @test onlytok("0x0p-2") == K"Float" + # errors + @test onlytok("0x") == K"ErrorInvalidNumericConstant" + @test onlytok("0x2__2") == K"ErrorInvalidNumericConstant" + @test onlytok("0x1p") == K"ErrorInvalidNumericConstant" + @test onlytok("0x.p0") == K"ErrorInvalidNumericConstant" + @test onlytok("0x.") == K"ErrorHexFloatMustContainP" + @test onlytok("0x1.0") == 
K"ErrorHexFloatMustContainP" +end + +@testset "binary literals" begin + @test onlytok("0b0101001_0100_0101") == K"BinInt" + + @test onlytok("0b") == K"ErrorInvalidNumericConstant" + @test toks("0b ") == ["0b"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] + @test onlytok("0b101__101") == K"ErrorInvalidNumericConstant" + @test onlytok("0b123") == K"ErrorInvalidNumericConstant" +end + +@testset "octal literals" begin + @test onlytok("0o0167") == K"OctInt" + @test onlytok("0o01054001_0100_0101") == K"OctInt" + + @test onlytok("0o") == K"ErrorInvalidNumericConstant" + @test onlytok("0o78p") == K"ErrorInvalidNumericConstant" + @test toks("0o ") == ["0o"=>K"ErrorInvalidNumericConstant", " "=>K"Whitespace"] +end + +@testset "float literals" begin + @test onlytok("1.0") == K"Float" + + @test onlytok("1.0e0") == K"Float" + @test onlytok("1.0e-0") == K"Float" + @test onlytok("1.0E0") == K"Float" + @test onlytok("1.0E-0") == K"Float" + @test onlytok("1.0f0") == K"Float32" + @test onlytok("1.0f-0") == K"Float32" + @test onlytok("1.e0") == K"Float" + @test onlytok("1.f0") == K"Float32" + + @test onlytok("0e0") == K"Float" + @test onlytok("0e+0") == K"Float" + @test onlytok("0E0") == K"Float" + @test onlytok("201E+0") == K"Float" + @test onlytok("2f+0") == K"Float32" + @test onlytok("2048f0") == K"Float32" + + # underscores + @test onlytok("1_1.11") == K"Float" + @test onlytok("11.1_1") == K"Float" + @test onlytok("1_1.1_1") == K"Float" + @test onlytok("1.2_3") == K"Float" + @test onlytok("3_2.5_2") == K"Float" + @test toks("_1.1_1") == ["_1"=>K"Identifier", ".1_1"=>K"Float"] + + # juxtapositions with identifiers + @test toks("3e2_2") == ["3e2"=>K"Float", "_2"=>K"Identifier"] + @test toks("1e") == ["1"=>K"Integer", "e"=>K"Identifier"] + + # Floating point with \minus rather than - + @test onlytok("1.0e−0") == K"Float" + @test onlytok("1.0f−0") == K"Float32" + @test onlytok("0x0p−2") == K"Float" + + # Errors + @test onlytok("1._") == K"ErrorInvalidNumericConstant" + @test 
onlytok("1.1.") == K"ErrorInvalidNumericConstant" + @test onlytok("1e+") == K"ErrorInvalidNumericConstant" + @test onlytok("1.0e+") == K"ErrorInvalidNumericConstant" + @test onlytok("1.e1.") == K"ErrorInvalidNumericConstant" + @test onlytok("1e1.") == K"ErrorInvalidNumericConstant" + @test toks("1.e") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "e"=>K"Identifier"] + @test toks("3.2e2.2") == ["3.2e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"] + @test toks("3e2.2") == ["3e2."=>K"ErrorInvalidNumericConstant", "2"=>K"Integer"] + @test toks("1.2.f") == ["1.2."=>K"ErrorInvalidNumericConstant", "f"=>K"Identifier"] +end + +@testset "numbers with trailing `.` " begin + @test toks("1.") == ["1."=>K"Float"] + + @test toks("1.)") == ["1."=>K"Float", ")"=>K")"] + @test toks("1.]") == ["1."=>K"Float", "]"=>K"]"] + @test toks("1.}") == ["1."=>K"Float", "}"=>K"}"] + @test toks("1.,") == ["1."=>K"Float", ","=>K","] + @test toks("1.;") == ["1."=>K"Float", ";"=>K";"] + @test toks("1.#") == ["1."=>K"Float", "#"=>K"Comment"] + + # ellipses + @test toks("1..") == ["1"=>K"Integer", ".."=>K".."] + @test toks("1...") == ["1"=>K"Integer", "..."=>K"..."] + @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] + @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] + + # Dotted operators and other dotted suffixes + @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] + @test toks("1234.0+1") == ["1234.0"=>K"Float", "+"=>K"+", "1"=>K"Integer"] + @test toks("1234.0 .+1") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] + @test toks("1234 .f(a)") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", + "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] + @test toks("1234.0 .f(a)") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", + "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] + @test toks("1f0./1") == ["1f0"=>K"Float32", "."=>K".", "/"=>K"/", "1"=>K"Integer"] + + # 
Dotted operators after numeric constants are ok + @test toks("1e1.⫪") == ["1e1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] + @test toks("1.1.⫪") == ["1.1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] + @test toks("1e1.−") == ["1e1"=>K"Float", "."=>K".", "−"=>K"-"] + @test toks("1.1.−") == ["1.1"=>K"Float", "."=>K".", "−"=>K"-"] + # Non-dottable operators are not ok + @test toks("1e1.\$") == ["1e1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] + @test toks("1.1.\$") == ["1.1."=>K"ErrorInvalidNumericConstant", "\$"=>K"$"] + + # Ambiguous dotted operators + @test toks("1.+") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+"] + @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] + @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] + @test toks("1.⫪") == ["1."=>K"ErrorAmbiguousNumericConstant", "⫪"=>K"⫪"] + # non-dottable ops are the exception + @test toks("1.:") == ["1."=>K"Float", ":"=>K":"] + @test toks("1.\$") == ["1."=>K"Float", "\$"=>K"$"] + + # Ambiguous - literal vs multiply by juxtaposition + @test toks("1.x") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "x"=>K"Identifier"] + @test toks("1.(") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "("=>K"("] + @test toks("1.[") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "["=>K"["] + @test toks("1.{") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "{"=>K"{"] + @test toks("1.@") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "@"=>K"@"] + @test toks("1.\"") == ["1."=>K"ErrorAmbiguousNumericDotMultiply", "\""=>K"\""] +end + +@testset "julia 0.6 types" begin + @test onlytok("mutable") == K"mutable" + @test onlytok("primitive") == K"primitive" + @test onlytok("struct") == K"struct" + @test onlytok("where") == K"where" + + @test tok("mutable struct s{T} where T", 1).kind == K"mutable" + @test tok("mutable struct s{T} where T", 3).kind == K"struct" + @test tok("mutable struct s{T} where T", 10).kind == K"where" +end + +@testset "CMDs" begin + @test tok("`cmd`",1).kind == K"`" 
+ @test tok("`cmd`",2).kind == K"CmdString" + @test tok("`cmd`",3).kind == K"`" + @test tok("`cmd`",4).kind == K"EndMarker" + @test tok("```cmd```", 1).kind == K"```" + @test tok("```cmd```", 2).kind == K"CmdString" + @test tok("```cmd```", 3).kind == K"```" + @test tok("```cmd```", 4).kind == K"EndMarker" + @test tok("```cmd````cmd`", 1).kind == K"```" + @test tok("```cmd````cmd`", 2).kind == K"CmdString" + @test tok("```cmd````cmd`", 3).kind == K"```" + @test tok("```cmd````cmd`", 4).kind == K"`" + @test tok("```cmd````cmd`", 5).kind == K"CmdString" + @test tok("```cmd````cmd`", 6).kind == K"`" + @test tok("```cmd````cmd`", 7).kind == K"EndMarker" +end + +@testset "where" begin + @test tok("a where b", 3).kind == K"where" +end + +@testset "IO position" begin + io = IOBuffer("#1+1") + skip(io, 1) + @test length(collect(tokenize(io))) == 4 +end + +@testset "dotted and suffixed operators" begin + +for opkind in Tokenize._nondot_symbolic_operator_kinds() + op = string(opkind) + strs = [ + 1 => [ # unary + "$(op)b", + ".$(op)b", + ], + 2 => [ # binary + "a $op b", + "a .$op b", + "a $(op)₁ b", + "a $(op)\U0304 b", + "a .$(op)₁ b" + ] + ] + + for (arity, container) in strs + for str in container + expr = JuliaSyntax.fl_parse(str, raise = false) + if VERSION < v"1.7" && str == "a .&& b" + expr = Expr(Symbol(".&&"), :a, :b) + end + if expr isa Expr && (expr.head != :error && expr.head != :incomplete) + tokens = collect(tokenize(str)) + exop = expr.head == :call ? expr.args[1] : expr.head + #println(str) + # For dotted operators, we need to reconstruct the operator from separate tokens + # Note: .. and ... are not dotted operators, they're regular operators + exop_str = string(exop) + is_dotted = occursin(".", exop_str) && exop != :.. && exop != :... + if is_dotted + # Dotted operators are now two tokens: . and the operator + dot_pos = arity == 1 ? 1 : 3 + op_pos = arity == 1 ? 
2 : 4 + reconstructed_op = Symbol(Tokenize.untokenize(tokens[dot_pos], str) * + Tokenize.untokenize(tokens[op_pos], str)) + if reconstructed_op != exop + @info "" arity str exop reconstructed_op + end + @test reconstructed_op == exop + else + # Regular operators and suffixed operators + op_pos = arity == 1 ? 1 : 3 + if Symbol(Tokenize.untokenize(tokens[op_pos], str)) != exop + @info "" arity str exop op_pos + end + @test Symbol(Tokenize.untokenize(tokens[op_pos], str)) == exop + end + else + break + end + end + end +end +end + +@testset "Normalization of Unicode symbols" begin + # https://github.com/JuliaLang/julia/pull/25157 + @test tok("\u00b7").kind == K"⋅" + @test tok("\u0387").kind == K"⋅" + @test toks(".\u00b7") == ["."=>K".", "\u00b7"=>K"⋅"] + @test toks(".\u0387") == ["."=>K".", "\u0387"=>K"⋅"] + + # https://github.com/JuliaLang/julia/pull/40948 + @test tok("−").kind == K"-" + @test tok("−=").kind == K"op=" + @test toks(".−") == ["."=>K".", "−"=>K"-"] +end + +@testset "perp" begin + @test tok("1 ⟂ 2", 3).kind==K"⟂" +end + +@testset "outer" begin + @test tok("outer", 1).kind==K"outer" +end + +@testset "invalid operator errors" begin + @test toks("--") == ["--"=>K"ErrorInvalidOperator"] + @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] + @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] +end + +@testset "hat suffix" begin + @test tok("ŝ", 1).kind==K"Identifier" + @test untokenize(collect(tokenize("ŝ"))[1], "ŝ") == "ŝ" +end + +@testset "suffixed op" begin + s = "+¹" + @test is_operator(tok(s, 1).kind) + @test untokenize(collect(tokenize(s))[1], s) == s +end + +@testset "circ arrow right op" begin + s = "↻" + @test collect(tokenize(s))[1].kind == K"↻" +end + +@testset "invalid float" begin + s = 
".0." + @test collect(tokenize(s))[1].kind == K"ErrorInvalidNumericConstant" +end + +@testset "allow prime after end" begin + @test tok("begin end'", 4).kind === K"'" +end + +@testset "new ops" begin + ops = [ + raw"= += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ~ := $=" + raw"=>" + raw"?" + raw"← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ -->" + raw"||" + raw"&&" + raw"> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:" + raw"<|" + raw"|>" + raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯" + raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" + raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" + raw"//" + raw"<< >> >>>" + raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" + raw"::" + raw"." 
+ ] + if VERSION >= v"1.6.0" + push!(ops, raw"<-- <-->") + end + if VERSION >= v"1.7.0" + append!(ops, [ + "−" + "\u00b7 \u0387" + "⫪ ⫫" + ]) + end + if VERSION >= v"1.10-DEV" + push!(ops, "⥷ ⥺ ⟇") + end + allops = split(join(ops, " "), " ") + @test all(s->Base.isoperator(Symbol(s)) == is_operator(first(collect(tokenize(s))).kind), allops) + + # "\U1f8b2" added in Julia 1.12 + @test is_operator(first(collect(tokenize("🢲")))) +end + +const all_kws = Set([ + # Keywords + "baremodule", + "begin", + "break", + "catch", + "const", + "continue", + "do", + "else", + "elseif", + "end", + "export", + "finally", + "for", + "function", + "global", + "if", + "import", + "let", + "local", + "macro", + "module", + "public", + "quote", + "return", + "struct", + "try", + "using", + "while", + # Contextual keywords + "abstract", + "as", + "doc", + "mutable", + "outer", + "primitive", + "type", + "var", + # Word-like operators + "in", + "isa", + "where", +]) + +function check_kw_hashes(iter) + for cs in iter + str = String([cs...]) + if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash) + @test str in all_kws + end + end +end + +@testset "simple_hash" begin + @test length(all_kws) == length(Tokenize._kw_hash) + + @testset "Length $len keywords" for len in 1:5 + check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...)) + end +end + + +@testset "UTF-8 BOM" begin + @test kind.(collect(tokenize("\ufeff[1\ufeff2]"))) == [ + K"Whitespace", + K"[", + K"Integer", + K"Whitespace", + K"Integer", + K"]", + K"EndMarker" + ] +end + +@testset "lexer initialization" begin + # Ranges of EndMarker + @test (t = last(collect(tokenize("+"))); (t.startbyte, t.endbyte)) == (1,0) + @test (t = last(collect(tokenize("+*"))); (t.startbyte, t.endbyte)) == (2,1) +end + +@testset "invalid UTF-8 characters" begin + @test onlytok("\x00") == K"ErrorUnknownCharacter" + @test onlytok("₁") == K"ErrorIdentifierStart" + + bad_chars = [ + first("\xe2") # malformed + first("\xc0\x9b") # 
overlong + first("\xf0\x83\x99\xae") # overlong + ] + + @testset "bad char $(repr(c))" for c in bad_chars + @test Tokenize.is_identifier_char(c) == false + @test Tokenize.is_identifier_start_char(c) == false + @test Tokenize.is_never_id_char(c) == true + @test Tokenize.is_dottable_operator_start_char(c) == false + @test Tokenize.isopsuffix(c) == false + @test Tokenize.is_operator_start_char(c) == false + @test Tokenize.iswhitespace(c) == false + @test Tokenize.ishex(c) == false + end +end + +@testset "unbalanced bidirectional unicode" begin + open_embedding = ['\U202A', '\U202B', '\U202D', '\U202E'] + close_embedding = '\U202C' + open_isolate = ['\U2066', '\U2067', '\U2068'] + close_isolate = '\U2069' + close_all = '\n' + + all_bidi_codes = [open_embedding; close_embedding; open_isolate; close_isolate] + + bidi_pairs = [Iterators.product(open_embedding, [close_embedding, close_all])..., + Iterators.product(open_isolate, [close_isolate, close_all])...] + + @testset "delimiter $kd" for (kd, chunk_kind) in [ + (K"\"", K"String"), + (K"\"\"\"", K"String"), + (K"`", K"CmdString"), + (K"```", K"CmdString") + ] + d = string(kd) + @testset "Single unbalanced codes" begin + for c in all_bidi_codes + @test toks("$d$c$d") == + [d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd] + @test toks("pfx$d$c$d") == + ["pfx"=>K"Identifier", d=>kd, "$c"=>K"ErrorBidiFormatting", d=>kd] + end + end + @testset "Balanced pairs" begin + for (openc, closec) in bidi_pairs + str = "$(openc)##$(closec)" + @test toks("$d$str$d") == + [d=>kd, str=>chunk_kind, d=>kd] + @test toks("pfx$d$str$d") == + ["pfx"=>K"Identifier", d=>kd, str=>chunk_kind, d=>kd] + end + end + end + + @testset "multi line comments" begin + @testset "Single unbalanced codes" begin + for c in all_bidi_codes + comment = "#=$c=#" + @test toks(comment) == [comment=>K"ErrorBidiFormatting"] + end + end + @testset "Balanced pairs" begin + for (openc, closec) in bidi_pairs + str = "#=$(openc)zz$(closec)=#" + @test toks(str) == 
[str=>K"Comment"] + end + end + end + + @testset "extended balanced/unbalanced bidi state" begin + @testset "delimiter $kd" for (kd, chunk_kind) in [ + (K"\"", K"String"), + (K"\"\"\"", K"String"), + (K"`", K"CmdString"), + (K"```", K"CmdString") + ] + d = string(kd) + for balanced in [# Balanced pairs + "\u202a\u202bzz\u202c\u202c" + "\u2066\u2067zz\u2069\u2069" + # Newline is complete bidi state reset + "\u202a\u2067zz\n" + "\u202a\u202azz\n" + # \r\n and \n terminate a line + "\u202azz\r\n" + ] + @test toks("$d$balanced$d") == [ + d=>kd + balanced=>chunk_kind + d=>kd + ] + end + for unbalanced in ["\u202azz\u202c\u202c" + "\u202a\u202bzz\u202c" + # \r does not terminate a bidi line + "\u202azz\r" + ] + @test toks("$d$unbalanced$d") == [ + d=>kd + unbalanced=>K"ErrorBidiFormatting" + d=>kd + ] + end + end + end + + # Interpolations reset bidi state + @test toks("\"\u202a\$zz\n\"") == [ + "\""=>K"\"" + "\u202a"=>K"ErrorBidiFormatting" + "\$"=>K"$" + "zz"=>K"Identifier" + "\n"=>K"String" + "\""=>K"\"" + ] + @testset "newline escaping" begin + @test toks("\"a\u202a\\\n\"") == [ + "\""=>K"\"" + "a\u202a"=>K"String" + "\\\n"=>K"Whitespace" + "\""=>K"\"" + ] + @test toks("\"a\u202a\\\r\n\"") == [ + "\""=>K"\"" + "a\u202a"=>K"String" + "\\\r\n"=>K"Whitespace" + "\""=>K"\"" + ] + @test toks("\"a\u202a\\\r\"") == [ + "\""=>K"\"" + "a\u202a"=>K"ErrorBidiFormatting" + "\\\r"=>K"Whitespace" + "\""=>K"\"" + ] + end + + @testset "delimiter '" begin + for c in all_bidi_codes + @test toks("'$c'") == ["'"=>K"'", "$c"=>K"Char", "'"=>K"'"] + end + end +end + +@testset "dotop miscellanea" begin + @test strtok("a .-> b") == ["a", " ", ".", "-", ">", " ", "b", ""] + @test strtok(".>: b") == [".", ">:", " ", "b", ""] + @test strtok(".<: b") == [".", "<:", " ", "b", ""] + @test strtok("a ||₁ b") == ["a", " ", "||", "₁", " ", "b", ""] + @test strtok("a ||̄ b") == ["a", " ", "||", "̄", " ", "b", ""] + @test strtok("a .||₁ b") == ["a", " ", ".", "||", "₁", " ", "b", ""] + @test strtok("a 
&&₁ b") == ["a", " ", "&&", "₁", " ", "b", ""] + @test strtok("a &&̄ b") == ["a", " ", "&&", "̄", " ", "b", ""] + @test strtok("a .&&₁ b") == ["a", " ", ".", "&&", "₁", " ", "b", ""] +end + +end diff --git a/JuliaSyntax/test/utils.jl b/JuliaSyntax/test/utils.jl new file mode 100644 index 0000000000000..371da98c9e174 --- /dev/null +++ b/JuliaSyntax/test/utils.jl @@ -0,0 +1,24 @@ +@testset "_printstyled" begin + ps(str; kws...) = sprint(io->JuliaSyntax._printstyled(IOContext(io, :color=>true), str; kws...)) + + @test ps("XX"; fgcolor=:red) == "\e[31mXX\e[0;0m" + @test ps("XX"; fgcolor=42) == "\e[38;5;42mXX\e[0;0m" + @test ps("XX"; fgcolor=(10,100,200)) == "\e[38;2;10;100;200mXX\e[0;0m" + + ps("XX"; bgcolor=:red) == "\e[41mXX\e[0;0m" + @test ps("XX"; bgcolor=42) == "\e[48;5;42mXX\e[0;0m" + @test ps("XX"; bgcolor=(10,100,200)) == "\e[48;2;10;100;200mXX\e[0;0m" + + @test ps("XX"; href="https://www.example.com") == + "\e]8;;https://www.example.com\e\\XX\e[0;0m\e]8;;\e\\" + + @test ps("XX", fgcolor=:red, bgcolor=:green, href="https://www.example.com") == + "\e]8;;https://www.example.com\e\\\e[31m\e[42mXX\e[0;0m\e]8;;\e\\" +end + +@testset "ambiguities" begin + if VERSION >= v"1.8" + @test detect_ambiguities(JuliaSyntax) == [] + @test detect_unbound_args(JuliaSyntax) == [] + end +end diff --git a/JuliaSyntax/tools/bump_in_Base.jl b/JuliaSyntax/tools/bump_in_Base.jl new file mode 100644 index 0000000000000..aec2876deb645 --- /dev/null +++ b/JuliaSyntax/tools/bump_in_Base.jl @@ -0,0 +1,74 @@ +function find_checksum_files(checksum_dir) + filter(readdir(checksum_dir, join=true)) do path + occursin(r"^JuliaSyntax-", basename(path)) + end +end + +function bump_in_Base(julia_dir, juliasyntax_dir, juliasyntax_branch_or_commit) + julia_git_dir = joinpath(julia_dir, ".git") + JuliaSyntax_git_dir = joinpath(juliasyntax_dir, ".git") + if !isdir(julia_git_dir) + @error "Julia .git directory not found" julia_git_dir + return 1 + end + if !isdir(JuliaSyntax_git_dir) + @error "JuliaSyntax 
.git directory not found" JuliaSyntax_git_dir + return 1 + end + + @info "Vendoring JuliaSyntax into Base" julia_dir juliasyntax_branch_or_commit + + remote_containing_branches = filter(b->occursin(r"^origin/(main|release-.*)$", b), + strip.(split( + read(`git --git-dir=$JuliaSyntax_git_dir branch -r --contains $juliasyntax_branch_or_commit`, String), + '\n', keepempty=false))) + if isempty(remote_containing_branches) + @warn "No remote main or release branches contain the given commit. This is ok for testing, but is otherwise an error." juliasyntax_branch_or_commit + else + @info "Given commit is accessible on remote branch" remote_containing_branches + end + + commit_sha = strip(String(read(`git --git-dir=$JuliaSyntax_git_dir show -s --pretty=tformat:%H $juliasyntax_branch_or_commit`))) + + cd(julia_dir) do + status = read(`git status --porcelain --untracked-files=no`, String) + if status != "" + @error "Julia git directory contains uncommitted changes" status=Text(status) + return 1 + end + + verfile_path = joinpath("deps", "JuliaSyntax.version") + @info "Updating JuliaSyntax.version" verfile_path + write(verfile_path, replace(read(verfile_path, String), r"JULIASYNTAX_SHA1.*"=>"JULIASYNTAX_SHA1 = "*commit_sha)) + run(`git add $verfile_path`) + + @info "Updating JuliaSyntax checksums" + deps_dir = "deps" + checksum_dir = joinpath(deps_dir, "checksums") + old_checksum_paths = find_checksum_files(checksum_dir) + if !isempty(old_checksum_paths) + run(`git rm -rf $old_checksum_paths`) + end + run(`make -C $deps_dir`) + run(`git add $(find_checksum_files(checksum_dir))`) + + # Force rebuild of Base to include the newly vendored JuliaSyntax next time Julia is built. + # (TODO: fix the Makefile instead?) + touch("base/Base.jl") + + @info "JuliaSyntax version updated. 
You can now test or commit the following changes" + run(`git diff --cached`) + end + + return 0 +end + +if !isinteractive() + if length(ARGS) != 2 + println("Usage: bump_in_Base.jl \$julia_dir \$juliasyntax_branch_or_commit") + exit(1) + else + juliasyntax_dir = dirname(@__DIR__) + exit(bump_in_Base(ARGS[1], juliasyntax_dir, ARGS[2])) + end +end diff --git a/JuliaSyntax/tools/check_all_packages.jl b/JuliaSyntax/tools/check_all_packages.jl new file mode 100644 index 0000000000000..32f255e0cb6ea --- /dev/null +++ b/JuliaSyntax/tools/check_all_packages.jl @@ -0,0 +1,99 @@ +# hacky script to parse all Julia files in all packages in General +# to Exprs and report errors +# +# Run this after registry_download.jl (so the pkgs directory is populated). + +using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization + +include("../test/test_utils.jl") +include("../test/fuzz_test.jl") + +srcpaths = isempty(ARGS) ? [joinpath(@__DIR__, "pkgs")] : abspath.(ARGS) +source_paths = vcat(find_source_in_path.(srcpaths)...) 
+ +file_count = length(source_paths) + +exception_count = 0 +mismatch_count = 0 +t0 = time() +exceptions = [] + +all_reduced_failures = String[] + +Logging.with_logger(TerminalLogger()) do + global exception_count, mismatch_count, t0 + @withprogress for (ifile, fpath) in enumerate(source_paths) + @logprogress ifile/file_count time_ms=round((time() - t0)/ifile*1000, digits = 2) + text = read(fpath, String) + expr_cache = fpath*".Expr" + e2 = if isfile(expr_cache) + open(deserialize, fpath*".Expr") + else + @warn "Expr cache not found, parsing using reference parser" expr_cache maxlog=1 + JuliaSyntax.fl_parseall(text, filename=fpath) + end + @assert Meta.isexpr(e2, :toplevel) + try + e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true) + if !exprs_roughly_equal(e2, e1) + mismatch_count += 1 + failing_source = sprint(context=:color=>true) do io + for c in reduce_tree(parseall(SyntaxNode, text)) + JuliaSyntax.highlight(io, c.source, JuliaSyntax.byte_range(c), context_lines_inner=5) + println(io, "\n") + end + end + reduced_failures = reduce_text.(reduce_tree(text), + parsers_fuzzy_disagree) + append!(all_reduced_failures, reduced_failures) + @error("Parsers succeed but disagree", + fpath, + failing_source=Text(failing_source), + reduced_failures, + ) + end + catch err + err isa InterruptException && rethrow() + ex = (err, catch_backtrace()) + push!(exceptions, ex) + ref_parse = "success" + if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) + ref_parse = "fail" + if err isa JuliaSyntax.ParseError + # Both parsers agree that there's an error, and + # JuliaSyntax didn't have an internal error. 
+ continue + end + end + + exception_count += 1 + parse_to_syntax = "success" + try + JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, code) + catch err2 + parse_to_syntax = "fail" + end + @error "Parse failed" fpath exception=ex parse_to_syntax + end + end +end + +t_avg = round((time() - t0)/file_count*1000, digits = 2) + +println() +@info """ + Finished parsing $file_count files. + $(exception_count) failures compared to reference parser + $(mismatch_count) Expr mismatches + $(t_avg)ms per file""" + +open(joinpath(@__DIR__, "reduced_failures.jl"), write=true) do io + for str in all_reduced_failures + println(io, repr(str)) + end + for str in all_reduced_failures + println(io, "#------------------------------") + println(io, str) + println(io) + end +end diff --git a/JuliaSyntax/tools/registry_download.jl b/JuliaSyntax/tools/registry_download.jl new file mode 100644 index 0000000000000..e866a6ee72a94 --- /dev/null +++ b/JuliaSyntax/tools/registry_download.jl @@ -0,0 +1,46 @@ +# Hacky script to download the latest version of all packages registered in the +# General registry for testing the parser. 
+# +# This uses internal Pkg APIs and seems to work on Julia 1.7 + +using Pkg +using Downloads + +registry = only(filter(r->r.name == "General", Pkg.Registry.reachable_registries())) + +packages = [] + +for (uuid,pkg) in registry + versions = collect(Pkg.Registry.registry_info(pkg).version_info) + latest_ver, ver_info = last(sort(versions, by=first)) + if ver_info.yanked + continue + end + + push!(packages, (; uuid, pkg.name, version=latest_ver, ver_info.git_tree_sha1)) + +end + +server = Pkg.pkg_server() +output_dir = "pkgs" +mkpath(output_dir) + +asyncmap(packages, ntasks=5) do pkg + url = "$server/package/$(pkg.uuid)/$(pkg.git_tree_sha1)" + outfile_path = joinpath(output_dir, "$(pkg.name)_$(pkg.version).tgz") + if isfile(outfile_path) + @info "Skipping package" pkg + return outfile_path + else + @info "Download package" url outfile_path + for i=1:5 + try + Downloads.download(url, outfile_path) + break + catch + @error "Error downloading" pkg exception=current_exceptions() + end + sleep(i) + end + end +end diff --git a/JuliaSyntax/tools/untar_packages.jl b/JuliaSyntax/tools/untar_packages.jl new file mode 100644 index 0000000000000..2c6986890bb63 --- /dev/null +++ b/JuliaSyntax/tools/untar_packages.jl @@ -0,0 +1,68 @@ +using Serialization +using JuliaSyntax + +pkgspath = joinpath(@__DIR__, "pkgs") +tarspath = joinpath(@__DIR__, "pkg_tars") + +mkpath(pkgspath) +mkpath(tarspath) + +tar_info = [(m = match(r"(.*)_(\d+\.\d+\.\d+.*)\.tgz$", f); (f, m[1], VersionNumber(m[2]))) + for f in readdir(tarspath) if endswith(f, ".tgz")] + +tar_maxver = Dict{String,VersionNumber}() +for (_,name,ver) in tar_info + v = get(tar_maxver, name, v"0.0.0") + if v < ver + tar_maxver[name] = ver + end +end + +@info "# Untarring packages" + +for tinfos in Iterators.partition(tar_info, 50) + @sync for (tarname, pkgname, pkgver) in tinfos + @async begin + dir = joinpath(pkgspath, "$(pkgname)_$(pkgver)") + if pkgver != tar_maxver[pkgname] + if isdir(dir) + # Clean up old packages + rm(dir; 
recursive=true, force=true) + end + elseif !isdir(dir) || !isdir(joinpath(dir, "src")) + rm(dir; recursive=true, force=true) + mkpath(dir) + tar_path = joinpath(tarspath, tarname) + try + run(`tar -xf $tar_path -C $dir`) + catch err + @error "could not untar $tar_path" + end + end + end + end +end + +@info "# Parsing files with reference parser" + +let i = 0, tot_files = 0 + for (r, _, files) in walkdir(pkgspath) + for f in files + tot_files += 1 + endswith(f, ".jl") || continue + fpath = joinpath(r, f) + outpath = joinpath(r, f*".Expr") + if !islink(fpath) && isfile(fpath) && !isfile(outpath) + code = read(fpath, String) + fl_ex = JuliaSyntax.fl_parseall(code, filename=fpath) + i += 1 + if i % 100 == 0 + @info "$i/$tot_files files parsed" + end + open(outpath, "w") do io + serialize(io, fl_ex) + end + end + end + end +end diff --git a/Makefile b/Makefile index 462f7dddd161b..2b3d869d024f2 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ ifndef JULIA_VAGRANT_BUILD endif endif -TOP_LEVEL_PKGS := Compiler +TOP_LEVEL_PKGS := Compiler JuliaSyntax JuliaLowering TOP_LEVEL_PKG_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/,$(TOP_LEVEL_PKGS)) diff --git a/base/Base.jl b/base/Base.jl index 92d3ad2c04059..c5513b0af0ce3 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -315,9 +315,9 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) @eval Core const Compiler = $Base.Compiler @eval Compiler const fl_parse = $Base.fl_parse -# External libraries vendored into Base +# Compiler frontend Core.println("JuliaSyntax/src/JuliaSyntax.jl") -include(@__MODULE__, string(BUILDROOT, "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl) +include(@__MODULE__, string(DATAROOT, "julia/JuliaSyntax/src/JuliaSyntax.jl")) end_base_include = time_ns() diff --git a/base/sysimg.jl b/base/sysimg.jl index fd71544c205cc..28da6b46bbf7c 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -153,7 +153,12 @@ end empty!(Base.TOML_CACHE.d) 
Base.TOML.reinit!(Base.TOML_CACHE.p, "") -@eval Base BUILDROOT = "" + +# Clear some build-related globals (TODO: Use Base.delete_binding?) +@eval Base begin + DATAROOT = "" + BUILDROOT = "" +end @eval Sys begin BINDIR = "" STDLIB = "" diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk deleted file mode 100644 index 4a8afa8fbd53c..0000000000000 --- a/deps/JuliaSyntax.mk +++ /dev/null @@ -1,16 +0,0 @@ -$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR))) - -$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted - @# no build steps - echo 1 > $@ - -$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(BUILDROOT)/base)) - -clean-JuliaSyntax: - -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled -get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE) -extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted -configure-JuliaSyntax: extract-JuliaSyntax -compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled -fastcheck-JuliaSyntax: check-JuliaSyntax -check-JuliaSyntax: compile-JuliaSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version deleted file mode 100644 index 94f480c65dcf7..0000000000000 --- a/deps/JuliaSyntax.version +++ /dev/null @@ -1,4 +0,0 @@ -JULIASYNTAX_BRANCH = main -JULIASYNTAX_SHA1 = 99e975a726a82994de3f8e961e6fa8d39aed0d37 -JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git -JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/Makefile b/deps/Makefile index 392b4fad2b2e2..f6e55d23e6ac3 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -36,8 +36,6 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # prevent installing libs into usr/lib64 on opensuse unexport CONFIG_SITE -DEP_LIBS := JuliaSyntax - ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0) DEP_LIBS += blastrampoline endif @@ -212,7 +210,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS) DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind 
unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv openssl libssh2 nghttp2 curl libgit2 libwhich zlib zstd p7zip csl \ - sanitizers libsuitesparse lld libtracyclient ittapi nvtx JuliaSyntax \ + sanitizers libsuitesparse lld libtracyclient ittapi nvtx \ terminfo mmtk_julia DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) @@ -281,7 +279,4 @@ include $(SRCDIR)/terminfo.mk # MMTk include $(SRCDIR)/mmtk_julia.mk -# vendored Julia libs -include $(SRCDIR)/JuliaSyntax.mk - include $(SRCDIR)/tools/uninstallers.mk diff --git a/sysimage.mk b/sysimage.mk index 296a137c12fcc..e7917875e0ef2 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -76,9 +76,12 @@ COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ base/traits.jl \ base/tuple.jl) COMPILER_SRCS += $(shell find $(JULIAHOME)/Compiler/src -name \*.jl -and -not -name verifytrim.jl -and -not -name show.jl) +# Julia-based compiler frontend is bootstrapped into Base for now +COMPILER_FRONTEND_SRCS = $(shell find $(JULIAHOME)/JuliaSyntax/src -name \*.jl) # sort these to remove duplicates BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \ $(shell find $(BUILDROOT)/base -name \*.jl -and -not -name sysimg.jl)) \ + $(COMPILER_FRONTEND_SRCS) \ $(JULIAHOME)/Compiler/src/ssair/show.jl \ $(JULIAHOME)/Compiler/src/verifytrim.jl STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIBS_SRCS) diff --git a/test/Makefile b/test/Makefile index 61946e650dee5..37918be958e84 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,7 +11,7 @@ export JULIA_LOAD_PATH := @$(PATHSEP)@stdlib unexport JULIA_PROJECT := unexport JULIA_BINDIR := -TESTGROUPS = unicode strings compiler Compiler +TESTGROUPS = unicode strings compiler Compiler JuliaSyntax TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ $(filter-out runtests testdefs relocatedepot, \ diff --git a/test/choosetests.jl b/test/choosetests.jl index 
ec1ee983a1f4c..b5cd9622ce302 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -30,7 +30,8 @@ const TESTNAMES = [ "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", "scopedvalues", "compileall", "rebinding", - "faulty_constructor_method_should_not_cause_stack_overflows" + "faulty_constructor_method_should_not_cause_stack_overflows", + "JuliaSyntax" ] const INTERNET_REQUIRED_LIST = [ @@ -46,6 +47,12 @@ const INTERNET_REQUIRED_LIST = [ const NETWORK_REQUIRED_LIST = vcat(INTERNET_REQUIRED_LIST, ["Sockets"]) +const TOP_LEVEL_PKGS = [ + "Compiler" + "JuliaSyntax" + "JuliaLowering" +] + function test_path(test) t = split(test, '/') if t[1] in STDLIBS @@ -61,6 +68,9 @@ function test_path(test) elseif t[1] == "Compiler" testpath = length(t) >= 2 ? t[2:end] : ("runtests",) return joinpath(@__DIR__, "..", t[1], "test", testpath...) + elseif t[1] == "JuliaSyntax" + testpath = length(t) >= 2 ? t[2:end] : ("runtests_vendored",) + return joinpath(@__DIR__, "..", t[1], "test", testpath...) else return joinpath(@__DIR__, test) end @@ -225,9 +235,11 @@ function choosetests(choices = []) filter!(!in(tests), unhandled) filter!(!in(skip_tests), tests) + is_package_test(testname) = testname in STDLIBS || testname in TOP_LEVEL_PKGS + new_tests = String[] for test in tests - if test in STDLIBS || test == "Compiler" + if is_package_test(test) testfile = test_path("$test/testgroups") if isfile(testfile) testgroups = readlines(testfile) @@ -238,7 +250,7 @@ function choosetests(choices = []) end end end - filter!(x -> (x != "stdlib" && !(x in STDLIBS) && x != "Compiler") , tests) + filter!(x -> (x != "stdlib" && !is_package_test(x)) , tests) append!(tests, new_tests) requested_all || explicit_pkg || filter!(x -> x != "Pkg", tests)