diff --git a/CHANGELOG.md b/CHANGELOG.md index ad35839..9b17255 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added the ability to read a `sourceRanges` attribute from quarto to correctly point to included files in stacktraces [#339]. - Support new requirement from `quarto` to population environment variables explicitly in the notebook process [#306]. - Contributing guidelines with AI assistance policy and AGENTS.md for AI coding assistants [#335]. @@ -468,3 +469,4 @@ caching is enabled. Delete this folder to clear the cache. [#259] [#305]: https://github.com/PumasAI/QuartoNotebookRunner.jl/issues/305 [#306]: https://github.com/PumasAI/QuartoNotebookRunner.jl/issues/306 [#335]: https://github.com/PumasAI/QuartoNotebookRunner.jl/issues/335 +[#339]: https://github.com/PumasAI/QuartoNotebookRunner.jl/issues/339 diff --git a/src/server.jl b/src/server.jl index d0084d4..3ed5c11 100644 --- a/src/server.jl +++ b/src/server.jl @@ -66,6 +66,21 @@ mutable struct File end end +struct SourceRange + file::Union{String,Nothing} + lines::UnitRange{Int} + source_line::Union{Nothing,Int} # first line in source +end + +function SourceRange(file, lines, source_lines::UnitRange) + if length(lines) != length(source_lines) + error( + "Mismatching lengths of lines $lines ($(length(lines))) and source_lines $source_lines ($(length(source_lines)))", + ) + end + SourceRange(file, lines, first(source_lines)) +end + function _has_juliaup() try success(`juliaup --version`) && success(`julia --version`) @@ -339,13 +354,15 @@ function evaluate!( options::Union{String,Dict{String,Any}} = Dict{String,Any}(), chunk_callback = (i, n, c) -> nothing, markdown::Union{String,Nothing} = nothing, + source_ranges::Union{Nothing,Vector} = nothing, ) _check_output_dst(output) options = _parsed_options(options) path = abspath(f.path) if isfile(path) - source_code_hash, raw_chunks, file_frontmatter = raw_text_chunks(f, markdown) + source_code_hash, raw_chunks, file_frontmatter = + raw_text_chunks(f, markdown; source_ranges) merged_options = _extract_relevant_options(file_frontmatter, options) # A change of parameter values must invalidate the source code hash. @@ -583,9 +600,9 @@ write_json(::Nothing, data) = data Return a vector of raw markdown and code chunks from `file` ready for evaluation by `evaluate_raw_cells!`. """ -raw_text_chunks(file::File, ::Nothing) = raw_text_chunks(file.path) -raw_text_chunks(file::File, markdown::String) = - raw_markdown_chunks_from_string(file.path, markdown) +raw_text_chunks(file::File, ::Nothing; source_ranges = nothing) = raw_text_chunks(file.path) +raw_text_chunks(file::File, markdown::String; source_ranges = nothing) = + raw_markdown_chunks_from_string(file.path, markdown; source_ranges) function raw_text_chunks(path::String) endswith(path, ".qmd") && return raw_markdown_chunks(path) @@ -607,7 +624,27 @@ raw_markdown_chunks(path::String) = struct Unset end -function raw_markdown_chunks_from_string(path::String, markdown::String) +function compute_line_file_lookup(nlines, path, source_ranges) + nlines_ranges = maximum(r -> r.lines.stop, source_ranges) # number of lines reported might be different from the markdown string due to quarto bugs + lookup = fill((; file = "unknown", line = 0), nlines_ranges) + for source_range in source_ranges + file::String = something(source_range.file, "unknown") + for line in source_range.lines + source_line = line - first(source_range.lines) + source_range.source_line + lookup[line] = (; file, line = source_line) + end + end + return lookup +end +function compute_line_file_lookup(nlines, path, source_ranges::Nothing) + return [(; file = path, line) for line = 1:nlines] +end + +function raw_markdown_chunks_from_string( + path::String, + markdown::String; + source_ranges = nothing, +) raw_chunks = [] source_code_hash = hash(VERSION) pars = Parser() @@ -616,6 +653,9 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) source_code_hash = hash(file_fromtmatter, source_code_hash) source_lines = collect(eachline(IOBuffer(markdown))) terminal_line = 1 + + line_file_lookup = compute_line_file_lookup(length(source_lines), path, source_ranges) + code_cells = false for (node, enter) in ast if enter && @@ -625,7 +665,7 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) md = join(source_lines[terminal_line:(line-1)], "\n") push!( raw_chunks, - (type = :markdown, source = md, file = path, line = terminal_line), + (; type = :markdown, source = md, line_file_lookup[terminal_line]...), ) if contains(md, r"`{(?:julia|python|r)} ") source_code_hash = hash(md, source_code_hash) @@ -636,7 +676,7 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) # this option could in the future also include a vector of line numbers, which knitr supports. # all other options seem to be quarto-rendering related, like where to put figure captions etc. source = node.literal - cell_options = extract_cell_options(source; file = path, line = line) + cell_options = extract_cell_options(source; line_file_lookup[line]...) evaluate = get(cell_options, "eval", Unset()) if !(evaluate isa Union{Bool,Unset}) error( @@ -649,12 +689,11 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) is_r_toplevel(node) ? :r : error("Unhandled code block language") push!( raw_chunks, - ( + (; type = :code, language = language, source, - file = path, - line, + line_file_lookup[line]..., evaluate, cell_options, ), @@ -666,7 +705,7 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) md = join(source_lines[terminal_line:end], "\n") push!( raw_chunks, - (type = :markdown, source = md, file = path, line = terminal_line), + (; type = :markdown, source = md, line_file_lookup[terminal_line]...), ) if contains(md, r"`{(?:julia|python|r)} ") source_code_hash = hash(md, source_code_hash) @@ -690,7 +729,7 @@ function raw_markdown_chunks_from_string(path::String, markdown::String) if raw_chunks[end].type == :code push!( raw_chunks, - (type = :markdown, source = "", file = path, line = terminal_line), + (; type = :markdown, source = "", file = path, line = terminal_line), ) end @@ -1499,6 +1538,7 @@ function run!( showprogress::Bool = true, options::Union{String,Dict{String,Any}} = Dict{String,Any}(), chunk_callback = (i, n, c) -> nothing, + source_ranges::Union{Nothing,Vector} = nothing, ) try borrow_file!(server, path; options, optionally_create = true) do file @@ -1519,7 +1559,15 @@ function run!( result_task = Threads.@spawn begin try - evaluate!(file, output; showprogress, options, markdown, chunk_callback) + evaluate!( + file, + output; + showprogress, + options, + markdown, + chunk_callback, + source_ranges, + ) finally put!(file.run_decision_channel, :evaluate_finished) end diff --git a/src/socket.jl b/src/socket.jl index 88b5d97..47313dd 100644 --- a/src/socket.jl +++ b/src/socket.jl @@ -345,9 +345,6 @@ function _handle_response_internal( # Running: if type == "run" - options = _get_options(request.content) - markdown = _get_markdown(options) - function chunk_callback(i, n, chunk) _write_json( socket, @@ -362,6 +359,10 @@ function _handle_response_internal( end result = try + options = _get_options(request.content) + markdown = _get_markdown(options) + source_ranges = _get_source_ranges(request.content) + (; notebook = run!( notebooks, @@ -370,6 +371,7 @@ function _handle_response_internal( markdown, showprogress, chunk_callback, + source_ranges, ) ) catch error @@ -464,6 +466,26 @@ _get_file(content::String) = content _get_options(content::Dict) = get(Dict{String,Any}, content, "options") _get_options(::String) = Dict{String,Any}() +function _get_source_ranges(content::Dict) + ranges = get(content, "sourceRanges", nothing) + ranges === nothing && return nothing + return map(ranges) do range + file = get(range, "file", nothing) + _lines::Vector{Int} = range["lines"] + @assert length(_lines) == 2 + lines = _lines[1]:_lines[2] + _source_lines::Union{Nothing,Vector{Int}} = get(range, "sourceLines", nothing) + source_lines = if _source_lines === nothing + 1:length(lines) # source lines are only missing in degenerate cases like additional newlines anyway so this doesn't really matter + else + @assert length(_source_lines) == 2 + _source_lines[1]:_source_lines[2] + end + SourceRange(file, lines, source_lines) + end +end +_get_source_ranges(::String) = nothing + function _get_nested(d::Dict, keys...) _d = d for key in keys diff --git a/test/examples/sourceranges/to_include.qmd b/test/examples/sourceranges/to_include.qmd new file mode 100644 index 0000000..60f0d85 --- /dev/null +++ b/test/examples/sourceranges/to_include.qmd @@ -0,0 +1,3 @@ +```{julia} +print("$(@__FILE__):$(@__LINE__)") +``` \ No newline at end of file diff --git a/test/examples/sourceranges/with_include.qmd b/test/examples/sourceranges/with_include.qmd new file mode 100644 index 0000000..271ece9 --- /dev/null +++ b/test/examples/sourceranges/with_include.qmd @@ -0,0 +1,9 @@ +--- +engine: julia +--- + +{{< include to_include.qmd >}} + +```{julia} +print("$(@__FILE__):$(@__LINE__)") +``` \ No newline at end of file diff --git a/test/testsets/socket_server/client.js b/test/testsets/socket_server/client.js index 3f06ad6..0b4d3b8 100644 --- a/test/testsets/socket_server/client.js +++ b/test/testsets/socket_server/client.js @@ -22,27 +22,30 @@ function handle() { const isready = () => toJSON({ type: 'isready', content: '' }); const status = () => toJSON({ type: 'status', content: '' }); - const notebook = (arg) => { - if (path.isAbsolute(arg)) { - return arg + const content = (arg) => { + if (typeof arg === 'string') { + if (path.isAbsolute(arg)) { + return arg + } + throw new Error('No notebook with absolute path specified.'); } - throw new Error('No notebook with absolute path specified.'); + return arg; } const type = process.argv[4]; - const arg = process.argv[5]; + const arg = process.argv.length >= 6 ? JSON.parse(process.argv[5]) : undefined; switch (type) { case 'run': - return run(notebook(arg)); + return run(content(arg)); case 'close': - return close(notebook(arg)); + return close(content(arg)); case 'forceclose': - return forceclose(notebook(arg)); + return forceclose(content(arg)); case 'stop': return stop(); case 'isopen': - return isopen(notebook(arg)); + return isopen(content(arg)); case 'isready': return isready(); case 'status': diff --git a/test/testsets/socket_server/socket_server.jl b/test/testsets/socket_server/socket_server.jl index 5e024f8..3441938 100644 --- a/test/testsets/socket_server/socket_server.jl +++ b/test/testsets/socket_server/socket_server.jl @@ -12,17 +12,25 @@ include("../../utilities/prelude.jl") @test json(`$node $client $(server.port) $(server.key) isready`) - d1 = json(`$node $client $(server.port) $(server.key) isopen $(cell_types)`) + d1 = json( + `$node $client $(server.port) $(server.key) isopen $(JSON3.write(cell_types))`, + ) @test d1 == false - d2 = json(`$node $client $(server.port) $(server.key) run $(cell_types)`) + d2 = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(cell_types))`, + ) @test length(d2["notebook"]["cells"]) == 9 - d3 = json(`$node $client $(server.port) $(server.key) isopen $(cell_types)`) + d3 = json( + `$node $client $(server.port) $(server.key) isopen $(JSON3.write(cell_types))`, + ) @test d3 == true t_before_run = Dates.now() - d4 = json(`$node $client $(server.port) $(server.key) run $(cell_types)`) + d4 = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(cell_types))`, + ) t_after_run = Dates.now() @test d2 == d4 @@ -31,20 +39,26 @@ include("../../utilities/prelude.jl") @test occursin("workers active: 1", d5) @test occursin(abspath(cell_types), d5) - d6 = json(`$node $client $(server.port) $(server.key) close $(cell_types)`) + d6 = json( + `$node $client $(server.port) $(server.key) close $(JSON3.write(cell_types))`, + ) @test d6["status"] == true - d7 = json(`$node $client $(server.port) $(server.key) isopen $(cell_types)`) + d7 = json( + `$node $client $(server.port) $(server.key) isopen $(JSON3.write(cell_types))`, + ) @test d7 == false - d8 = json(`$node $client $(server.port) $(server.key) run $(cell_types)`) + d8 = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(cell_types))`, + ) @test d2 == d8 # test that certain commands on notebooks fail while those notebooks are already running sleep_10 = abspath("../../examples/sleep_10.qmd") sleep_task = Threads.@spawn json( - `$node $client $(server.port) $(server.key) run $(sleep_10)`, + `$node $client $(server.port) $(server.key) run $(JSON3.write(sleep_10))`, ) # wait until server lock locks due to the `run` command above @@ -59,10 +73,10 @@ include("../../utilities/prelude.jl") # both of these tasks should then try to access the worker that is busy and fail because # the lock is already held d9_task = Threads.@spawn json( - `$node $client $(server.port) $(server.key) run $(sleep_10)`, + `$node $client $(server.port) $(server.key) run $(JSON3.write(sleep_10))`, ) d10_task = Threads.@spawn json( - `$node $client $(server.port) $(server.key) close $(sleep_10)`, + `$node $client $(server.port) $(server.key) close $(JSON3.write(sleep_10))`, ) d9 = fetch(d9_task) @@ -90,7 +104,7 @@ end sleep_10 = abspath("../../examples/sleep_10.qmd") sleep_task = Threads.@spawn json( - `$node $client $(server.port) $(server.key) run $(sleep_10)`, + `$node $client $(server.port) $(server.key) run $(JSON3.write(sleep_10))`, ) # wait until server lock locks due to the `run` command above @@ -103,7 +117,9 @@ end end # force-closing should kill the worker even if it's running - d1 = json(`$node $client $(server.port) $(server.key) forceclose $(sleep_10)`) + d1 = json( + `$node $client $(server.port) $(server.key) forceclose $(JSON3.write(sleep_10))`, + ) @test d1 == Dict{String,Any}("status" => true) d2 = fetch(sleep_task) @@ -113,9 +129,13 @@ end cell_types = abspath("../../examples/cell_types.qmd") - d3 = json(`$node $client $(server.port) $(server.key) run $(cell_types)`) + d3 = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(cell_types))`, + ) - d4 = json(`$node $client $(server.port) $(server.key) forceclose $(cell_types)`) + d4 = json( + `$node $client $(server.port) $(server.key) forceclose $(JSON3.write(cell_types))`, + ) @test d4 == Dict{String,Any}("status" => true) run(`$node $client $(server.port) $(server.key) stop`) @@ -123,3 +143,102 @@ end wait(server) end end + +@testset "source ranges" begin + cd(@__DIR__) do + with_include = abspath("../../examples/sourceranges/with_include.qmd") + to_include = abspath("../../examples/sourceranges/to_include.qmd") + + with_include_md = read(with_include, String) + to_include_md = read(to_include, String) + + with_include_A, with_include_B = + split(with_include_md, r"{{< include to_include\.qmd >}}\r?\n") + + lines_A = length(split(with_include_A, "\n")) + lines_B = length(split(with_include_B, "\n")) + lines_to_include = length(split(to_include_md, "\n")) + + # this mocks the current behavior of quarto where it sometimes inserts newlines + # without tracking info after includes + empty_line = "" + + full = join([with_include_A, to_include_md, empty_line, with_include_B], "\n") + + ends = cumsum([lines_A, lines_to_include, 1, lines_B]) + + source_ranges = [ + (; file = with_include, lines = [1, ends[1]], sourceLines = [1, lines_A]), + (; + file = to_include, + lines = [ends[1] + 1, ends[2]], + sourceLines = [1, lines_to_include], + ), + (; + # the empty lines that quarto sometimes add lack file and sourceLines + lines = [ends[2] + 1, ends[3]], + ), + (; + file = with_include, + lines = [ends[3] + 1, ends[4]], + sourceLines = [lines_A + 1, lines_A + lines_B], + ), + ] + + node = NodeJS_18_jll.node() + client = joinpath(@__DIR__, "client.js") + server = QuartoNotebookRunner.serve(; showprogress = false) + sleep(1) + json(cmd) = JSON3.read(read(cmd, String), Any) + + options = (; target = (; markdown = (; value = full))) + + full_lines = split(full, "\n") + + content_without_ranges = (; file = with_include, options) + result = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(content_without_ranges))`, + ) + + first_line_with_print = + findfirst(contains(raw"""print("$(@__FILE__):$(@__LINE__)")"""), full_lines) + last_line_with_print = + findlast(contains(raw"""print("$(@__FILE__):$(@__LINE__)")"""), full_lines) + + # check that the FILE/LINE printouts reflect only the concatenated (root) file + @test result["notebook"]["cells"][2]["outputs"][1]["text"] == + "$(with_include):$first_line_with_print" + @test result["notebook"]["cells"][4]["outputs"][1]["text"] == + "$(with_include):$last_line_with_print" + + content_with_ranges = (; file = with_include, sourceRanges = source_ranges, options) + result = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(content_with_ranges))`, + ) + + line_in_with_include = findfirst( + contains(raw"""print("$(@__FILE__):$(@__LINE__)")"""), + collect(eachline(with_include)), + ) + line_in_to_include = findfirst( + contains(raw"""print("$(@__FILE__):$(@__LINE__)")"""), + collect(eachline(to_include)), + ) + + # check that the FILE/LINE printouts reflect the original files and line numbers + @test result["notebook"]["cells"][2]["outputs"][1]["text"] == + "$(to_include):$line_in_to_include" + @test result["notebook"]["cells"][4]["outputs"][1]["text"] == + "$(with_include):$line_in_with_include" + + # modify one of the source line boundaries so it mismatches + source_ranges[1].sourceLines[2] -= 1 + result = json( + `$node $client $(server.port) $(server.key) run $(JSON3.write(content_with_ranges))`, + ) + @test contains( + result["juliaError"], + "Mismatching lengths of lines 1:5 (5) and source_lines 1:4", + ) + end +end