From 9b27032a2a026ec2a86c447c867b0d5d9c785933 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 24 May 2025 21:35:55 +0800 Subject: [PATCH 1/4] refactor(`val_opts`): optimize options validation logic - Merge `valid_opts` and `valid_opt_types` into a single dictionary constant `VALID_OPTS_TYPES` --- src/DelimitedFiles.jl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/DelimitedFiles.jl b/src/DelimitedFiles.jl index 1da2560..1bd3e82 100644 --- a/src/DelimitedFiles.jl +++ b/src/DelimitedFiles.jl @@ -479,15 +479,24 @@ function readdlm_string(sbuff::String, dlm::AbstractChar, T::Type, eol::Abstract return readdlm_string(sbuff, dlm, T, eol, auto, optsd) end -const valid_opts = [:header, :has_header, :use_mmap, :quotes, :comments, :dims, :comment_char, :skipstart, :skipblanks] -const valid_opt_types = [Bool, Bool, Bool, Bool, Bool, NTuple{2,Integer}, Char, Integer, Bool] +const VALID_OPTS_TYPES = Dict{Symbol, Type}( + :header => Bool, + :has_header => Bool, + :use_mmap => Bool, + :quotes => Bool, + :comments => Bool, + :dims => NTuple{2,Integer}, + :comment_char => Char, + :skipstart => Integer, + :skipblanks => Bool +) function val_opts(opts) d = Dict{Symbol, Union{Bool, NTuple{2, Integer}, Char, Integer}}() for (opt_name, opt_val) in opts - in(opt_name, valid_opts) || + haskey(VALID_OPTS_TYPES, opt_name) || throw(ArgumentError("unknown option $opt_name")) - opt_typ = valid_opt_types[findfirst(isequal(opt_name), valid_opts)::Int] + opt_typ = VALID_OPTS_TYPES[opt_name] isa(opt_val, opt_typ) || throw(ArgumentError("$opt_name should be of type $opt_typ, got $(typeof(opt_val))")) d[opt_name] = opt_val From 442fe92542107c6ac47489a3a90dcc54ba816725 Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 24 May 2025 21:58:49 +0800 Subject: [PATCH 2/4] docs(readdlm): improve keyword arguments documentation format --- src/DelimitedFiles.jl | 52 +++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 19 deletions(-) 
diff --git a/src/DelimitedFiles.jl b/src/DelimitedFiles.jl index 1bd3e82..a4cf972 100644 --- a/src/DelimitedFiles.jl +++ b/src/DelimitedFiles.jl @@ -171,7 +171,13 @@ readdlm(input, dlm::AbstractChar, eol::AbstractChar; opts...) = readdlm_auto(input, dlm, Float64, eol, true; opts...) """ - readdlm(source, delim::AbstractChar, T::Type, eol::AbstractChar; header=false, skipstart=0, skipblanks=true, use_mmap, quotes=true, dims, comments=false, comment_char='#') + readdlm( + source, delim::AbstractChar, T::Type, eol::AbstractChar + ; header=false, skipstart=0, skipblanks=true, + use_mmap=false, quotes=true, + dims::NTuple{2,Integer}, + comments=false, comment_char='#' + ) Read a matrix from the source where each line (separated by `eol`) gives one row, with elements separated by the given delimiter. The source can be a text file, stream or byte @@ -182,24 +188,32 @@ If `T` is a numeric type, the result is an array of that type, with any non-nume as `NaN` for floating-point types, or zero. Other useful values of `T` include `String`, `AbstractString`, and `Any`. -If `header` is `true`, the first row of data will be read as header and the tuple -`(data_cells, header_cells)` is returned instead of only `data_cells`. - -Specifying `skipstart` will ignore the corresponding number of initial lines from the input. - -If `skipblanks` is `true`, blank lines in the input will be ignored. - -If `use_mmap` is `true`, the file specified by `source` is memory mapped for potential -speedups if the file is large. Default is `false`. On a Windows filesystem, `use_mmap` should not be set -to `true` unless the file is only read once and is also not written to. -Some edge cases exist where an OS is Unix-like but the filesystem is Windows-like. - -If `quotes` is `true`, columns enclosed within double-quote (\") characters are allowed to -contain new lines and column delimiters. Double-quote characters within a quoted field must -be escaped with another double-quote. 
Specifying `dims` as a tuple of the expected rows and -columns (including header, if any) may speed up reading of large files. If `comments` is -`true`, lines beginning with `comment_char` and text following `comment_char` in any line -are ignored. +Keyword arguments: +- `header::Bool=false`: + if `true`, the first row of data will be read as header and the tuple + `(data_cells, header_cells)` is returned instead of only `data_cells`. +- `skipstart::Integer=0`: + if `skipstart > 0`, ignore the corresponding number of initial lines from the input. +- `skipblanks::Bool=true`: + if `true`, blank lines in the input will be ignored. +- `use_mmap::Bool=false`: + if `true`, the file specified by `source` is memory mapped for potential + speedups if the file is large. + On a Windows filesystem, `use_mmap` should not be set + to `true` unless the file is only read once and is also not written to. + Some edge cases exist where an OS is Unix-like but the filesystem is Windows-like. +- `quotes::Bool=true`: + if `true`, columns enclosed within double-quote (\") characters are allowed to + contain new lines and column delimiters. Double-quote characters within a quoted field must + be escaped with another double-quote. +- `dims::NTuple{2,Integer}`: + a tuple of the expected rows and columns (including header, if any); specifying it may + speed up reading of large files. +- `comments::Bool=false`: + if `true`, lines beginning with `comment_char` and text following `comment_char` in any line + are ignored. +- `comment_char::Char='#'`: + the character that marks the beginning of a comment. 
# Examples ```jldoctest From 859699c712865baa9ed9bb734cf3b8cae1be549f Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 24 May 2025 22:02:00 +0800 Subject: [PATCH 3/4] docs(writedlm): add doc for Keyword arguments Adapted from `readdlm` --- src/DelimitedFiles.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/DelimitedFiles.jl b/src/DelimitedFiles.jl index a4cf972..1f8707f 100644 --- a/src/DelimitedFiles.jl +++ b/src/DelimitedFiles.jl @@ -827,6 +827,12 @@ Write `A` (a vector, matrix, or an iterable collection of iterable rows) as text `delim` (which defaults to tab, but can be any printable Julia object, typically a `Char` or `AbstractString`). +Keyword arguments: +- `quotes::Bool=true`: + if `true`, string fields containing double-quote (\") characters, new lines or `delim` + are written enclosed in double-quotes, and double-quote characters inside such a field are + escaped with another double-quote. + For example, two vectors `x` and `y` of the same length can be written as two columns of tab-delimited text to `f` by either `writedlm(f, [x y])` or by `writedlm(f, zip(x, y))`. From 87f73822faabc310d229fffb1d683183f877c3ca Mon Sep 17 00:00:00 2001 From: Chengyu HAN Date: Sat, 24 May 2025 22:06:34 +0800 Subject: [PATCH 4/4] docs: rename Keyword options placeholder --- src/DelimitedFiles.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DelimitedFiles.jl b/src/DelimitedFiles.jl index 1f8707f..9281737 100644 --- a/src/DelimitedFiles.jl +++ b/src/DelimitedFiles.jl @@ -188,7 +188,7 @@ If `T` is a numeric type, the result is an array of that type, with any non-nume as `NaN` for floating-point types, or zero. Other useful values of `T` include `String`, `AbstractString`, and `Any`. -Keyword arguments: +Keyword options: - `header::Bool=false`: if `true`, the first row of data will be read as header and the tuple `(data_cells, header_cells)` is returned instead of only `data_cells`. 
@@ -820,14 +820,14 @@ function writedlm(fname::AbstractString, a, dlm; opts...) end """ - writedlm(f, A, delim='\\t'; opts) + writedlm(f, A, delim='\\t'; options...) Write `A` (a vector, matrix, or an iterable collection of iterable rows) as text to `f` (either a filename string or an `IO` stream) using the given delimiter `delim` (which defaults to tab, but can be any printable Julia object, typically a `Char` or `AbstractString`). -Keyword arguments: +Keyword `options`: - `quotes::Bool=true`: if `true`, string fields containing double-quote (\") characters, new lines or `delim` are written enclosed in double-quotes, and double-quote characters inside such a field are