diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96b49aba..0b8a84ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,6 @@ jobs: fail-fast: false matrix: version: - - '1.7' - '1.8' - '1.9' - '1' # automatically expands to the latest stable 1.x release of Julia diff --git a/Project.toml b/Project.toml index 31eaa8b9..c13669bd 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" XML = "72c71f33-b9b6-44de-8c94-c961784809e2" ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" @@ -17,11 +18,12 @@ ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" Tables = "1" XML = "0.3.5" ZipArchives = "2" -julia = "1.7" +julia = "1.8" + [extras] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "DataFrames"] +test = ["Test", "DataFrames"] \ No newline at end of file diff --git a/data/customXml.xlsx b/data/customXml.xlsx index 2dc7f9fa..55c580ac 100644 Binary files a/data/customXml.xlsx and b/data/customXml.xlsx differ diff --git a/data/general.xlsx b/data/general.xlsx index a5c92d67..9ec8612e 100644 Binary files a/data/general.xlsx and b/data/general.xlsx differ diff --git a/docs/src/api.md b/docs/src/api.md index 2a2b32bb..34cb938f 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -41,4 +41,8 @@ XLSX.setUniformAlignment XLSX.setUniformStyle XLSX.setColumnWidth XLSX.setRowHeight +XLSX.getMergedCells +XLSX.isMergedCell +XLSX.getMergedBaseCell +XLSX.addDefinedName ``` diff --git a/src/XLSX.jl b/src/XLSX.jl index 59309249..deadb45c 100644 --- a/src/XLSX.jl +++ b/src/XLSX.jl @@ -7,6 +7,7 @@ import Printf.@printf import ZipArchives import XML import Tables +import Unicode import Base.convert const 
SPREADSHEET_NAMESPACE_XPATH_ARG = "http://schemas.openxmlformats.org/spreadsheetml/2006/main" diff --git a/src/cellformats.jl b/src/cellformats.jl index 0c80d10b..42bca51b 100644 --- a/src/cellformats.jl +++ b/src/cellformats.jl @@ -220,11 +220,21 @@ function process_sheetcell(f::Function, xl::XLSXFile, sheetcell::String; kw...): else error("Unexpected defined name value: $v.") end + elseif is_valid_non_contiguous_sheetcellrange(sheetcell) + sheetncrng = NonContiguousRange(sheetcell) + @assert hassheet(xl, sheetncrng.sheet) "Sheet $(sheetncrng.sheet) not found." + newid = f(xl[sheetncrng.sheet], sheetncrng; kw...) elseif is_valid_sheet_column_range(sheetcell) sheetcolrng = SheetColumnRange(sheetcell) + @assert hassheet(xl, sheetcolrng.sheet) "Sheet $(sheetcolrng.sheet) not found." newid = f(xl[sheetcolrng.sheet], sheetcolrng.colrng; kw...) + elseif is_valid_sheet_row_range(sheetcell) + sheetrowrng = SheetRowRange(sheetcell) + @assert hassheet(xl, sheetrowrng.sheet) "Sheet $(sheetrowrng.sheet) not found." + newid = f(xl[sheetrowrng.sheet], sheetrowrng.rowrng; kw...) elseif is_valid_sheet_cellrange(sheetcell) sheetcellrng = SheetCellRange(sheetcell) + @assert hassheet(xl, sheetcellrng.sheet) "Sheet $(sheetcellrng.sheet) not found." newid = f(xl[sheetcellrng.sheet], sheetcellrng.rng; kw...) elseif is_valid_sheet_cellname(sheetcell) ref = SheetCellRef(sheetcell) @@ -253,7 +263,7 @@ function process_ranges(f::Function, ws::Worksheet, ref_or_rng::AbstractString; if is_defined_name_value_a_constant(v) error("Can only assign attributes to cells but `$(ref_or_rng)` is a constant: $(ref_or_rng)=$v.") elseif is_defined_name_value_a_reference(v) - if is_non_contiguous_range(v) + if is_valid_non_contiguous_range(string(v)) _ = f.(Ref(get_xlsxfile(wb)), replace.(split(string(v), ","), "'" => "", "\$" => ""); kw...) 
newid = -1 else @@ -265,6 +275,9 @@ function process_ranges(f::Function, ws::Worksheet, ref_or_rng::AbstractString; elseif is_valid_column_range(ref_or_rng) colrng = ColumnRange(ref_or_rng) newid = f(ws, colrng; kw...) + elseif is_valid_row_range(ref_or_rng) + rowrng = RowRange(ref_or_rng) + newid = f(ws, rowrng; kw...) elseif is_valid_cellrange(ref_or_rng) rng = CellRange(ref_or_rng) newid = f(ws, rng; kw...) @@ -296,6 +309,43 @@ function process_columnranges(f::Function, ws::Worksheet, colrng::ColumnRange; k error("Column range $colrng is out of bounds. Worksheet `$(ws.name)` only has dimension `$dim`.") end end +function process_rowranges(f::Function, ws::Worksheet, rowrng::RowRange; kw...)::Int + bounds = row_bounds(rowrng) + dim = (get_dimension(ws)) + + top = bounds[begin] + bottom = bounds[end] + left = dim.start.column_number + right = dim.stop.column_number + + OK = dim.start.column_number <= left + OK &= dim.stop.column_number >= right + OK &= dim.start.row_number <= top + OK &= dim.stop.row_number >= bottom + + if OK + rng = CellRange(top, left, bottom, right) + return f(ws, rng; kw...) + else + error("Row range $rowrng is out of bounds. Worksheet `$(ws.name)` only has dimension `$dim`.") + end +end +function process_ncranges(f::Function, ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int + bounds = nc_bounds(ncrng) + dim = (get_dimension(ws)) + OK = dim.start.column_number <= bounds.start.column_number + OK &= dim.stop.column_number >= bounds.stop.column_number + OK &= dim.start.row_number <= bounds.start.row_number + OK &= dim.stop.row_number >= bounds.stop.row_number + if OK + for r in ncrng.rng + _ = f(ws, r; kw...) + end + return -1 + else + error("Non-contiguous range $ncrng is out of bounds. 
Worksheet `$(ws.name)` only has dimension `$dim`.") + end +end function process_cellranges(f::Function, ws::Worksheet, rng::CellRange; kw...)::Int for cellref in rng if getcell(ws, cellref) isa EmptyCell @@ -305,12 +355,12 @@ function process_cellranges(f::Function, ws::Worksheet, rng::CellRange; kw...):: end return -1 # Each cell may have a different attribute Id so we can't return a single value. end -function process_get_sheetcell(f::Function, xl::XLSXFile, sheetcell::String) +function process_get_sheetcell(f::Function, xl::XLSXFile, sheetcell::String; kw...) ref = SheetCellRef(sheetcell) @assert hassheet(xl, ref.sheet) "Sheet $(ref.sheet) not found." - return f(getsheet(xl, ref.sheet), ref.cellref) + return f(getsheet(xl, ref.sheet), ref.cellref; kw...) end -function process_get_cellref(f::Function, ws::Worksheet, cellref::CellRef) +function process_get_cellref(f::Function, ws::Worksheet, cellref::CellRef; kw...) wb = get_workbook(ws) cell = getcell(ws, cellref) @@ -319,21 +369,21 @@ function process_get_cellref(f::Function, ws::Worksheet, cellref::CellRef) end cell_style = styles_cell_xf(wb, parse(Int, cell.style)) - return f(wb, cell_style) + return f(wb, cell_style; kw...) end -function process_get_cellname(f::Function, ws::Worksheet, ref_or_rng::AbstractString) +function process_get_cellname(f::Function, ws::Worksheet, ref_or_rng::AbstractString; kw...) if is_workbook_defined_name(get_workbook(ws), ref_or_rng) wb = get_workbook(ws) v = get_defined_name_value(wb, ref_or_rng) if is_defined_name_value_a_constant(v) # Can these have fonts? - error("Can only assign borderds to cells but `$(ref_or_rng)` is a constant: $(ref_or_rng)=$v.") + error("Can only assign borders to cells but `$(ref_or_rng)` is a constant: $(ref_or_rng)=$v.") elseif is_defined_name_value_a_reference(v) - new_att = f(get_xlsxfile(wb), replace(string(v), "'" => "")) + new_att = f(get_xlsxfile(wb), replace(string(v), "'" => ""); kw...) 
else error("Unexpected defined name value: $v.") end elseif is_valid_cellname(ref_or_rng) - new_att = f(ws, CellRef(ref_or_rng)) + new_att = f(ws, CellRef(ref_or_rng); kw...) else error("Invalid cell reference or range: $ref_or_rng") end @@ -406,7 +456,7 @@ end setFont(xf::XLSXFile, cr::String, kw...) -> ::Int Set the font used by a single cell, a cell range, a column range or -a named cell or named range in a worksheet or XLSXfile. +row range or a named cell or named range in a worksheet or XLSXfile. Font attributes are specified using keyword arguments: - `bold::Bool = nothing` : set to `true` to make the font bold. @@ -457,6 +507,8 @@ julia> setFont(sh, "A:B"; italic=true, color="FF8888FF", under="single") julia> setFont(xf, "Sheet1!A:B"; italic=true, color="FF8888FF", under="single") # Column range +julia> setFont(xf, "Sheet1!6:12"; italic=false, color="FF8888FF", under="none") # Row range + julia> setFont(sh, "bigred"; size=48, color="FF00FF00") # Named cell or range julia> setFont(xf, "bigred"; size=48, color="FF00FF00") # Named cell or range @@ -466,6 +518,8 @@ julia> setFont(xf, "bigred"; size=48, color="FF00FF00") function setFont end setFont(ws::Worksheet, rng::CellRange; kw...)::Int = process_cellranges(setFont, ws, rng; kw...) setFont(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setFont, ws, colrng; kw...) +setFont(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setFont, ws, rowrng; kw...) +setFont(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setFont, ws, ncrng; kw...) setFont(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setFont, ws, ref_or_rng; kw...) setFont(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setFont, xl, sheetcell; kw...) function setFont(sh::Worksheet, cellref::CellRef; @@ -559,8 +613,8 @@ end setUniformFont(sh::Worksheet, cr::String; kw...) -> ::Int setUniformFont(xf::XLSXFile, cr::String, kw...) 
-> ::Int -Set the font used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be uniformly the same font. +Set the font used by a cell range, a column range or row range or +a named range in a worksheet or XLSXfile to be uniformly the same font. First, the font attributes of the first cell in the range (the top-left cell) are updated according to the given `kw...` (using `setFont()`). The resultant font is @@ -595,6 +649,8 @@ julia> setUniformFont(sh, "A:B"; italic=true, color="FF8888FF", under="single") julia> setUniformFont(xf, "Sheet1!A:B"; italic=true, color="FF8888FF", under="single") # Column range +julia> setUniformFont(sh, "33"; italic=true, color="FF8888FF", under="single") # Row + julia> setUniformFont(sh, "bigred"; size=48, color="FF00FF00") # Named range julia> setUniformFont(xf, "bigred"; size=48, color="FF00FF00") # Named range @@ -603,6 +659,8 @@ julia> setUniformFont(xf, "bigred"; size=48, color="FF00FF00") """ function setUniformFont end setUniformFont(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setUniformFont, ws, colrng; kw...) +setUniformFont(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformFont, ws, rowrng; kw...) +setUniformFont(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformFont, ws, ncrng; kw...) setUniformFont(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setUniformFont, xl, sheetcell; kw...) setUniformFont(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setUniformFont, ws, ref_or_rng; kw...) setUniformFont(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_attribute(setFont, ws, rng, ["fontId", "applyFont"]; kw...) 
@@ -669,7 +727,8 @@ function getFont(wb::Workbook, cell_style::XML.Node)::Union{Nothing,CellFont} if isnothing(XML.attributes(c)) || length(XML.attributes(c)) == 0 font_atts[XML.tag(c)] = nothing else - # @assert length(XML.attributes(c)) == 1 "Too many font attributes found for $(XML.tag(c)) Expected 1, found $(length(XML.attributes(c)))." + #@assert length(XML.attributes(c)) == 1 "Too many font attributes found for $(XML.tag(c)) Expected 1, found $(length(XML.attributes(c)))." + for (k, v) in XML.attributes(c) font_atts[XML.tag(c)] = Dict(k => v) end @@ -801,7 +860,7 @@ end setBorder(xf::XLSXFile, cr::String; kw...) -> ::Int Set the borders used used by a single cell, a cell range, a column range or -a named cell or named range in a worksheet or XLSXfile. +row range or a named cell or named range in a worksheet or XLSXfile. Borders are independently defined for the keywords: - `left::Vector{Pair{String,String} = nothing` @@ -877,6 +936,8 @@ Julia> setBorder(xf, "Sheet1!D4"; left = ["style" => "dotted", "color" => "F function setBorder end setBorder(ws::Worksheet, rng::CellRange; kw...)::Int = process_cellranges(setBorder, ws, rng; kw...) setBorder(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setBorder, ws, colrng; kw...) +setBorder(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setBorder, ws, rowrng; kw...) +setBorder(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setBorder, ws, ncrng; kw...) setBorder(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setBorder, ws, ref_or_rng; kw...) setBorder(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setBorder, xl, sheetcell; kw...) function setBorder(sh::Worksheet, cellref::CellRef; @@ -969,8 +1030,8 @@ end setUniformBorder(sh::Worksheet, cr::String; kw...) -> ::Int setUniformBorder(xf::XLSXFile, cr::String, kw...) 
-> ::Int -Set the border used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be uniformly the same border. +Set the border used by a cell range, a column range or row range or +a named range in a worksheet or XLSXfile to be uniformly the same border. First, the border attributes of the first cell in the range (the top-left cell) are updated according to the given `kw...` (using `setBorder()`). The resultant border is @@ -1010,6 +1071,8 @@ Julia> setUniformBorder(xf, "Sheet1!A1:F20"; left = ["style" => "dotted", "c """ function setUniformBorder end setUniformBorder(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setUniformBorder, ws, colrng; kw...) +setUniformBorder(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformBorder, ws, rowrng; kw...) +setUniformBorder(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformBorder, ws, ncrng; kw...) setUniformBorder(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setUniformBorder, xl, sheetcell; kw...) setUniformBorder(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setUniformBorder, ws, ref_or_rng; kw...) setUniformBorder(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_attribute(setBorder, ws, rng, ["borderId", "applyBorder"]; kw...) @@ -1018,8 +1081,8 @@ setUniformBorder(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_at setOutsideBorder(sh::Worksheet, cr::String; kw...) -> ::Int setOutsideBorder(xf::XLSXFile, cr::String, kw...) -> ::Int -Set the border around the outside of a cell range, a column range or a named -range in a worksheet or XLSXfile. +Set the border around the outside of a cell range, a column range or row range +or a named range in a worksheet or XLSXfile. 
Two key words can be defined: - `style::String = nothing` : defines the style of the outside border @@ -1030,6 +1093,11 @@ ouside edge of the range will be set to the specified style and color. The borders of internal edges and any diagonal will remain unchanged. Border settings for all internal cells in the range will remain unchanged. +Top and bottom borders for column ranges and left and right borders for +row ranges are taken from the worksheet `dimension`. + +An outside border cannot be set for a non-contiguous range. + The value returned is is -1. For keyword definitions see [`setBorder()`](@ref). @@ -1044,6 +1112,7 @@ Julia> setOutsideBorder(xf, "Sheet1!A1:F20"; style = "dotted", color = "FF000FF0 """ function setOutsideBorder end setOutsideBorder(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setOutsideBorder, ws, colrng; kw...) +setOutsideBorder(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setOutsideBorder, ws, rowrng; kw...) setOutsideBorder(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setOutsideBorder, xl, sheetcell; kw...) setOutsideBorder(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setOutsideBorder, ws, ref_or_rng; kw...) function setOutsideBorder(ws::Worksheet, rng::CellRange; @@ -1202,7 +1271,7 @@ end setFill(xf::XLSXFile, cr::String; kw...) -> ::Int Set the fill used used by a single cell, a cell range, a column range or -a named cell or named range in a worksheet or XLSXfile. +row range or a named cell or named range in a worksheet or XLSXfile. The following keywords are used to define a fill: - `pattern::String = nothing` : Sets the patternType for the fill. 
@@ -1251,10 +1320,14 @@ Julia> setFill(sh, "B2"; pattern="gray125", bgColor = "FF000000") Julia> setFill(xf, "Sheet1!A1:F20"; pattern="none", fgColor = "88FF8800") +Julia> setFill(sh, "11:24"; pattern="none", fgColor = "88FF8800") + ``` """ function setFill end setFill(ws::Worksheet, rng::CellRange; kw...)::Int = process_cellranges(setFill, ws, rng; kw...) +setFill(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setFill, ws, rowrng; kw...) +setFill(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setFill, ws, ncrng; kw...) setFill(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setFill, ws, colrng; kw...) setFill(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setFill, ws, ref_or_rng; kw...) setFill(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setFill, xl, sheetcell; kw...) @@ -1330,8 +1403,8 @@ end setUniformFill(sh::Worksheet, cr::String; kw...) -> ::Int setUniformFill(xf::XLSXFile, cr::String, kw...) -> ::Int -Set the fill used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be uniformly the same fill. +Set the fill used by a cell range, a column range or row range or a +named range in a worksheet or XLSXfile to be uniformly the same fill. First, the fill attributes of the first cell in the range (the top-left cell) are updated according to the given `kw...` (using `setFill()`). The resultant fill is @@ -1366,6 +1439,8 @@ Julia> setUniformFill(xf, "Sheet1!A1:F20"; pattern="none", fgColor = "88FF8800") """ function setUniformFill end setUniformFill(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setUniformFill, ws, colrng; kw...) +setUniformFill(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformFill, ws, rowrng; kw...) +setUniformFill(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformFill, ws, ncrng; kw...) 
setUniformFill(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setUniformFill, xl, sheetcell; kw...) setUniformFill(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setUniformFill, ws, ref_or_rng; kw...) setUniformFill(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_attribute(setFill, ws, rng, ["fillId", "applyFill"]; kw...) @@ -1451,7 +1526,7 @@ end setAlignment(xf::XLSXFile, cr::String; kw...) -> ::Int} Set the alignment used used by a single cell, a cell range, a column range or -a named cell or named range in a worksheet or XLSXfile. +row range or a named cell or named range in a worksheet or XLSXfile. The following keywords are used to define an alignment: - `horizontal::String = nothing` : Sets the horizontal alignment. @@ -1499,6 +1574,8 @@ julia> setAlignment(sh, "L6"; horizontal="center", rotation="90", shrink=true, i function setAlignment end setAlignment(ws::Worksheet, rng::CellRange; kw...)::Int = process_cellranges(setAlignment, ws, rng; kw...) setAlignment(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setAlignment, ws, colrng; kw...) +setAlignment(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setAlignment, ws, rowrng; kw...) +setAlignment(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setAlignment, ws, ncrng; kw...) setAlignment(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setAlignment, ws, ref_or_rng; kw...) setAlignment(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setAlignment, xl, sheetcell; kw...) function setAlignment(sh::Worksheet, cellref::CellRef; @@ -1581,8 +1658,8 @@ end setUniformAlignment(sh::Worksheet, cr::String; kw...) -> ::Int setUniformAlignment(xf::XLSXFile, cr::String, kw...) -> ::Int -Set the alignment used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be uniformly the same alignment. 
+Set the alignment used by a cell range, a column range or row range or a +named range in a worksheet or XLSXfile to be uniformly the same alignment. First, the alignment attributes of the first cell in the range (the top-left cell) are updated according to the given `kw...` (using `setAlignment()`). The resultant alignment @@ -1618,6 +1695,8 @@ Julia> setUniformAlignment(xf, "Sheet1!A1:F20"; horizontal="center", vertical="t """ function setUniformAlignment end setUniformAlignment(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setUniformAlignment, ws, colrng; kw...) +setUniformAlignment(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformAlignment, ws, rowrng; kw...) +setUniformAlignment(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformAlignment, ws, ncrng; kw...) setUniformAlignment(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setUniformAlignment, xl, sheetcell; kw...) setUniformAlignment(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setUniformAlignment, ws, ref_or_rng; kw...) setUniformAlignment(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_attribute(setAlignment, ws, rng; kw...) @@ -1693,7 +1772,7 @@ end setFormat(xf::XLSXFile, cr::String; kw...) -> ::Int Set the format used used by a single cell, a cell range, a column range or -a named cell or named range in a worksheet or XLSXfile. +row range or a named cell or named range in a worksheet or XLSXfile. The function uses one keyword used to define a format: - `format::String = nothing` : Defines a built-in or custom number format @@ -1732,6 +1811,8 @@ julia> XLSX.setFormat(sh, "A2"; format = "_-£* #,##0.00_-;-£* #,##0.00_-;_-£* function setFormat end setFormat(ws::Worksheet, rng::CellRange; kw...)::Int = process_cellranges(setFormat, ws, rng; kw...) setFormat(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setFormat, ws, colrng; kw...) 
+setFormat(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setFormat, ws, ncrng; kw...) +setFormat(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setFormat, ws, rowrng; kw...) setFormat(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setFormat, ws, ref_or_rng; kw...) setFormat(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setFormat, xl, sheetcell; kw...) function setFormat(sh::Worksheet, cellref::CellRef; @@ -1803,8 +1884,8 @@ end setUniformFormat(sh::Worksheet, cr::String; kw...) -> ::Int setUniformFormat(xf::XLSXFile, cr::String, kw...) -> ::Int -Set the number format used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be to be uniformly the same format. +Set the number format used by a cell range, a column range or row range or a +named range in a worksheet or XLSXfile to be to be uniformly the same format. First, the number format of the first cell in the range (the top-left cell) is updated according to the given `kw...` (using `setFormat()`). The resultant format is @@ -1830,6 +1911,8 @@ julia> XLSX.setUniformFormat(sh, "F1:F5"; format = "Currency") """ function setUniformFormat end setUniformFormat(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setUniformFormat, ws, colrng; kw...) +setUniformFormat(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformFormat, ws, rowrng; kw...) +setUniformFormat(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformFormat, ws, ncrng; kw...) setUniformFormat(xl::XLSXFile, sheetcell::AbstractString; kw...)::Int = process_sheetcell(setUniformFormat, xl, sheetcell; kw...) setUniformFormat(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setUniformFormat, ws, ref_or_rng; kw...) setUniformFormat(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_attribute(setFormat, ws, rng; kw...) 
@@ -1842,9 +1925,9 @@ setUniformFormat(ws::Worksheet, rng::CellRange; kw...)::Int = process_uniform_at setUniformStyle(sh::Worksheet, cr::String) -> ::Int setUniformStyle(xf::XLSXFile, cr::String) -> ::Int -Set the cell `style` used by a cell range, a column range or a named range in a -worksheet or XLSXfile to be the same as that of the first cell in the range -that is not an `EmptyCell`. +Set the cell `style` used by a cell range, a column range or row range +or a named range in a worksheet or XLSXfile to be the same as that of +the first cell in the range that is not an `EmptyCell`. As a result, every cell in the range will have a uniform `style`. @@ -1868,6 +1951,8 @@ julia> XLSX.setUniformStyle(sh, "F1:F5") """ function setUniformStyle end setUniformStyle(ws::Worksheet, colrng::ColumnRange)::Int = process_columnranges(setUniformStyle, ws, colrng) +setUniformStyle(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setUniformStyle, ws, rowrng; kw...) +setUniformStyle(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setUniformStyle, ws, ncrng; kw...) setUniformStyle(xl::XLSXFile, sheetcell::AbstractString)::Int = process_sheetcell(setUniformStyle, xl, sheetcell) setUniformStyle(ws::Worksheet, ref_or_rng::AbstractString)::Int = process_ranges(setUniformStyle, ws, ref_or_rng) function setUniformStyle(ws::Worksheet, rng::CellRange)::Union{Nothing, Int} @@ -1939,6 +2024,8 @@ julia> XLSX.setColumnWidth(sh, "I"; width = 24.37) """ function setColumnWidth end setColumnWidth(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setColumnWidth, ws, colrng; kw...) +setColumnWidth(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setColumnWidth, ws, rowrng; kw...) +setColumnWidth(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setColumnWidth, ws, ncrng; kw...) 
setColumnWidth(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setColumnWidth, ws, ref_or_rng; kw...) setColumnWidth(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setColumnWidth, xl, sheetcell; kw...) setColumnWidth(ws::Worksheet, cr::CellRef; kw...)::Int = setColumnWidth(ws::Worksheet, CellRange(cr, cr); kw...) @@ -2032,6 +2119,7 @@ function getColumnWidth end getColumnWidth(xl::XLSXFile, sheetcell::String)::Union{Nothing,Float64} = process_get_sheetcell(getColumnWidth, xl, sheetcell) getColumnWidth(ws::Worksheet, cr::String) = process_get_cellname(getColumnWidth, ws, cr) function getColumnWidth(ws::Worksheet, cellref::CellRef)::Union{Nothing,Real} + # May be better if column width were part of ws.cache? @assert get_xlsxfile(ws).is_writable "Cannot get column width: `XLSXFile` is not writable." @@ -2106,6 +2194,8 @@ julia> XLSX.setRowHeight(sh, "I"; height = 24.56) """ function setRowHeight end setRowHeight(ws::Worksheet, colrng::ColumnRange; kw...)::Int = process_columnranges(setRowHeight, ws, colrng; kw...) +setRowHeight(ws::Worksheet, rowrng::RowRange; kw...)::Int = process_rowranges(setRowHeight, ws, rowrng; kw...) +setRowHeight(ws::Worksheet, ncrng::NonContiguousRange; kw...)::Int = process_ncranges(setRowHeight, ws, ncrng; kw...) setRowHeight(ws::Worksheet, ref_or_rng::AbstractString; kw...)::Int = process_ranges(setRowHeight, ws, ref_or_rng; kw...) setRowHeight(xl::XLSXFile, sheetcell::String; kw...)::Int = process_sheetcell(setRowHeight, xl, sheetcell; kw...) setRowHeight(ws::Worksheet, cr::CellRef; kw...)::Int = setRowHeight(ws::Worksheet, CellRange(cr, cr); kw...) @@ -2134,6 +2224,7 @@ function setRowHeight(ws::Worksheet, rng::CellRange; height::Union{Nothing,Real} if first == true return -1 # All rows were empty end + return 0 # meaningless return value. Int required to comply with reference decoding structure. 
end @@ -2180,3 +2271,157 @@ function getRowHeight(ws::Worksheet, cellref::CellRef)::Union{Nothing,Real} return -1 # Row specified not found (is empty) end + +# +# -- Get merged cells +# + +""" + getMergedCells(ws::Worksheet) -> Union{Vector{CellRange}, Nothing} + +Return a vector of the `CellRange` of all merged cells in the specified worksheet. +Return nothing if the worksheet contains no merged cells + + +# Examples: +```julia +julia> f = XLSX.readxlsx("test.xlsx") +XLSXFile("C:\\Users\\tim\\Downloads\\test.xlsx") containing 1 Worksheet + sheetname size range +------------------------------------------------- + Sheet1 2x2 A1:B2 + +julia> s = f["Sheet1"] +2×2 XLSX.Worksheet: ["Sheet1"](A1:B2) + +julia> XLSX.getMergedCells(s) +1-element Vector{XLSX.CellRange}: + B1:B2 + +``` +""" +function getMergedCells(ws::Worksheet)::Union{Vector{CellRange}, Nothing} + # May be better if merged cells were part of ws.cache? + + @assert get_xlsxfile(ws).use_cache_for_sheet_data "Cannot get merged cells because cache is not enabled." + + # No need to update the xml file using the worksheet cache first (like we did for column width) + # because we cannot change merged cells in XLSX.jl. + sheetdoc = xmlroot(ws.package, "xl/worksheets/sheet$(ws.sheetId).xml") # find the block in the worksheet's xml file + i, j = get_idces(sheetdoc, "worksheet", "mergeCells") + + if isnothing(j) # There are no existing merged cells. + return nothing + end + + c = XML.children(sheetdoc[i][j]) + @assert length(c) == parse(Int, sheetdoc[i][j]["count"]) "Unexpected number of mergeCells found: $(length(c)). Expected $(sheetdoc[i][j]["count"])." + + mergedCells = Vector{CellRange}() + for cell in c + @assert haskey(cell, "ref") "No `ref` attribute found in `mergeCell` element." 
+ push!(mergedCells, CellRange(cell["ref"])) + end + + return mergedCells +end + +""" + isMergedCell(ws::Worksheet, cr::String) -> Bool + isMergedCell(xf::XLSXFile, cr::String) -> Bool + +Return `true` if a cell is part of a merged cell range and `false` if not. + +Alternatively, if you have already obtained the merged cells for the worksheet, +you can avoid repeated determinations and pass them as a keyword argument to +the function: + + isMergedCell(ws::Worksheet, cr::String; mergedCells::Union{Vector{CellRange}, Nothing, Missing}=missing) -> Bool + isMergedCell(xf::XLSXFile, cr::String; mergedCells::Union{Vector{CellRange}, Nothing, Missing}=missing) -> Bool + +# Examples: +```julia +julia> XLSX.isMergedCell(xf, "Sheet1!A1") + +julia> XLSX.isMergedCell(sh, "A1") + +julia> mc = XLSX.getMergedCells(sh) +julia> XLSX.isMergedCell(sh, XLSX.CellRef("A1"); mergedCells=mc) + +``` +""" +function isMergedCell end +isMergedCell(xl::XLSXFile, sheetcell::String; kw...)::Bool = process_get_sheetcell(isMergedCell, xl, sheetcell; kw...) +isMergedCell(ws::Worksheet, cr::String; kw...)::Bool = process_get_cellname(isMergedCell, ws, cr; kw...) +#isMergedCell(ws::Worksheet, cellref::CellRef)::Bool = isMergedCell(ws, cellref, getMergedCells(ws)) +function isMergedCell(ws::Worksheet, cellref::CellRef; mergedCells::Union{Vector{CellRange}, Nothing, Missing} = missing)::Bool + + @assert get_xlsxfile(ws).use_cache_for_sheet_data "Cannot get merged cells because cache is not enabled." 
+ + if ismissing(mergedCells) # Get mergedCells if missing + mergedCells=getMergedCells(ws) + end + if isnothing(mergedCells) # No merged cells in sheet + return false + end + for rng in mergedCells + if cellref ∈ rng + return true + end + end + + return false +end + +""" + getMergedBaseCell(ws::Worksheet, cr::String) -> Union{Nothing, NamedTuple{CellRef, Any}} + getMergedBaseCell(xf::XLSXFile, cr::String) -> Union{Nothing, NamedTuple{CellRef, Any}} + +Return the cell reference and cell value of the base cell of a merged cell range in a worksheet as a named tuple. +If the specified cell is not part of a merged cell range, return `nothing`. + +The base cell is the top-left cell of the merged cell range and is the reference cell for the range. + +The tuple returned contains: +- `baseCell` : the reference (`CellRef`) of the base cell +- `baseValue` : the value of the base cell + +Additionally, if you have already obtained the merged cells for the worksheet, +you can avoid repeated determinations and pass them as a keyword argument to +the function: + + getMergedBaseCell(ws::Worksheet, cr::String; mergedCells::Union{Vector{CellRange}, Nothing, Missing}=missing) -> Union{Nothing, NamedTuple{CellRef, Any}} + getMergedBaseCell(xf::XLSXFile, cr::String; mergedCells::Union{Vector{CellRange}, Nothing, Missing}=missing) -> Union{Nothing, NamedTuple{CellRef, Any}} + +# Examples: +```julia +julia> XLSX.getMergedBaseCell(xf, "Sheet1!B2") +(baseCell = B1, baseValue = 3) + +julia> XLSX.getMergedBaseCell(sh, "B2") +(baseCell = B1, baseValue = 3) + + +``` +""" +function getMergedBaseCell end +getMergedBaseCell(xl::XLSXFile, sheetcell::String; kw...) = process_get_sheetcell(getMergedBaseCell, xl, sheetcell; kw...) +getMergedBaseCell(ws::Worksheet, cr::String; kw...) = process_get_cellname(getMergedBaseCell, ws, cr; kw...) 
+#getMergedBaseCell(ws::Worksheet, cellref::CellRef) = getMergedBaseCell(ws, cellref, getMergedCells(ws)) +function getMergedBaseCell(ws::Worksheet, cellref::CellRef; mergedCells::Union{Vector{CellRange}, Nothing, Missing}=missing) + + @assert get_xlsxfile(ws).use_cache_for_sheet_data "Cannot get merged cells because cache is not enabled." + + if ismissing(mergedCells) # Get mergedCells if missing + mergedCells=getMergedCells(ws) + end + if isnothing(mergedCells) # No merged cells in sheet + return nothing + end + for rng in mergedCells + if cellref ∈ rng + return (; baseCell=rng.start, baseValue = ws[rng.start]) + end + end + return nothing +end \ No newline at end of file diff --git a/src/cellref.jl b/src/cellref.jl index 07cac399..2bb0f29b 100644 --- a/src/cellref.jl +++ b/src/cellref.jl @@ -58,8 +58,10 @@ Base.string(c::CellRef) = c.name Base.show(io::IO, c::CellRef) = print(io, string(c)) Base.:(==)(c1::CellRef, c2::CellRef) = c1.name == c2.name Base.hash(c::CellRef) = hash(c.name) +Base.isless(c1::CellRef, c2::CellRef) = Base.isless(string(c1), string(c2)) const RGX_COLUMN_NAME = r"^[A-Z]?[A-Z]?[A-Z]$" +const RGX_ROW_NAME = r"^[1-9][0-9]*$" const RGX_CELLNAME = r"^[A-Z]+[0-9]+$" const RGX_CELLRANGE = r"^[A-Z]+[0-9]+:[A-Z]+[0-9]+$" @@ -75,6 +77,18 @@ function is_valid_column_name(n::AbstractString) :: Bool return true end +function is_valid_row_name(n::AbstractString) :: Bool + if !occursin(RGX_ROW_NAME, n) + return false + end + + row_number = tryparse(Int, n) # `nothing` when the digit string overflows Int + if isnothing(row_number) || row_number < 1 || row_number > EXCEL_MAX_ROWS + return false + end + + return true +end const RGX_CELLNAME_LEFT = r"^[A-Z]+" const RGX_CELLNAME_RIGHT = r"[0-9]+$" @@ -86,7 +100,6 @@ const RGX_CELLNAME_RIGHT = r"[0-9]+$" if isdigit(c) # this block is safe since n is encoded as ASCII column_name = SubString(n, 1, i-1) row = parse(Int, SubString(n, i, length(n))) - return column_name, row end end @@ -97,6 +110,10 @@ end # Checks whether `n` is a valid name for a cell. 
function is_valid_cellname(n::AbstractString) :: Bool + if is_valid_non_contiguous_range(n) # Non-contiguous ranges are comma separated `CellRef-like` or `CellRange-like` strings + return false + end + if !occursin(RGX_CELLNAME, n) return false end @@ -136,6 +153,9 @@ julia> XLSX.split_cellrange("AB12:CD24") end function is_valid_cellrange(n::AbstractString) :: Bool + if is_valid_non_contiguous_range(n) # Non-contiguous ranges are comma separated `CellRef-like` or `CellRange-like` strings + return false + end if !occursin(RGX_CELLRANGE, n) return false @@ -170,6 +190,7 @@ Base.string(cr::CellRange) = "$(string(cr.start)):$(string(cr.stop))" Base.show(io::IO, cr::CellRange) = print(io, string(cr)) Base.:(==)(cr1::CellRange, cr2::CellRange) = cr1.start == cr2.start && cr2.stop == cr2.stop Base.hash(cr::CellRange) = hash(cr.start) + hash(cr.stop) +Base.isless(cr1::CellRange, cr2::CellRange) = Base.isless(string(cr1), string(cr2)) # needed for tests macro range_str(cellrange) CellRange(cellrange) @@ -247,7 +268,7 @@ function relative_cell_position(ref::CellRef, rng::CellRange) end # -# ColumnRange +# ColumnRange and RowRange # Base.string(cr::ColumnRange) = "$(encode_column_number(cr.start)):$(encode_column_number(cr.stop))" @@ -256,6 +277,12 @@ Base.:(==)(cr1::ColumnRange, cr2::ColumnRange) = cr1.start == cr2.start && cr2.s Base.hash(cr::ColumnRange) = hash(cr.start) + hash(cr.stop) Base.in(column_number::Integer, rng::ColumnRange) = rng.start <= column_number && column_number <= rng.stop +Base.string(cr::RowRange) = "$(cr.start):$(cr.stop)" +Base.show(io::IO, cr::RowRange) = print(io, string(cr)) +Base.:(==)(cr1::RowRange, cr2::RowRange) = cr1.start == cr2.start && cr1.stop == cr2.stop +Base.hash(cr::RowRange) = hash(cr.start) + hash(cr.stop) +Base.in(row_number::Integer, rng::RowRange) = rng.start <= row_number && row_number <= rng.stop + function relative_column_position(column_number::Integer, rng::ColumnRange) @assert column_number ∈ rng "Column $column_number is
outside range $rng." return column_number - rng.start + 1 @@ -267,8 +294,13 @@ const RGX_COLUMN_RANGE = r"^[A-Z]?[A-Z]?[A-Z]:[A-Z]?[A-Z]?[A-Z]$" const RGX_COLUMN_RANGE_START = r"^[A-Z]+" const RGX_COLUMN_RANGE_STOP = r"[A-Z]+$" const RGX_SINGLE_COLUMN = r"^[A-Z]+$" +const RGX_ROW_RANGE = r"^[1-9][0-9]*:[1-9][0-9]*$" +const RGX_ROW_RANGE_START = r"^[1-9][0-9]*" +const RGX_ROW_RANGE_STOP = r"[1-9][0-9]*$" +const RGX_SINGLE_ROW = r"^[1-9][0-9]*$" # Returns tuple (column_name_start, column_name_stop). +# Also works for row ranges (row_name_start, row_name_stop)! @inline function split_column_range(n::AbstractString) if !occursin(":", n) return n, n @@ -279,24 +311,39 @@ const RGX_SINGLE_COLUMN = r"^[A-Z]+$" end function is_valid_column_range(r::AbstractString) :: Bool - if occursin(RGX_SINGLE_COLUMN, r) return true end - if !occursin(RGX_COLUMN_RANGE, r) return false end - start_name, stop_name = split_column_range(r) - if !is_valid_column_name(start_name) || !is_valid_column_name(stop_name) return false end - + return true +end +function is_valid_row_range(r::AbstractString) :: Bool + if occursin(RGX_SINGLE_ROW, r) + row_number = tryparse(Int, r) # `nothing` when the digits overflow `Int` + # An out-of-range single row is invalid, not an error: answer `false` exactly as the range branch below does. + return row_number !== nothing && row_number <= EXCEL_MAX_ROWS + end + if !occursin(RGX_ROW_RANGE, r) + return false + end + start_name, stop_name = split_column_range(r) # Function works for row ranges too. + if !is_valid_row_name(start_name) || !is_valid_row_name(stop_name) + return false + end return true end +function RowRange(r::AbstractString) + @assert is_valid_row_range(r) "Invalid row range: $r." + start_name, stop_name = split_column_range(r) # Function works for row ranges too. + return RowRange(parse(Int, start_name), parse(Int, stop_name)) +end function ColumnRange(r::AbstractString) @assert is_valid_column_range(r) "Invalid column range: $r."
start_name, stop_name = split_column_range(r) @@ -305,9 +352,13 @@ end convert(::Type{ColumnRange}, str::AbstractString) = ColumnRange(str) convert(::Type{ColumnRange}, column_range::ColumnRange) = column_range +convert(::Type{RowRange}, str::AbstractString) = RowRange(str) +convert(::Type{RowRange}, row_range::RowRange) = row_range column_bounds(r::ColumnRange) = (r.start, r.stop) Base.length(r::ColumnRange) = r.stop - r.start + 1 +row_bounds(r::RowRange) = (r.start, r.stop) +Base.length(r::RowRange) = r.stop - r.start + 1 # ColumnRange iterator: element is a String with the column name, the state is the column number. function Base.iterate(itr::ColumnRange, state::Int=itr.start) @@ -318,6 +369,15 @@ function Base.iterate(itr::ColumnRange, state::Int=itr.start) return encode_column_number(state), state + 1 end +# RowRange iterator: element is a String with the row name (e.g. "1"). The state is the row number. +function Base.iterate(itr::RowRange, state::Int=itr.start) + if state > itr.stop + return nothing + end + + return string(state), state + 1 +end + # CellRange iterator: element is a CellRef, the state is a CellPosition. 
function Base.iterate(rng::CellRange, state::CellPosition=CellPosition(rng.start)) @@ -340,33 +400,100 @@ function Base.length(rng::CellRange) end # -# SheetCellRef, SheetCellRange, SheetColumnRange +# SheetCellRef, SheetCellRange, SheetColumnRange, SheetRowRange, NonContiguousRange # -Base.string(cr::SheetCellRef) = string(cr.sheet, "!", cr.cellref) +Base.string(cr::SheetCellRef) = string(quoteit(cr.sheet), "!", cr.cellref) Base.show(io::IO, cr::SheetCellRef) = print(io, string(cr)) Base.:(==)(cr1::SheetCellRef, cr2::SheetCellRef) = cr1.sheet == cr2.sheet && cr2.cellref == cr2.cellref Base.hash(cr::SheetCellRef) = hash(cr.sheet) + hash(cr.cellref) -Base.string(cr::SheetCellRange) = string(cr.sheet, "!", cr.rng) +Base.string(cr::SheetCellRange) = string(quoteit(cr.sheet), "!", cr.rng) Base.show(io::IO, cr::SheetCellRange) = print(io, string(cr)) Base.:(==)(cr1::SheetCellRange, cr2::SheetCellRange) = cr1.sheet == cr2.sheet && cr2.rng == cr2.rng Base.hash(cr::SheetCellRange) = hash(cr.sheet) + hash(cr.rng) -Base.string(cr::SheetColumnRange) = string(cr.sheet, "!", cr.colrng) +Base.string(cr::SheetColumnRange) = string(quoteit(cr.sheet), "!", cr.colrng) Base.show(io::IO, cr::SheetColumnRange) = print(io, string(cr)) Base.:(==)(cr1::SheetColumnRange, cr2::SheetColumnRange) = cr1.sheet == cr2.sheet && cr2.colrng == cr2.colrng Base.hash(cr::SheetColumnRange) = hash(cr.sheet) + hash(cr.colrng) +Base.string(cr::SheetRowRange) = string(quoteit(cr.sheet), "!", cr.rowrng) +Base.show(io::IO, cr::SheetRowRange) = print(io, string(cr)) +Base.:(==)(cr1::SheetRowRange, cr2::SheetRowRange) = cr1.sheet == cr2.sheet && cr1.rowrng == cr2.rowrng +Base.hash(cr::SheetRowRange) = hash(cr.sheet) + hash(cr.rowrng) + +Base.string(cr::NonContiguousRange) = join([string(quoteit(cr.sheet), "!", x) for x in cr.rng],",") +Base.show(io::IO, cr::NonContiguousRange) = print(io, string(cr)) +Base.:(==)(cr1::NonContiguousRange, cr2::NonContiguousRange) = cr1.sheet == cr2.sheet && cr1.rng == cr2.rng
+Base.hash(cr::NonContiguousRange) = hash(cr.sheet) + hash(cr.rng) + +function Base.in(ref::SheetCellRef, ncrng::NonContiguousRange) :: Bool + if ref.sheet != ncrng.sheet + return false + end + for r in ncrng.rng + if r isa CellRef + if ref.cellref == r # compare the bare `CellRef`; a sheet-qualified reference never equals a `CellRef` + return true + end + else + if ref.cellref in r # membership of the bare `CellRef` in the `CellRange` + return true + end + end + end + return false +end + +function nc_bounds(r::NonContiguousRange)::CellRange # Smallest rectangular `CellRange` that contains all the elements in `r`. + top = EXCEL_MAX_ROWS + bottom = 0 + left = EXCEL_MAX_COLS + right = 0 + for rng in r.rng + if isa(rng, CellRef) + top = min(top, row_number(rng)) + bottom = max(bottom, row_number(rng)) + left = min(left, column_number(rng)) + right = max(right, column_number(rng)) + else + top = min(top, row_number(rng.start)) + bottom = max(bottom, row_number(rng.stop)) + left = min(left, column_number(rng.start)) + right = max(right, column_number(rng.stop)) + end + end + return CellRange(CellRef(top, left), CellRef(bottom, right)) +end +function Base.length(r::NonContiguousRange)::Int # Number of cells in `r`.
+ s = 0 + for rng in r.rng + if rng isa CellRef + s += 1 + else + s += length(rng) + end + end + return s +end + const RGX_SHEET_CELLNAME = r"^.+![A-Z]+[0-9]+$" const RGX_SHEET_CELLRANGE = r"^.+![A-Z]+[0-9]+:[A-Z]+[0-9]+$" const RGX_SHEET_COLUMN_RANGE = r"^.+![A-Z]?[A-Z]?[A-Z]:[A-Z]?[A-Z]?[A-Z]$" +const RGX_SHEET_ROW_RANGE = r"^.+![1-9][0-9]*:[1-9][0-9]*$" const RGX_SHEET_CELLNAME_RIGHT = r"[A-Z]+[0-9]+$" const RGX_SHEET_CELLRANGE_RIGHT = r"[A-Z]+[0-9]+:[A-Z]+[0-9]+$" const RGX_SHEET_COLUMN_RANGE_RIGHT = r"[A-Z]?[A-Z]?[A-Z]:[A-Z]?[A-Z]?[A-Z]$" +const RGX_SHEET_ROW_RANGE_RIGHT = r"[1-9][0-9]*:[1-9][0-9]*$" function is_valid_sheet_cellname(n::AbstractString) :: Bool + + if is_valid_non_contiguous_range(n) # Non-contiguous ranges are comma separated `CellRef-like` or `CellRange-like` strings + return false + end + if !occursin(RGX_SHEET_CELLNAME, n) return false end @@ -380,6 +507,11 @@ function is_valid_sheet_cellname(n::AbstractString) :: Bool end function is_valid_sheet_cellrange(n::AbstractString) :: Bool + + if is_valid_non_contiguous_range(n) # Non-contiguous ranges are comma separated `CellRef-like` or `CellRange-like` strings + return false + end + if !occursin(RGX_SHEET_CELLRANGE, n) return false end @@ -404,10 +536,22 @@ function is_valid_sheet_column_range(n::AbstractString) :: Bool return true end +function is_valid_sheet_row_range(n::AbstractString) :: Bool + if !occursin(RGX_SHEET_ROW_RANGE, n) + return false + end + + row_range = match(RGX_SHEET_ROW_RANGE_RIGHT, n).match + if !is_valid_row_range(row_range) + return false + end + + return true +end const RGX_SHEET_PREFIX = r"^.+!" const RGX_CELLNAME_RIGHT_FIXED = r"\$[A-Z]+\$[0-9]+$" -const RGX_SHEET_CELNAME_RIGHT_FIXED = r"\$[A-Z]+\$[0-9]+:\$[A-Z]+\$[0-9]+$" +const RGX_SHEET_CELLNAME_RIGHT_FIXED = r"\$[A-Z]+\$[0-9]+:\$[A-Z]+\$[0-9]+$" function parse_sheetname_from_sheetcell_name(n::AbstractString) :: SubString @assert occursin(RGX_SHEET_PREFIX, n) "$n is not a SheetCell reference." 
@@ -434,7 +578,7 @@ function SheetCellRange(n::AbstractString) local cellrange::CellRange if is_valid_fixed_sheet_cellrange(n) - fixed_cellrange = match(RGX_SHEET_CELNAME_RIGHT_FIXED, n).match + fixed_cellrange = match(RGX_SHEET_CELLNAME_RIGHT_FIXED, n).match cellrange = CellRange(replace(fixed_cellrange, "\$" => "")) else @assert is_valid_sheet_cellrange(n) "$n is not a valid SheetCellRange." @@ -451,12 +595,104 @@ function SheetColumnRange(n::AbstractString) sheetname = parse_sheetname_from_sheetcell_name(n) return SheetColumnRange(sheetname, ColumnRange(column_range)) end +function SheetRowRange(n::AbstractString) + @assert is_valid_sheet_row_range(n) "$n is not a valid SheetRowRange." + row_range = match(RGX_SHEET_ROW_RANGE_RIGHT, n).match + sheetname = parse_sheetname_from_sheetcell_name(n) + return SheetRowRange(sheetname, RowRange(row_range)) +end # Named ranges +const RGX_FIXED_CELLNAME = r"^\$[A-Z]+\$[0-9]+$" +const RGX_FIXED_CELLRANGE = r"^\$[A-Z]+\$[0-9]+:\$[A-Z]+\$[0-9]+$" const RGX_FIXED_SHEET_CELLNAME = r"^.+!\$[A-Z]+\$[0-9]+$" const RGX_FIXED_SHEET_CELLRANGE = r"^.+!\$[A-Z]+\$[0-9]+:\$[A-Z]+\$[0-9]+$" +is_valid_fixed_cellname(s::AbstractString) = occursin(RGX_FIXED_CELLNAME, s) +is_valid_fixed_cellrange(s::AbstractString) = occursin(RGX_FIXED_CELLRANGE, s) is_valid_fixed_sheet_cellname(s::AbstractString) = occursin(RGX_FIXED_SHEET_CELLNAME, s) is_valid_fixed_sheet_cellrange(s::AbstractString) = occursin(RGX_FIXED_SHEET_CELLRANGE, s) -is_non_contiguous_range(v) = occursin(",", string(v)) # Non-contiguous ranges are comma separated `SheetCellRef-like` or `SheetCellRange-like` strings +is_valid_non_contiguous_range(v::AbstractString) :: Bool = is_valid_non_contiguous_cellrange(v) || is_valid_non_contiguous_sheetcellrange(v) + +function is_valid_non_contiguous_sheetcellrange(v::AbstractString) :: Bool + + if !occursin(",", string(v)) # Non-contiguous ranges are comma separated `SheetCellRef-like` or `SheetCellRange-like` strings + return false + end + + 
ranges = split(v, ",") + for r in ranges + if !is_valid_sheet_cellname(r) && !is_valid_sheet_cellrange(r) && !is_valid_fixed_sheet_cellname(r) && !is_valid_fixed_sheet_cellrange(r) + return false + end + end + + firstsheet = parse_sheetname_from_sheetcell_name(ranges[begin]) + + if any(parse_sheetname_from_sheetcell_name(r) != firstsheet for r in ranges) # All `SheetCellRef`s and `SheetCellRange`s should have the same sheet name + return false + end + + return true +end + +function is_valid_non_contiguous_cellrange(v::AbstractString) :: Bool + + if !occursin(",", string(v)) # Non-contiguous ranges are comma separated `SheetCellRef-like` or `SheetCellRange-like` strings + + return false + end + + ranges = split(v, ",") + + for r in ranges + if !is_valid_cellname(r) && !is_valid_cellrange(r) &&!is_valid_fixed_cellname(r) && !is_valid_fixed_cellrange(r) + return false + end + end + + return true +end + +NonContiguousRange(s::Worksheet, v::AbstractString)::NonContiguousRange = nCR(s.name, string.(split(v, ","))) +function NonContiguousRange(v::AbstractString)::NonContiguousRange + + @assert is_valid_non_contiguous_range(v) "$v is not a valid non-contiguous range." + + ranges = string.(split(v, ",")) + firstsheet = parse_sheetname_from_sheetcell_name(ranges[1]) + @assert all(parse_sheetname_from_sheetcell_name(r) == firstsheet for r in ranges) "All `CellRef`s and `CellRange`s should have the same sheet name." 
+ + return nCR(unquoteit(firstsheet), ranges) +end + +function nCR(s::AbstractString, ranges::Vector{String}) :: NonContiguousRange + noncontig = Vector{Union{CellRef, CellRange}}() + + for n in ranges + if is_valid_fixed_sheet_cellname(n) + fixed_cellname = match(RGX_CELLNAME_RIGHT_FIXED, n).match + push!(noncontig, CellRef(replace(fixed_cellname, "\$" => ""))) + elseif is_valid_sheet_cellname(n) + push!(noncontig, CellRef(match(RGX_SHEET_CELLNAME_RIGHT, n).match)) + elseif is_valid_fixed_sheet_cellrange(n) + fixed_cellrange = match(RGX_SHEET_CELLNAME_RIGHT_FIXED, n).match + push!(noncontig, CellRange(replace(fixed_cellrange, "\$" => ""))) + elseif is_valid_sheet_cellrange(n) + push!(noncontig, CellRange(match(RGX_SHEET_CELLRANGE_RIGHT, n).match)) + elseif is_valid_fixed_cellname(n) + push!(noncontig, CellRef(replace(n, "\$" => ""))) + elseif is_valid_fixed_cellrange(n) + push!(noncontig, CellRange(replace(n, "\$" => ""))) + elseif is_valid_cellname(n) + push!(noncontig, CellRef(n)) + elseif is_valid_cellrange(n) + push!(noncontig, CellRange(n)) + else + error("Invalid non-contiguous range: $n.") + end + end + + return NonContiguousRange(s, noncontig) +end \ No newline at end of file diff --git a/src/read.jl b/src/read.jl index 51a77c07..3146a6a0 100644 --- a/src/read.jl +++ b/src/read.jl @@ -377,31 +377,52 @@ function parse_workbook!(xf::XLSXFile) name = XML.attributes(defined_name_node)["name"] local defined_value::DefinedNameValueTypes - - if is_valid_fixed_sheet_cellname(defined_value_string) || is_valid_sheet_cellname(defined_value_string) - defined_value = SheetCellRef(defined_value_string) - elseif is_valid_fixed_sheet_cellrange(defined_value_string) || is_valid_sheet_cellrange(defined_value_string) - defined_value = SheetCellRange(defined_value_string) + if is_valid_non_contiguous_range(defined_value_string) + defined_value = NonContiguousRange(unquoteit(defined_value_string)) + isabs=Vector{Bool}(undef,length(defined_value.rng)) + for (i, d) in 
enumerate(split(defined_value_string, ",")) + isabs[i]=is_valid_fixed_sheet_cellname(d) || is_valid_fixed_sheet_cellrange(d) + end + @assert length(isabs)==length(defined_value.rng) "Error parsing absolute references in non-contiguous range." + elseif is_valid_fixed_sheet_cellname(defined_value_string) + defined_value = SheetCellRef(unquoteit(defined_value_string)) + isabs=true + elseif is_valid_sheet_cellname(defined_value_string) + defined_value = SheetCellRef(unquoteit(defined_value_string)) + isabs=false + elseif is_valid_fixed_sheet_cellrange(defined_value_string) + defined_value = SheetCellRange(unquoteit(defined_value_string)) + isabs=true + elseif is_valid_sheet_cellrange(defined_value_string) + defined_value = SheetCellRange(unquoteit(defined_value_string)) + isabs=false elseif occursin(r"^\".*\"$", defined_value_string) # is enclosed by quotes defined_value = defined_value_string[2:end-1] # remove enclosing quotes if isempty(defined_value) defined_value = missing end + isabs=false elseif tryparse(Int, defined_value_string) !== nothing defined_value = parse(Int, defined_value_string) + isabs=false elseif tryparse(Float64, defined_value_string) !== nothing defined_value = parse(Float64, defined_value_string) + isabs=false elseif isempty(defined_value_string) defined_value = missing + isabs=false else # Couldn't parse definedName. Will silently ignore it, since this is not a critical feature. - continue + # Actually is just interpreted as a string anyway and added to the defined names (is this true?). + defined_value = string(defined_value_string) + isabs=false + #continue - # debug - #error("Could not parse value $(defined_value_string) for definedName $name.") + # debug - Now more important since we are writing updated defined names to back to output file. 
+ # error("Could not parse value $(defined_value_string) for definedName $name.") end - a = XML.attributes(defined_name_node) + a = XML.attributes(defined_name_node) if haskey(a,"localSheetId") # is a Worksheet level name @@ -410,10 +431,10 @@ function parse_workbook!(xf::XLSXFile) # Which is the order of the elements under element in workbook.xml . localSheetId = parse(Int, a["localSheetId"])+1 sheetId = workbook.sheets[localSheetId].sheetId - workbook.worksheet_names[(sheetId, name)] = defined_value + workbook.worksheet_names[(sheetId, name)] = DefinedNameValue(defined_value, isabs) else # is a Workbook level name - workbook.workbook_names[name] = defined_value + workbook.workbook_names[name] = DefinedNameValue(defined_value, isabs) end end @@ -473,7 +494,10 @@ end readdata(source, sheet, ref) readdata(source, sheetref) -Returns a scalar or matrix with values from a spreadsheet. +Return a scalar, vector or matrix with values from a spreadsheet file. +'ref' can be a defined name, a cell reference or a cell, column, row +or non-contiguous range. + See also [`XLSX.getdata`](@ref). @@ -499,6 +523,15 @@ julia> XLSX.readdata("myfile.xlsx", "mysheet!A2:B4") 1 "first" 2 "second" 3 "third" + +Non-contiguous ranges return vectors. + +julia> XLSX.readdata("customXml.xlsx", "Mock-up", "Location") # `Location` is a `definedName` for a non-contiguous range +4-element Vector{Any}: + "Here" + missing + missing + missing ``` """ function readdata(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, ref) @@ -526,7 +559,8 @@ end [infer_eltypes], [stop_in_empty_row], [stop_in_row_function], - [keep_empty_rows] + [keep_empty_rows], + [normalizenames] ) -> DataTable Returns tabular data from a spreadsheet as a struct `XLSX.DataTable`. @@ -537,10 +571,14 @@ For example, `"B:D"` will select columns `B`, `C` and `D`. If `columns` is not given, the algorithm will find the first sequence of consecutive non-empty cells. 
+Alternatively, use `columns` to specify a row range, like `"2:4"`. +This will select rows `2`, `3` and `4`. + Use `first_row` to indicate the first row from the table. `first_row=5` will look for a table starting at sheet row `5`. If `first_row` is not given, the algorithm will look for the first -non-empty row in the spreadsheet. +non-empty row in the spreadsheet (if a column range is specified) +or range (if a row range is specified). `header` is a `Bool` indicating if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels @@ -550,14 +588,18 @@ will generate column labels. The default value is `header=true`. Use `column_labels` to specify names for the header of the table. +Use `normalizenames=true` to normalize column names to valid Julia identifiers. + Use `infer_eltypes=true` to get `data` as a `Vector{Any}` of typed vectors. The default value is `infer_eltypes=false`. -`stop_in_empty_row` is a boolean indicating whether an empty row marks the end of the table. -If `stop_in_empty_row=false`, the `TableRowIterator` will continue to fetch rows until there's no more rows in the Worksheet. +`stop_in_empty_row` is a boolean indicating whether an empty row marks the +end of the table. If `stop_in_empty_row=false`, the `TableRowIterator` will +continue to fetch rows until there's no more rows in the Worksheet or range. The default behavior is `stop_in_empty_row=true`. -`stop_in_row_function` is a Function that receives a `TableRow` and returns a `Bool` indicating if the end of the table was reached. +`stop_in_row_function` is a Function that receives a `TableRow` and returns + a `Bool` indicating if the end of the table was reached. Example for `stop_in_row_function`: @@ -568,9 +610,14 @@ function stop_function(r) end ``` -`keep_empty_rows` determines whether rows where all column values are equal to `missing` are kept (`true`) or dropped (`false`) from the resulting table. 
-`keep_empty_rows` never affects the *bounds* of the table; the number of rows read from a sheet is only affected by, `first_row`, `stop_in_empty_row` and `stop_in_row_function` (if specified). -`keep_empty_rows` is only checked once the first and last row of the table have been determined, to see whether to keep or drop empty rows between the first and the last row. +`keep_empty_rows` determines whether rows where all column values are equal +to `missing` are kept (`true`) or dropped (`false`) from the resulting table. +`keep_empty_rows` never affects the *bounds* of the table; the number of +rows read from a sheet is only affected by, `first_row`, `stop_in_empty_row` +and `stop_in_row_function` (if specified). +`keep_empty_rows` is only checked once the first and last row of the table +have been determined, to see whether to keep or drop empty rows between the +first and the last row. # Example @@ -582,16 +629,47 @@ julia> df = DataFrame(XLSX.readtable("myfile.xlsx", "mysheet")) See also: [`XLSX.gettable`](@ref). 
""" -function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}; first_row::Union{Nothing, Int} = nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false) +function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}; first_row::Union{Nothing, Int} = nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false, normalizenames::Bool=false) + c = openxlsx(source, enable_cache=enable_cache) do xf + gettable(getsheet(xf, sheet); first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, keep_empty_rows, normalizenames) + end + return c +end + +function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, columns::ColumnRange; first_row::Union{Nothing, Int} = nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false, normalizenames::Bool=false) c = openxlsx(source, enable_cache=enable_cache) do xf - gettable(getsheet(xf, sheet); first_row=first_row, column_labels=column_labels, header=header, infer_eltypes=infer_eltypes, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, keep_empty_rows=keep_empty_rows) + gettable(getsheet(xf, sheet), columns; first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, keep_empty_rows, normalizenames) end return c end -function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, columns::Union{ColumnRange, AbstractString}; first_row::Union{Nothing, Int} = nothing, 
column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false) +# `readtable` on a row range only partially works. +# Each row in the table is truncated when there is an empty column even if there are more columns in the row. +# It also evaluates the rows on the basis of the table row count, not the sheet row count, giving wrong results. +# These limitations arise because I am trying to implement this functionality without changing the existing code. +# It probably needs a dedicated RowRange implementation. +# It is best not to use this function with row ranges. Use `readdata` or `getdata` instead, both of which work +# on row ranges, or index the sheet directly to get the rows you want (e.g. sh["3"] or sh["3:5"]). +#= +function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, rows::RowRange; first_row::Union{Nothing, Int} = nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false, normalizenames::Bool=false) + if rows.start == rows.stop && header==true + error("Only 1 row specified in `RowRange` with `header=true`.\nThe header row is the same as the data row. Specify at least two rows to read header data with `header=true`.") + end + first_row = isnothing(first_row) ? rows.start : first_row + stop_in_row_function = isnothing(stop_in_row_function) ? 
r -> r.row >= rows.stop-first_row+1 : stop_in_row_function c = openxlsx(source, enable_cache=enable_cache) do xf - gettable(getsheet(xf, sheet), columns; first_row=first_row, column_labels=column_labels, header=header, infer_eltypes=infer_eltypes, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, keep_empty_rows=keep_empty_rows) + gettable(getsheet(xf, sheet); first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, keep_empty_rows, normalizenames) end return c end +=# +function readtable(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, range::AbstractString; first_row::Union{Nothing, Int} = nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, enable_cache::Bool=false, keep_empty_rows::Bool=false, normalizenames::Bool=false) + if is_valid_row_range(range) + # The `RowRange` method of `readtable` is commented out above, so fail with a clear message rather than a `MethodError`. + error("`readtable` does not support row ranges such as `$range`. Use `readdata` or `getdata` instead.") + elseif !is_valid_column_range(range) + error("The columns argument must be a valid column range or row range.") + end + columns = ColumnRange(range) # validated above; forwards to the `ColumnRange` method + return readtable(source, sheet, columns; first_row, column_labels, header, infer_eltypes, stop_in_empty_row, stop_in_row_function, enable_cache, keep_empty_rows, normalizenames) +end diff --git a/src/stream.jl b/src/stream.jl index 6c286291..c299087c 100644 --- a/src/stream.jl +++ b/src/stream.jl @@ -20,7 +20,7 @@ end # About Iterators * `SheetRowIterator` is an abstract iterator that has `SheetRow` as its elements. `SheetRowStreamIterator` and `WorksheetCache` implements `SheetRowIterator` interface. -* `SheetRowStreamIterator` is a dumb iterator for row elements in sheetData XML tag of a worksheet. +* `SheetRowStreamIterator` is a dumb iterator for row elements in sheetData XML tag of a worksheet. Empty rows are not represented in the XML file so cannot be seen by the iterator.
* `WorksheetCache` has a `SheetRowStreamIterator` and caches all values read from the stream. * `TableRowIterator` is a smart iterator that looks for tabular data, but uses a SheetRowIterator under the hood. @@ -301,6 +301,10 @@ for sheetrow in XLSX.eachrow(sheet) end end ``` + +Note: The `eachrow` row iterator will not return any row that +consists entirely of `EmptyCell`s. These are simply not seen +by the iterator. """ function eachrow(ws::Worksheet) :: SheetRowIterator if is_cache_enabled(ws) diff --git a/src/table.jl b/src/table.jl index e0fbe436..2d5b11ab 100644 --- a/src/table.jl +++ b/src/table.jl @@ -51,24 +51,24 @@ function last_column_index(sr::SheetRow, anchor_column::Int) :: Int return last_column_index end -function _colname_prefix_symbol(sheet::Worksheet, cell::Cell) +function _colname_prefix_string(sheet::Worksheet, cell::Cell) d = getdata(sheet, cell) if d isa String - return Symbol(XML.unescape(d)) + return XML.unescape(d) else - return Symbol(d) + return string(d) end end -_colname_prefix_symbol(sheet::Worksheet, ::EmptyCell) = Symbol("#Empty") +_colname_prefix_string(sheet::Worksheet, ::EmptyCell) = "#Empty" # helper function to manage problematic column labels # Empty cell -> "#Empty" # No_unique_label -> No_unique_label_2 -function push_unique!(vect::Vector{Symbol}, sheet::Worksheet, cell::AbstractCell, iter::Int=1) - name = _colname_prefix_symbol(sheet, cell) +function push_unique!(vect::Vector{String}, sheet::Worksheet, cell::AbstractCell, iter::Int=1) + name = _colname_prefix_string(sheet, cell) if iter > 1 - name = Symbol(name, '_', iter) + name = name*"_$iter" end if name in vect @@ -80,6 +80,20 @@ function push_unique!(vect::Vector{Symbol}, sheet::Worksheet, cell::AbstractCell nothing end +# Issue 260 +const RESERVED = Set(["local", "global", "export", "let", + "for", "struct", "while", "const", "continue", "import", + "function", "if", "else", "try", "begin", "break", "catch", + "return", "using", "baremodule", "macro", "finally", + 
"module", "elseif", "end", "quote", "do"]) +normalizename(name::Symbol) = name +function normalizename(name::String)::Symbol + uname = strip(Unicode.normalize(name)) + id = Base.isidentifier(uname) ? uname : map(c->Base.is_id_char(c) ? c : '_', uname) + cleansed = string((isempty(id) || !Base.is_id_start_char(id[1]) || id in RESERVED) ? "_" : "", id) + return Symbol(replace(cleansed, r"(_)\1+"=>"_")) +end + """ eachtablerow(sheet, [columns]; [first_row], [column_labels], [header], [stop_in_empty_row], [stop_in_row_function], [keep_empty_rows]) @@ -134,37 +148,46 @@ function eachtablerow( stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, keep_empty_rows::Bool=false, + normalizenames::Bool=false ) :: TableRowIterator - if first_row === nothing - first_row = _find_first_row_with_data(sheet, convert(ColumnRange, cols).start) - end + #let col_lab - itr = eachrow(sheet) - column_range = convert(ColumnRange, cols) - - if column_labels === nothing - column_labels = Vector{Symbol}() - if header - # will use getdata to get column names - for column_index in column_range.start:column_range.stop - sheet_row = find_row(itr, first_row) - cell = getcell(sheet_row, column_index) - push_unique!(column_labels, sheet, cell) + if first_row === nothing + first_row = _find_first_row_with_data(sheet, convert(ColumnRange, cols).start) + end + + itr = eachrow(sheet) + column_range = convert(ColumnRange, cols) + col_lab = Vector{String}() + + if column_labels === nothing + if header + # will use getdata to get column names + for column_index in column_range.start:column_range.stop + sheet_row = find_row(itr, first_row) + cell = getcell(sheet_row, column_index) + push_unique!(col_lab, sheet, cell) + end + else + # generate column_labels if there's no header information anywhere + for c in column_range + push!(col_lab, string(c)) + end end else - # generate column_labels if there's no header information anywhere - for c in column_range - 
push!(column_labels, Symbol(c)) - end + # check consistency for column_range and column_labels + @assert length(column_labels) == length(column_range) "`column_range` (length=$(length(column_range))) and `column_labels` (length=$(length(column_labels))) must have the same length." end - else - # check consistency for column_range and column_labels - @assert length(column_labels) == length(column_range) "`column_range` (length=$(length(column_range))) and `column_labels` (length=$(length(column_labels))) must have the same length." - end - - first_data_row = header ? first_row + 1 : first_row - return TableRowIterator(sheet, Index(column_range, column_labels), first_data_row, stop_in_empty_row, stop_in_row_function, keep_empty_rows) + if normalizenames + column_labels = normalizename.(column_labels===nothing ? col_lab : column_labels) + else + column_labels = Symbol.(column_labels===nothing ? col_lab : column_labels) + end + + first_data_row = header ? first_row + 1 : first_row + return TableRowIterator(sheet, Index(column_range, column_labels), first_data_row, stop_in_empty_row, stop_in_row_function, keep_empty_rows) + # end end function TableRowIterator(sheet::Worksheet, index::Index, first_data_row::Int, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, keep_empty_rows::Bool=false) @@ -179,6 +202,7 @@ function eachtablerow( stop_in_empty_row::Bool=true, stop_in_row_function::Union{Nothing, Function}=nothing, keep_empty_rows::Bool=false, + normalizenames::Bool=false ) :: TableRowIterator if first_row === nothing @@ -224,7 +248,7 @@ function eachtablerow( # if got here, it's because all columns are non-empty column_range = ColumnRange(column_start, column_stop) - return eachtablerow(sheet, column_range; first_row=first_row, column_labels=column_labels, header=header, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, keep_empty_rows=keep_empty_rows) + return eachtablerow(sheet, column_range; 
first_row=first_row, column_labels=column_labels, header=header, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, keep_empty_rows=keep_empty_rows, normalizenames=normalizenames) end end end @@ -324,7 +348,7 @@ function Base.iterate(itr::TableRowIterator) table_row_index = 1 return TableRow(table_row_index, itr.index, sheet_row), TableRowIteratorState(table_row_index, row_number(sheet_row), sheet_row_iterator_state) else - next = iterate(itr.itr, sheet_row_iterator_state) + next = iterate(itr.itr, sheet_row_iterator_state) end end @@ -369,7 +393,7 @@ function Base.iterate(itr::TableRowIterator, state::TableRowIteratorState) end if is_empty_table_row(sheet_row) - if itr.stop_in_empty_row + if itr.stop_in_empty_row # user asked to stop fetching table rows if we find an empty row return nothing elseif !itr.keep_empty_rows @@ -394,7 +418,7 @@ function Base.iterate(itr::TableRowIterator, state::TableRowIteratorState) @assert !is_empty_table_row(sheet_row) || itr.keep_empty_rows table_row = TableRow(table_row_index, itr.index, sheet_row) - # user asked to stop + # user asked to stop (or end of row range) if itr.stop_in_row_function !== nothing && itr.stop_in_row_function(table_row) return nothing end @@ -463,6 +487,7 @@ function check_table_data_dimension(data::Vector) nothing end +#function gettable(itr::TableRowIterator; infer_eltypes::Bool=false, normalizenames::Bool=false) :: DataTable function gettable(itr::TableRowIterator; infer_eltypes::Bool=false) :: DataTable column_labels = get_column_labels(itr) columns_count = table_columns_count(itr) @@ -516,7 +541,8 @@ end [infer_eltypes], [stop_in_empty_row], [stop_in_row_function], - [keep_empty_rows] + [keep_empty_rows], + [normalizenames] ) -> DataTable Returns tabular data from a spreadsheet as a struct `XLSX.DataTable`. @@ -540,6 +566,8 @@ will generate column labels. The default value is `header=true`. 
Use `column_labels` as a vector of symbols to specify names for the header of the table. +Use `normalizenames=true` to normalize column names to valid Julia identifiers. + Use `infer_eltypes=true` to get `data` as a `Vector{Any}` of typed vectors. The default value is `infer_eltypes=false`. @@ -571,15 +599,15 @@ julia> df = XLSX.openxlsx("myfile.xlsx") do xf DataFrame(XLSX.gettable(xf["mysheet"])) end ``` - + See also: [`XLSX.readtable`](@ref). """ -function gettable(sheet::Worksheet, cols::Union{ColumnRange, AbstractString}; first_row::Union{Nothing, Int}=nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Function, Nothing}=nothing, keep_empty_rows::Bool=false) - itr = eachtablerow(sheet, cols; first_row=first_row, column_labels=column_labels, header=header, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, keep_empty_rows=keep_empty_rows) - return gettable(itr; infer_eltypes=infer_eltypes) +function gettable(sheet::Worksheet, cols::Union{ColumnRange, AbstractString}; first_row::Union{Nothing, Int}=nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Function, Nothing}=nothing, keep_empty_rows::Bool=false, normalizenames::Bool=false) + itr = eachtablerow(sheet, cols; first_row, column_labels, header, stop_in_empty_row, stop_in_row_function, keep_empty_rows, normalizenames) + return gettable(itr; infer_eltypes) end -function gettable(sheet::Worksheet; first_row::Union{Nothing, Int}=nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Function, Nothing}=nothing, keep_empty_rows::Bool=false) - itr = eachtablerow(sheet; first_row=first_row, column_labels=column_labels, header=header, stop_in_empty_row=stop_in_empty_row, stop_in_row_function=stop_in_row_function, 
keep_empty_rows=keep_empty_rows) - return gettable(itr; infer_eltypes=infer_eltypes) +function gettable(sheet::Worksheet; first_row::Union{Nothing, Int}=nothing, column_labels=nothing, header::Bool=true, infer_eltypes::Bool=false, stop_in_empty_row::Bool=true, stop_in_row_function::Union{Function, Nothing}=nothing, keep_empty_rows::Bool=false, normalizenames::Bool=false) + itr = eachtablerow(sheet; first_row, column_labels, header, stop_in_empty_row, stop_in_row_function, keep_empty_rows, normalizenames) + return gettable(itr; infer_eltypes) end diff --git a/src/types.jl b/src/types.jl index ae9ba8c3..bc35312e 100644 --- a/src/types.jl +++ b/src/types.jl @@ -201,6 +201,15 @@ struct ColumnRange return new(a, b) end end +struct RowRange + start::Int # row number + stop::Int # row number + + function RowRange(a::Int, b::Int) + @assert a <= b "Invalid RowRange. Start row must be located before end row." + return new(a, b) + end +end struct SheetCellRef sheet::String @@ -212,10 +221,19 @@ struct SheetCellRange rng::CellRange end +struct NonContiguousRange + sheet::String + rng::Vector{Union{CellRef, CellRange}} +end + struct SheetColumnRange sheet::String colrng::ColumnRange end +struct SheetRowRange + sheet::String + rowrng::RowRange +end abstract type MSOfficePackage end @@ -309,9 +327,18 @@ mutable struct SharedStringTable is_loaded::Bool # for lazy-loading of sst XML file (implies that this struct must be mutable) end -const DefinedNameValueTypes = Union{SheetCellRef, SheetCellRange, Int, Float64, String, Missing} +const DefinedNameValueTypes = Union{SheetCellRef, SheetCellRange, NonContiguousRange, Int, Float64, String, Missing} +const DefinedNameRangeTypes = Union{SheetCellRef, SheetCellRange, NonContiguousRange} + +struct DefinedNameValue + value::DefinedNameValueTypes + isabs::Union{Bool, Vector{Bool}} +end # Workbook is the result of parsing file `xl/workbook.xml`. 
+# The `xl/workbook.xml` will need to be updated using the workbook_names and +# worksheet_names from here when a workbook is saved in case any new defined +# names have been created. mutable struct Workbook package::MSOfficePackage # parent XLSXFile sheets::Vector{Worksheet} # workbook -> sheets -> . sheetId determines the index of the WorkSheet in this vector. @@ -320,15 +347,16 @@ mutable struct Workbook sst::SharedStringTable # shared string table buffer_styles_is_float::Dict{Int, Bool} # cell style -> true if is float buffer_styles_is_datetime::Dict{Int, Bool} # cell style -> true if is datetime - workbook_names::Dict{String, DefinedNameValueTypes} # definedName - worksheet_names::Dict{Tuple{Int, String}, DefinedNameValueTypes} # definedName. (sheetId, name) -> value. + workbook_names::Dict{String, DefinedNameValue} # definedName + worksheet_names::Dict{Tuple{Int, String}, DefinedNameValue} # definedName. (sheetId, name) -> value. styles_xroot::Union{XML.Node, Nothing} end """ `XLSXFile` represents a reference to an Excel file. -It is created by using [`XLSX.readxlsx`](@ref) or [`XLSX.openxlsx`](@ref). +It is created by using [`XLSX.readxlsx`](@ref) or [`XLSX.openxlsx`](@ref) +or [`XLSX.opentemplate`](@ref) or [`XLSX.newxlsx`](@ref). From a `XLSXFile` you can navigate to a `XLSX.Worksheet` reference as shown in the example below. diff --git a/src/workbook.jl b/src/workbook.jl index db0375c2..402a6053 100644 --- a/src/workbook.jl +++ b/src/workbook.jl @@ -14,7 +14,7 @@ is_writable(xl::XLSXFile) = xl.is_writable sheetnames(xl::XLSXFile) sheetnames(wb::Workbook) -Returns a vector with Worksheet names for this Workbook. +Return a vector with Worksheet names for this Workbook. 
""" sheetnames(wb::Workbook) = [ s.name for s in wb.sheets ] @inline sheetnames(xl::XLSXFile) = sheetnames(xl.workbook) @@ -23,11 +23,11 @@ sheetnames(wb::Workbook) = [ s.name for s in wb.sheets ] hassheet(wb::Workbook, sheetname::AbstractString) hassheet(xl::XLSXFile, sheetname::AbstractString) -Returns `true` if `wb` contains a sheet named `sheetname`. +Return `true` if `wb` contains a sheet named `sheetname`. """ function hassheet(wb::Workbook, sheetname::AbstractString) :: Bool for s in wb.sheets - if s.name == sheetname + if s.name == unquoteit(sheetname) return true end end @@ -39,7 +39,7 @@ end """ sheetcount(xlsfile) :: Int -Counts the number of sheets in the Workbook. +Count the number of sheets in the Workbook. """ @inline sheetcount(wb::Workbook) = length(wb.sheets) @inline sheetcount(xl::XLSXFile) = sheetcount(xl.workbook) @@ -50,7 +50,7 @@ Counts the number of sheets in the Workbook. function getsheet(wb::Workbook, sheetname::String) :: Worksheet for ws in wb.sheets - if ws.name == xlsx_escape(sheetname) + if ws.name == unquoteit(sheetname) return ws end end @@ -119,25 +119,39 @@ function getdata(xl::XLSXFile, rng::SheetColumnRange) return getdata(getsheet(xl, rng.sheet), rng.colrng) end +function getdata(xl::XLSXFile, rng::SheetRowRange) + @assert hassheet(xl, rng.sheet) "Sheet $(rng.sheet) not found." + return getdata(getsheet(xl, rng.sheet), rng.rowrng) +end + +function getdata(xl::XLSXFile, rng::NonContiguousRange) + @assert hassheet(xl, rng.sheet) "Sheet $(rng.sheet) not found." 
+ return getdata(getsheet(xl, rng.sheet), rng) +end + function getdata(xl::XLSXFile, s::AbstractString) - if is_valid_sheet_cellname(s) - return getdata(xl, SheetCellRef(s)) - elseif is_valid_sheet_cellrange(s) - return getdata(xl, SheetCellRange(s)) - elseif is_valid_sheet_column_range(s) - return getdata(xl, SheetColumnRange(s)) - elseif is_workbook_defined_name(xl, s) + if is_workbook_defined_name(xl, s) v = get_defined_name_value(xl.workbook, s) - if is_defined_name_value_a_constant(v) + if is_defined_name_value_a_constant(v) return v elseif is_defined_name_value_a_reference(v) - return getdata(xl, v) + return getdata(xl, v) else error("Unexpected defined name value: $v.") end + elseif is_valid_sheet_cellname(s) + return getdata(xl, SheetCellRef(s)) + elseif is_valid_sheet_cellrange(s) + return getdata(xl, SheetCellRange(s)) + elseif is_valid_sheet_column_range(s) + return getdata(xl, SheetColumnRange(s)) + elseif is_valid_sheet_row_range(s) + return getdata(xl, SheetRowRange(s)) + elseif is_valid_non_contiguous_range(s) + return getdata(xl, NonContiguousRange(s)) end - error("$s is not a valid sheetname or cell/range reference.") + error("$s is not a valid definedName or cell/range reference.") end function getcell(xl::XLSXFile, ref::SheetCellRef) @@ -157,11 +171,25 @@ function getcellrange(xl::XLSXFile, rng::SheetColumnRange) return getcellrange(getsheet(xl, rng.sheet), rng.colrng) end +function getcellrange(xl::XLSXFile, rng::SheetRowRange) + @assert hassheet(xl, rng.sheet) "Sheet $(rng.sheet) not found." + return getcellrange(getsheet(xl, rng.sheet), rng.rowrng) +end + +function getcellrange(xl::XLSXFile, rng::NonContiguousRange) + @assert hassheet(xl, rng.sheet) "Sheet $(rng.sheet) not found." 
+ return getcellrange(getsheet(xl, rng.sheet), rng) +end + function getcellrange(xl::XLSXFile, rng_str::AbstractString) if is_valid_sheet_cellrange(rng_str) return getcellrange(xl, SheetCellRange(rng_str)) elseif is_valid_sheet_column_range(rng_str) return getcellrange(xl, SheetColumnRange(rng_str)) + elseif is_valid_sheet_row_range(rng_str) + return getcellrange(xl, SheetRowRange(rng_str)) + elseif is_valid_non_contiguous_range(rng_str) + return getcellrange(xl, NonContiguousRange(rng_str)) end error("$rng_str is not a valid range reference.") @@ -173,13 +201,131 @@ end @inline is_worksheet_defined_name(wb::Workbook, sheetId::Int, name::AbstractString) :: Bool = haskey(wb.worksheet_names, (sheetId, name)) @inline is_worksheet_defined_name(wb::Workbook, sheet_name::AbstractString, name::AbstractString) :: Bool = is_worksheet_defined_name(wb, getsheet(wb, sheet_name).sheetId, name) -@inline get_defined_name_value(wb::Workbook, name::AbstractString) :: DefinedNameValueTypes = wb.workbook_names[name] +@inline get_defined_name_value(wb::Workbook, name::AbstractString) :: DefinedNameValueTypes = wb.workbook_names[name].value function get_defined_name_value(ws::Worksheet, name::AbstractString) :: DefinedNameValueTypes wb = get_workbook(ws) sheetId = ws.sheetId - return wb.worksheet_names[(sheetId, name)] + dn = wb.worksheet_names[(sheetId, name)] + return dn.value end -@inline is_defined_name_value_a_reference(v::DefinedNameValueTypes) = isa(v, SheetCellRef) || isa(v, SheetCellRange) +@inline is_defined_name_value_a_reference(v::DefinedNameValueTypes) = isa(v, SheetCellRef) || isa(v, SheetCellRange) || isa(v, NonContiguousRange) @inline is_defined_name_value_a_constant(v::DefinedNameValueTypes) = !is_defined_name_value_a_reference(v) + +function is_valid_defined_name(name::AbstractString) :: Bool + if isempty(name) + return false + end + if !isletter(name[1]) && name[1] != '_' + return false + end + for c in name + if !isletter(c) && !isdigit(c) && c != '_' && c != '\\' 
+ return false + end + end + return true +end + +function addDefName(xf::XLSXFile, name::AbstractString, value::DefinedNameValueTypes; absolute=true) + if !is_valid_defined_name(name) + error("Invalid defined name: $name.") + end + if is_workbook_defined_name(xf, name) + error("Workbook already has a defined name called $name.") + end + if value isa NonContiguousRange + abs = absolute ? fill(true, length(value.rng)) : fill(false, length(value.rng)) + else + abs = absolute ? true : false + end + xf.workbook.workbook_names[name] = DefinedNameValue(value, abs) +end +function addDefName(ws::Worksheet, name::AbstractString, value::DefinedNameValueTypes; absolute=true) + wb = get_workbook(ws) + if !is_valid_defined_name(name) + error("Invalid defined name: $name.") + end + if is_worksheet_defined_name(ws, name) + error("Worksheet $(ws.name) already has a defined name called $name.") + end + + if value isa NonContiguousRange + @assert value.sheet == ws.name "Non-contiguous range must be in the same worksheet." + abs = absolute ? fill(true, length(value.rng)) : fill(false, length(value.rng)) + else + abs = absolute ? true : false + end + wb.worksheet_names[(ws.sheetId, name)] = DefinedNameValue(value, abs) +end + +quoteit(x::AbstractString) = occursin(r"[^\w]|\s", x) ? "'$x'" : x +unquoteit(x::AbstractString) = replace(x, "'" => "") + +""" + addDefinedName(xf::XLSXFile, name::AbstractString, value::Union{Int, Float64, String}; absolute=true) + addDefinedName(xf::XLSXFile, name::AbstractString, value::AbstractString; absolute=true) + addDefinedName(sh::Worksheet, name::AbstractString, value::Union{Int, Float64, String}; absolute=true) + addDefinedName(sh::Worksheet, name::AbstractString, value::AbstractString; absolute=true) + +Add a defined name to the Workbook or Worksheet. If an `XLSXFile` is passed, the defined name +is added to the Workbook. If a `Worksheet` is passed, the defined name is added to the Worksheet. 
+ +If the new `definedName` is a cell reference or range, by default, it will be an absolute +reference (e.g. \$A\$1:\$C\$6). If `absolute=false` is specified, the new `definedName` will be +a relative reference (e.g. A1:C6). The `absolute` argument is ignored if the `definedName` is +not a cell reference or range. + +In the context of `XLSX.jl` there is no difference between an absolute reference and a relative +reference. However, Excel treats them differently. When `definedNames` are read in as part of +an XLSXFile, we keep track of whether they are absolute or not. If the XLSXFile is subsequently +written out again, the status of the `definedNames` is preserved. + +# Examples +```julia +julia> XLSX.addDefinedName(sh, "ID", "C21") + +julia> XLSX.addDefinedName(sh, "NEW", "'Mock-up'!A1:B2") + +julia> XLSX.addDefinedName(sh, "my_name", "A1,B2,C3") + +julia> XLSX.addDefinedName(xf, "Life_the_universe_and_everything", 42) + +julia> XLSX.addDefinedName(xf, "first_name", "Hello World") + +``` +""" +function addDefinedName end +addDefinedName(xf::XLSXFile, name::AbstractString, value::Union{Int, Float64}; absolute=true) = addDefName(xf, name, value) +addDefinedName(ws::Worksheet, name::AbstractString, value::Union{Int, Float64}; absolute=true) = addDefName(ws, name, value) +function addDefinedName(xf::XLSXFile, name::AbstractString, value::AbstractString; absolute=true) + if value == "" + error("Defined name value cannot be an empty string.") + end + if is_valid_sheet_cellname(value) + return addDefName(xf, name, SheetCellRef(value); absolute) + elseif is_valid_sheet_cellrange(value) + return addDefName(xf, name, SheetCellRange(value); absolute) + elseif is_valid_non_contiguous_sheetcellrange(value) + return addDefName(xf, name, NonContiguousRange(value); absolute) + else + return addDefName(xf, name, value) + end +end +function addDefinedName(ws::Worksheet, name::AbstractString, value::AbstractString; absolute=true) + if value == "" + error("Defined name value cannot be 
an empty string.") + end + if is_valid_cellname(value) + return addDefName(ws, name, SheetCellRef(ws.name, CellRef(value)); absolute) + elseif is_valid_cellrange(value) + return addDefName(ws, name, SheetCellRange(ws.name, CellRange(value)); absolute) + elseif is_valid_non_contiguous_sheetcellrange(value) + return addDefName(ws, name, NonContiguousRange(value); absolute) + elseif is_valid_non_contiguous_cellrange(value) + return addDefName(ws, name, NonContiguousRange(ws, value); absolute) + else + return addDefName(ws, name, value) + end +end diff --git a/src/worksheet.jl b/src/worksheet.jl index 0526f653..ec9c4f03 100644 --- a/src/worksheet.jl +++ b/src/worksheet.jl @@ -68,8 +68,8 @@ end getdata(sheet, ref) getdata(sheet, row, column) -Returns a scalar or a matrix with values from a spreadsheet. -`ref` can be a cell reference or a range. +Returns a scalar, vector or a matrix with values from a spreadsheet. +`ref` can be a cell reference or a range or a valid defined name. Indexing in a `Worksheet` will dispatch to `getdata` method. @@ -78,11 +78,21 @@ Indexing in a `Worksheet` will dispatch to `getdata` method. ```julia julia> f = XLSX.readxlsx("myfile.xlsx") -julia> sheet = f["mysheet"] +julia> sheet = f["mysheet"] # Worksheet -julia> matrix = sheet["A1:B4"] +julia> matrix = sheet["A1:B4"] # CellRange -julia> single_value = sheet[2, 2] # B2 +julia> matrix = sheet["A:B"] # Column range + +julia> matrix = sheet["1:4"] # Row range + +julia> matrix = sheet["Contiguous"] # Named range + +julia> vector = sheet["A1:A4,C1:C4,G5"] # Non-contiguous range + +julia> vector = sheet["Location"] # Non-contiguous named range + +julia> single_value = sheet[2, 2] # Cell "B2" ``` See also [`XLSX.readdata`](@ref). @@ -162,15 +172,66 @@ function getdata(ws::Worksheet, rng::ColumnRange) :: Array{Any,2} return hcat(columns...) 
end +function getdata(ws::Worksheet, rng::RowRange) :: Array{Any,2} + rows_count = length(rng) + dim = get_dimension(ws) + + rows = Vector{Vector{Any}}(undef, rows_count) + for i in 1:rows_count + rows[i] = Vector{Any}() + end + + let + top, bottom = row_bounds(rng) + left = dim.start.column_number + right = dim.stop.column_number + + for sheetrow in eachrow(ws) + if sheetrow.row > bottom + break + end + if top > sheetrow.row + continue + else + row_index=sheetrow.row-top+1 + for column in left:right + cell = getcell(sheetrow, column) + push!(rows[row_index], getdata(ws, cell)) + end + end + end + end + + cols = length(rows[1]) + for r in rows + @assert length(r) == cols "Inconsistent state: Each row should have the same number of columns." + end + + return permutedims(hcat(rows...)) +end + +function getdata(ws::Worksheet, rng::NonContiguousRange) :: Vector{Any} + results=Vector{Any}() + for r in rng.rng + if r isa CellRef + push!(results, getdata(ws, r)) + else + for cell in r + push!(results, getdata(ws, cell)) + end + end + end + return results +end + +# Needed for definedName references +getdata(ws::Worksheet, s::SheetCellRef) = getdata(ws, s.cellref) +getdata(ws::Worksheet, s::SheetCellRange) = getdata(ws, s.rng) +getdata(ws::Worksheet, s::SheetColumnRange) = getdata(ws, s.colrng) +getdata(ws::Worksheet, s::SheetRowRange) = getdata(ws, s.rowrng) function getdata(ws::Worksheet, ref::AbstractString) :: Union{Array{Any,2}, Any} - if is_valid_cellname(ref) - return getdata(ws, CellRef(ref)) - elseif is_valid_cellrange(ref) - return getdata(ws, CellRange(ref)) - elseif is_valid_column_range(ref) - return getdata(ws, ColumnRange(ref)) - elseif is_worksheet_defined_name(ws, ref) + if is_worksheet_defined_name(ws, ref) v = get_defined_name_value(ws, ref) if is_defined_name_value_a_constant(v) return v @@ -189,13 +250,21 @@ function getdata(ws::Worksheet, ref::AbstractString) :: Union{Array{Any,2}, Any} else error("Unexpected defined name value: $v.") end + elseif 
is_valid_cellname(ref) + return getdata(ws, CellRef(ref)) + elseif is_valid_cellrange(ref) + return getdata(ws, CellRange(ref)) + elseif is_valid_column_range(ref) + return getdata(ws, ColumnRange(ref)) + elseif is_valid_row_range(ref) + return getdata(ws, RowRange(ref)) + elseif is_valid_non_contiguous_range(ref) + return getdata(ws, NonContiguousRange(ws, ref)) else error("$ref is not a valid cell or range reference.") end end -getdata(ws::Worksheet, rng::SheetCellRange) = getdata(get_xlsxfile(ws), rng) - function getdata(ws::Worksheet) if ws.dimension !== nothing return getdata(ws, get_dimension(ws)) @@ -271,8 +340,12 @@ getcell(ws::Worksheet, row::Integer, col::Integer) = getcell(ws, CellRef(row, co """ getcellrange(sheet, rng) -Returns a matrix with cells as `Array{AbstractCell, 2}`. -`rng` must be a valid cell range, as in `"A1:B2"`. +Return a matrix with cells as `Array{AbstractCell, 2}`. +`rng` must be a valid cell range, column range or row range, +as in `"A1:B2"`, `"A:B"` or `"1:2"`, or a non-contiguous range. +For row and column ranges, the extent of the range in the other +dimension is determined by the worksheet's dimension. +A non-contiguous range (which is not rectangular) will return a vector. """ function getcellrange(ws::Worksheet, rng::CellRange) :: Array{AbstractCell,2} result = Array{AbstractCell, 2}(undef, size(rng)) @@ -333,11 +406,60 @@ function getcellrange(ws::Worksheet, rng::ColumnRange) :: Array{AbstractCell,2} return hcat(columns...) 
end +function getcellrange(ws::Worksheet, rng::RowRange) :: Array{AbstractCell,2} + dim = get_dimension(ws) + + rows = Vector{Vector{AbstractCell}}() + + let + top, bottom = row_bounds(rng) + left = dim.start.column_number + right = dim.stop.column_number + + for (i, sheetrow) in enumerate(eachrow(ws)) + push!(rows, Vector{AbstractCell}()) + if top <= sheetrow.row && sheetrow.row <= bottom + for column in left:right + cell = getcell(sheetrow, column) + push!(rows[i], cell) + end + end + if sheetrow.row > bottom + break + end + end + end + + cols = length(rows[1]) + for r in rows + @assert length(r) == cols "Inconsistent state: Each row should have the same number of columns." + end + + return permutedims(hcat(rows...)) +end +function getcellrange(ws::Worksheet, rng::NonContiguousRange) :: Vector{AbstractCell} + results=Vector{AbstractCell}() + for r in rng.rng + if r isa CellRef + push!(results, getcell(ws, r)) + else + for cell in r + push!(results, getcell(ws, cell)) + end + end + end + return results +end + function getcellrange(ws::Worksheet, rng::AbstractString) if is_valid_cellrange(rng) return getcellrange(ws, CellRange(rng)) elseif is_valid_column_range(rng) return getcellrange(ws, ColumnRange(rng)) + elseif is_valid_row_range(rng) + return getcellrange(ws, RowRange(rng)) + elseif is_valid_non_contiguous_range(rng) + return getcellrange(ws, NonContiguousRange(ws, rng)) else error("$rng is not a valid cell range.") end diff --git a/src/write.jl b/src/write.jl index 94613f39..85c02762 100644 --- a/src/write.jl +++ b/src/write.jl @@ -31,7 +31,7 @@ julia> xf = newxlsx() ``` """ -newxlsx() = open_empty_template() +newxlsx(sheetname::AbstractString=""; path::AbstractString=_relocatable_data_path()) :: XLSXFile = open_empty_template(sheetname; path) function open_empty_template( sheetname::AbstractString=""; @@ -65,6 +65,7 @@ function writexlsx(output_source::Union{AbstractString, IO}, xf::XLSXFile; overw end update_worksheets_xml!(xf) + update_workbook_xml!(xf) 
ZipArchives.ZipWriter(output_source) do xlsx # write XML files @@ -185,6 +186,22 @@ function unlink_rows(node::XML.Node) # removes all rows from a sheetData XML nod end return new_worksheet end +function unlink_definedNames(node::XML.Node) # removes each `col` from a `cols` XML node. + new_cols = XML.Element("definedNames") + a = XML.attributes(node) + if !isnothing(a) # Copy attributes across to new node (probably none) + for (k, v) in XML.attributes(node) + new_cols[k] = v + end + end + for child in XML.children(node) # Copy any child nodes that are not cols across to new node + if XML.tag(child) != "definedName" # Shouldn't be any. + push!(new_cols, child) + end + end + return new_cols +end + function get_idces(doc, t, b) i=1 j=1 @@ -336,6 +353,78 @@ function update_worksheets_xml!(xl::XLSXFile) nothing end +function abscell(c::CellRef) + col, row = split_cellname(c.name) + return "\$$col\$$row" +end + +mkabs(c::SheetCellRef) = abscell(c.cellref) +mkabs(c::SheetCellRange) = abscell(c.rng.start) * ":" * abscell(c.rng.stop) +function make_absolute(dn::DefinedNameValue) + if dn.value isa NonContiguousRange + v="" + for (i, r) in enumerate(dn.value.rng) + cr = r isa CellRange ? SheetCellRange(dn.value.sheet, r) : SheetCellRef(dn.value.sheet, r) # need to separate and handle separately + if dn.isabs[i] + v *= quoteit(cr.sheet) * "!" * mkabs(cr) * "," + else + v *= string(cr) * "," + end + end + return v[1:end-1] + else + return dn.isabs ? quoteit(dn.value.sheet) * "!" * mkabs(dn.value) : string(dn.value) + end +end + +function update_workbook_xml!(xl::XLSXFile) + wb = get_workbook(xl) + + wbdoc = xmlroot(xl, "xl/workbook.xml") # find the block in the workbook's xml file + i, j = get_idces(wbdoc, "workbook", "definedNames") + + definedNames = isnothing(j) ? 
XML.Element("definedNames") : unlink_definedNames(wbdoc[i][j]) # Remove old defined names + + if isnothing(j) + # there is no block in the workbook's xml file, so we'll need to create one + # The block goes after the block. Need to move everything down one to make room. + m, n = get_idces(wbdoc, "workbook", "sheets") + nchildren = length(XML.children(wbdoc[m])) + push!(wbdoc[m], wbdoc[m][end]) + for c in nchildren-1:-1:n+1 + wbdoc[m][c+1]=wbdoc[m][c] + end + definedNames = XML.Element("definedNames") + j=n+1 + + else + definedNames = unlink_definedNames(wbdoc[i][j]) # Remove old defined names + end + + for (k, v) in wb.workbook_names + if typeof(v.value) <: DefinedNameRangeTypes + v=make_absolute(v) + else + v= string(v.value) + end + dn_node = XML.Element("definedName", name=k, XML.Text(v)) + push!(definedNames, dn_node) + end + for (k, v) in wb.worksheet_names + if typeof(v.value) <: DefinedNameRangeTypes + v=make_absolute(v) + else + v= string(v.value) + end + dn_node = XML.Element("definedName", name=last(k), localSheetId=first(k)-1, XML.Text(v)) + push!(definedNames, dn_node) + end + + wbdoc[i][j] = definedNames # Add the new definedNames block to the workbook's xml file + + return nothing +end + function add_cell_to_worksheet_dimension!(ws::Worksheet, cell::Cell) # update worksheet dimension ws_dimension = get_dimension(ws) @@ -571,6 +660,20 @@ function target_cell_ref_from_offset(anchor_cell::CellRef, offset::Integer, dim: return target_cell_ref_from_offset(row_number(anchor_cell), column_number(anchor_cell), offset, dim) end +const ALLOWED_TYPES = Union{Number, String, Bool, Dates.Date, Dates.Time, Dates.DateTime, Missing, Nothing} +function process_vector(col) # Convert any disallowed types to strings. #239. 
+ if eltype(col) <: ALLOWED_TYPES + # Case 1: All elements are of allowed types + return col + elseif eltype(col) <: Any && all(x -> !(typeof(x) <: ALLOWED_TYPES), col) + # Case 2: All elements are of disallowed types + return map(x -> "$x", col) + else + # Case 3: Mixed types, process each element + return [typeof(x) <: ALLOWED_TYPES ? x : "$x" for x in col] + end +end + """ writetable!( sheet::Worksheet, @@ -586,6 +689,13 @@ starting at `anchor_cell`. `data` must be a vector of columns. `columnnames` must be a vector of column labels. +Column labels that are not of type `String` will be converted +to strings before writing. Any data columns that are not of +type `String`, `Float64`, `Int64`, `Bool`, `Date`, `Time`, +`DateTime`, `Missing`, or `Nothing` will be converted to strings +before writing. + + See also: [`XLSX.writetable`](@ref). """ function writetable!( @@ -612,6 +722,7 @@ function writetable!( anchor_row = row_number(anchor_cell) anchor_col = column_number(anchor_cell) start_from_anchor = 1 + # write table header if write_columnnames for c in 1:col_count @@ -622,6 +733,7 @@ function writetable!( end # write table data + data = [process_vector(col) for col in data] # Address issue #239 for c in 1:col_count for r in 1:row_count target_cell_ref = CellRef(r + anchor_row - start_from_anchor, c + anchor_col - 1) diff --git a/test/runtests.jl b/test/runtests.jl index 11b81d40..06981ecb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -408,7 +408,7 @@ end end end -@testset "Defined Names" begin +@testset "Defined Names" begin # Issue #148 @test XLSX.is_defined_name_value_a_reference(XLSX.SheetCellRef("Sheet1!A1")) @test XLSX.is_defined_name_value_a_reference(XLSX.SheetCellRange("Sheet1!A1:B2")) @test !XLSX.is_defined_name_value_a_reference(1) @@ -416,25 +416,56 @@ end @test !XLSX.is_defined_name_value_a_reference("Hey") @test !XLSX.is_defined_name_value_a_reference(missing) - XLSX.openxlsx(joinpath(data_directory, "general.xlsx")) do f - @test 
f["SINGLE_CELL"] == "single cell A2" - @test f["RANGE_B4C5"] == Any["range B4:C5" "range B4:C5"; "range B4:C5" "range B4:C5"] - @test f["CONST_DATE"] == 43383 - @test isapprox(f["CONST_FLOAT"], 10.2) - @test f["CONST_INT"] == 100 - @test f["LOCAL_INT"] == 2000 - @test f["named_ranges_2"]["LOCAL_INT"] == 2000 - @test f["named_ranges"]["LOCAL_INT"] == 1000 - @test f["named_ranges"]["LOCAL_NAME"] == "Hey You" - @test f["named_ranges_2"]["LOCAL_NAME"] == "out there in the cold" - @test f["named_ranges"]["SINGLE_CELL"] == "single cell A2" - - @test_throws ErrorException f["header_error"]["LOCAL_REF"] - @test f["named_ranges"]["LOCAL_REF"][1] == 10 - @test f["named_ranges"]["LOCAL_REF"][2] == 20 - @test f["named_ranges_2"]["LOCAL_REF"][1] == "local" - @test f["named_ranges_2"]["LOCAL_REF"][2] == "reference" - end + f = XLSX.opentemplate(joinpath(data_directory, "general.xlsx")) + @test f["SINGLE_CELL"] == "single cell A2" + @test f["RANGE_B4C5"] == Any["range B4:C5" "range B4:C5"; "range B4:C5" "range B4:C5"] + @test f["CONST_DATE"] == 43383 + @test isapprox(f["CONST_FLOAT"], 10.2) + @test f["CONST_INT"] == 100 + @test f["LOCAL_INT"] == 2000 + @test f["named_ranges_2"]["LOCAL_INT"] == 2000 + @test f["named_ranges"]["LOCAL_INT"] == 1000 + @test f["named_ranges"]["LOCAL_NAME"] == "Hey You" + @test f["named_ranges_2"]["LOCAL_NAME"] == "out there in the cold" + @test f["named_ranges"]["SINGLE_CELL"] == "single cell A2" + + @test_throws ErrorException f["header_error"]["LOCAL_REF"] + @test f["named_ranges"]["LOCAL_REF"][1] == 10 + @test f["named_ranges"]["LOCAL_REF"][2] == 20 + @test f["named_ranges_2"]["LOCAL_REF"][1] == "local" + @test f["named_ranges_2"]["LOCAL_REF"][2] == "reference" + + XLSX.addDefinedName(f["lookup"], "Life_the_Universe_and_Everything", 42) + XLSX.addDefinedName(f["lookup"], "FirstName", "Hello World") + XLSX.addDefinedName(f["lookup"], "single", "C2"; absolute=true) + XLSX.addDefinedName(f["lookup"], "range", "C3:C5"; absolute=true) + 
XLSX.addDefinedName(f["lookup"], "NonContig", "C3:C5,D3:D5"; absolute=true) + @test f["lookup"]["Life_the_Universe_and_Everything"] == 42 + @test f["lookup"]["FirstName"] == "Hello World" + @test f["lookup"]["single"] == "NAME" + @test f["lookup"]["range"] == Any["name1"; "name2"; "name3";;] # A 2D Array, size (3, 1) + @test f["lookup"]["NonContig"] == Any["name1", "name2", "name3", 100, 200, 300] # NonContiguousRanges return a vector + + XLSX.addDefinedName(f, "Life_the_Universe_and_Everything", 42) + XLSX.addDefinedName(f, "FirstName", "Hello World") + XLSX.addDefinedName(f, "single", "lookup!C2"; absolute=true) + XLSX.addDefinedName(f, "range", "lookup!C3:C5"; absolute=true) + XLSX.addDefinedName(f, "NonContig", "lookup!C3:C5,lookup!D3:D5"; absolute=true) + @test f["Life_the_Universe_and_Everything"] == 42 + @test f["FirstName"] == "Hello World" + @test f["single"] == "NAME" + @test f["range"] == Any["name1"; "name2"; "name3";;] # A 2D Array, size (3, 1) + @test f["NonContig"] == Any["name1", "name2", "name3", 100, 200, 300] # NonContiguousRanges return a vector + + XLSX.writexlsx("mytest.xlsx", f, overwrite=true) + + f = XLSX.readxlsx("mytest.xlsx") + @test f["Life_the_Universe_and_Everything"] == 42 + @test f["FirstName"] == "Hello World" + @test f["single"] == "NAME" + @test f["range"] == Any["name1"; "name2"; "name3";;] # A 2D Array, size (3, 1) + @test f["NonContig"] == Any["name1", "name2", "name3", 100, 200, 300] # NonContiguousRanges return a vector + isfile("mytest.xlsx") && rm("mytest.xlsx") @test XLSX.readdata(joinpath(data_directory, "general.xlsx"), "SINGLE_CELL") == "single cell A2" @test XLSX.readdata(joinpath(data_directory, "general.xlsx"), "RANGE_B4C5") == Any["range B4:C5" "range B4:C5"; "range B4:C5" "range B4:C5"] @@ -543,6 +574,57 @@ end @test hash(XLSX.ColumnRange("B:D")) == hash(XLSX.ColumnRange("B:D")) end +@testset "Row Range" begin # Issue #150 + cr = XLSX.RowRange("2:5") + @test string(cr) == "2:5" + @test cr.start == 2 + @test 
cr.stop == 5 + @test length(cr) == 4 + @test collect(cr) == ["2", "3", "4", "5"] + + cr = XLSX.RowRange("2") + @test string(cr) == "2:2" + @test cr.start == 2 + @test cr.stop == 2 + @test length(cr) == 1 + @test collect(cr) == ["2"] + + @test_throws AssertionError XLSX.RowRange("B1:D3") + @test_throws AssertionError XLSX.RowRange("5:2") + @test XLSX.RowRange("2:5") == XLSX.RowRange("2:5") + @test hash(XLSX.RowRange("2:5")) == hash(XLSX.RowRange("2:5")) +end + +@testset "Non-contiguous Range" begin + cr = XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet1!B1:B3") + @test string(cr) == "Sheet1!D1:D3,Sheet1!B1:B3" + @test cr.sheet == "Sheet1" + @test cr.rng == [XLSX.CellRange("D1:D3"), XLSX.CellRange("B1:B3")] + @test length(cr) == 6 + @test collect(cr.rng) == [XLSX.CellRange("D1:D3"), XLSX.CellRange("B1:B3")] + @test XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet1!B1:B3") == XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet1!B1:B3") + @test hash(XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet1!B1:B3")) == hash(XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet1!B1:B3")) + + f = XLSX.newxlsx("Sheet 1") + s = f["Sheet 1"] + for cell in XLSX.CellRange("A1:D6") + s[cell] = "" + end + cr = XLSX.NonContiguousRange(s, "D1:D3,A2,B1:B3") + @test string(cr) == "'Sheet 1'!D1:D3,'Sheet 1'!A2,'Sheet 1'!B1:B3" + @test cr.sheet == "Sheet 1" + @test cr.rng == [XLSX.CellRange("D1:D3"), XLSX.CellRef("A2"), XLSX.CellRange("B1:B3")] + @test length(cr) == 7 + @test collect(cr.rng) == [XLSX.CellRange("D1:D3"), XLSX.CellRef("A2"), XLSX.CellRange("B1:B3")] + @test XLSX.NonContiguousRange(s, "D1:D3,A2,B1:B3") == XLSX.NonContiguousRange(s, "D1:D3,A2,B1:B3") + @test hash(XLSX.NonContiguousRange(s, "D1:D3,A2,B1:B3")) == hash(XLSX.NonContiguousRange(s, "D1:D3,A2,B1:B3")) + + @test_throws AssertionError XLSX.NonContiguousRange("Sheet1!D1:D3,B1:B3") + @test_throws AssertionError XLSX.NonContiguousRange("Sheet1!D1:D3,Sheet2!B1:B3") + @test_throws AssertionError XLSX.NonContiguousRange("B1:D3") + @test_throws AssertionError 
XLSX.NonContiguousRange("2:5") +end + @testset "CellRange iterator" begin rng = XLSX.CellRange("A2:C4") @test collect(rng) == [XLSX.CellRef("A2"), XLSX.CellRef("B2"), XLSX.CellRef("C2"), XLSX.CellRef("A3"), XLSX.CellRef("B3"), XLSX.CellRef("C3"), XLSX.CellRef("A4"), XLSX.CellRef("B4"), XLSX.CellRef("C4")] @@ -970,6 +1052,21 @@ end test_data = Any[Any["C3", missing], Any[missing, "D4"]] check_test_data(data, test_data) end + + @testset "normalizenames" begin # Issue #260 + + data = Vector{Any}() + push!(data, [:sym1, :sym2, :sym3]) + push!(data, [1.0, 2.0, 3.0]) + push!(data, ["abc", "DeF", "gHi"]) + push!(data, [true, true, false]) + cols = ["1 col", "col \$2", "local", "col:4"] + + XLSX.writetable("mytest.xlsx", data, cols; overwrite=true) + df = DataFrames.DataFrame(XLSX.readtable("mytest.xlsx", "Sheet1", normalizenames=true)) + @test DataFrames.names(df) == Any["_1_col", "col_2", "_local", "col_4"] + + end end @testset "Write" begin @@ -1236,14 +1333,39 @@ end @test dt_read.column_label_index == dt.column_label_index end + @testset "extended types" begin # Issue #239 + @enum enums begin + enum1 + enum2 + enum3 + end + + data = Vector{Any}() + push!(data, [:sym1, :sym2, :sym3]) + push!(data, [1.0, 2.0, 3.0]) + push!(data, ["abc", "DeF", "gHi"]) + push!(data, [true, true, false]) + push!(data, [XLSX.CellRef("A1"), XLSX.CellRef("B2"), XLSX.CellRef("CCC34000")]) + push!(data, collect(instances(enums))) + cols = [string(eltype(x)) for x in data] + + XLSX.writetable("mytest.xlsx", data, cols; overwrite=true) + + f = XLSX.readxlsx("mytest.xlsx") + @test f[1]["A1"] == "Symbol" + @test f[1]["A1:A4"] == Any["Symbol"; "sym1"; "sym2"; "sym3";;] # A 2D Array, size (4, 1) + @test f[1]["A1"] == "Symbol" + @test f[1]["E1:E4"] == Any["XLSX.CellRef"; "A1"; "B2"; "CCC34000";;] + end + # delete files created by this testset - delete_files = ["output_table.xlsx", "output_tables.xlsx"] + delete_files = ["output_table.xlsx", "output_tables.xlsx", "mytest.xlsx"] for f in delete_files 
isfile(f) && rm(f) end end -@testset "Styles" verbose = true begin +@testset "Styles" begin @testset "Original" begin using XLSX: CellValue, id, getcell, setdata!, CellRef @@ -1616,9 +1738,9 @@ end @test XLSX.getBorder(s, "D18").border == Dict("left" => Dict("rgb" => "FF111111", "style" => "hair"), "bottom" => Dict("rgb" => "FF111111", "style" => "hair"), "right" => Dict("rgb" => "FF111111", "style" => "hair"), "top" => Dict("rgb" => "FF111111", "style" => "hair"), "diagonal" => Dict("rgb" => "FF111111", "style" => "hair")) @test XLSX.getBorder(s, "D20").border == Dict("left" => Dict("rgb" => "FF111111", "style" => "hair"), "bottom" => Dict("rgb" => "FF111111", "style" => "hair"), "right" => Dict("rgb" => "FF111111", "style" => "hair"), "top" => Dict("rgb" => "FF111111", "style" => "hair"), "diagonal" => Dict("rgb" => "FF111111", "style" => "hair")) @test XLSX.getBorder(s, "J18").border == Dict("left" => Dict("rgb" => "FF111111", "style" => "hair"), "bottom" => Dict("rgb" => "FF111111", "style" => "hair"), "right" => Dict("rgb" => "FF111111", "style" => "hair"), "top" => Dict("rgb" => "FF111111", "style" => "hair"), "diagonal" => Dict("rgb" => "FF111111", "style" => "hair")) - @test XLSX.getBorder(s, "J18").border == Dict("left" => Dict("rgb" => "FF111111", "style" => "hair"), "bottom" => Dict("rgb" => "FF111111", "style" => "hair"), "right" => Dict("rgb" => "FF111111", "style" => "hair"), "top" => Dict("rgb" => "FF111111", "style" => "hair"), "diagonal" => Dict("rgb" => "FF111111", "style" => "hair")) + @test XLSX.getBorder(s, "J20").border == Dict("left" => Dict("rgb" => "FF111111", "style" => "hair"), "bottom" => Dict("rgb" => "FF111111", "style" => "hair"), "right" => Dict("rgb" => "FF111111", "style" => "hair"), "top" => Dict("rgb" => "FF111111", "style" => "hair"), "diagonal" => Dict("rgb" => "FF111111", "style" => "hair")) - # Cant get attributes on a range. + # Can't get attributes on a range. 
@test_throws AssertionError XLSX.getBorder(s, "Contiguous") f = XLSX.open_empty_template() @@ -1642,6 +1764,7 @@ end # Can't set a uniform attribute to a single cell. @test_throws MethodError XLSX.setUniformFill(s, "D4"; pattern="gray125", bgColor="FF000000") + @test_throws MethodError XLSX.setUniformFill(s, "ID"; pattern="darkTrellis", fgColor="FF222222", bgColor="FFDDDDDD") @test_throws MethodError XLSX.setUniformFont(s, "B4"; size=12, name="Times New Roman", color="FF040404") @test_throws MethodError XLSX.setUniformBorder(f, "Mock-up!D4"; left=["style" => "dotted", "color" => "FF000FF0"], right=["style" => "medium", "color" => "FF765000"], @@ -1649,7 +1772,6 @@ end bottom=["style" => "medium", "color" => "FF0000FF"], diagonal=["style" => "none"] ) - @test_throws MethodError XLSX.setUniformFill(s, "ID"; pattern="darkTrellis", fgColor="FF222222", bgColor="FFDDDDDD") end @@ -1668,13 +1790,13 @@ end @test XLSX.getFill(s, "ID").fill == Dict("patternFill" => Dict("bgrgb" => "FFDDDDDD", "patternType" => "darkTrellis", "fgrgb" => "FF222222")) # Location is a non-contiguous range - XLSX.setFill(s, "Location"; pattern="lightVertical") + XLSX.setFill(s, "Location"; pattern="lightVertical") # Default colors unchanged @test XLSX.getFill(s, "D18").fill == Dict("patternFill" => Dict("bgindexed" => "64", "patternType" => "lightVertical", "fgtint" => "-0.499984740745262", "fgtheme" => "2")) @test XLSX.getFill(s, "D20").fill == Dict("patternFill" => Dict("bgindexed" => "64", "patternType" => "lightVertical", "fgtint" => "-0.499984740745262", "fgtheme" => "2")) @test XLSX.getFill(s, "J18").fill == Dict("patternFill" => Dict("bgindexed" => "64", "patternType" => "lightVertical", "fgtint" => "-0.499984740745262", "fgtheme" => "2")) @test XLSX.getFill(s, "J18").fill == Dict("patternFill" => Dict("bgindexed" => "64", "patternType" => "lightVertical", "fgtint" => "-0.499984740745262", "fgtheme" => "2")) - XLSX.setFill(s, "Contiguous"; pattern="lightVertical") + XLSX.setFill(s, 
"Contiguous"; pattern="lightVertical") # Default colors unchanged @test XLSX.getFill(s, "D23").fill == Dict("patternFill" => Dict("patternType" => "lightVertical", "bgindexed" => "64", "fgtheme" => "0")) @test XLSX.getFill(s, "D24").fill == Dict("patternFill" => Dict("patternType" => "lightVertical", "bgindexed" => "64", "fgtheme" => "0")) @test XLSX.getFill(s, "D25").fill == Dict("patternFill" => Dict("patternType" => "lightVertical", "bgindexed" => "64", "fgtheme" => "0")) @@ -1873,8 +1995,8 @@ end f = XLSX.open_xlsx_template(joinpath(data_directory, "customXml.xlsx")) s = f["Mock-up"] - XLSX.setColumnWidth(s, "Location"; width = 60) - XLSX.setRowHeight(s, "Location"; height = 50) + XLSX.setColumnWidth(s, "Location"; width=60) + XLSX.setRowHeight(s, "Location"; height=50) @test XLSX.getRowHeight(s, "D18") ≈ 50.2109375 @test XLSX.getColumnWidth(s, "D18") ≈ 60.7109375 @test XLSX.getRowHeight(f, "Mock-up!J20") ≈ 50.2109375 @@ -1885,7 +2007,7 @@ end @testset "No cache" begin XLSX.openxlsx(joinpath(data_directory, "customXml.xlsx"); mode="r", enable_cache=true) do f @test XLSX.getRowHeight(f, "Mock-up!B2") ≈ 23.25 - @test_throws AssertionError XLSX.getColumnWidth(f, "Mock-up!B2") + @test_throws AssertionError XLSX.getColumnWidth(f, "Mock-up!B2") end XLSX.openxlsx(joinpath(data_directory, "customXml.xlsx"); mode="r", enable_cache=false) do f @test_throws AssertionError XLSX.getRowHeight(f, "Mock-up!B2") @@ -1912,7 +2034,40 @@ end end end +@testset "merged cells" begin + XLSX.openxlsx(joinpath(data_directory, "customXml.xlsx")) do f + mc = sort(XLSX.getMergedCells(f["Mock-up"])) + @test length(mc) == 25 + @test mc == sort(XLSX.CellRange[XLSX.CellRange("D49:H49"), XLSX.CellRange("D72:J72"), XLSX.CellRange("F94:J94"), XLSX.CellRange("F96:J96"), XLSX.CellRange("F84:J84"), XLSX.CellRange("F86:J86"), XLSX.CellRange("D62:J63"), XLSX.CellRange("D51:J53"), XLSX.CellRange("D55:J60"), XLSX.CellRange("D92:J92"), XLSX.CellRange("D82:J82"), XLSX.CellRange("D74:J74"), 
XLSX.CellRange("D67:J68"), XLSX.CellRange("D47:H47"), XLSX.CellRange("D9:H9"), XLSX.CellRange("D11:G11"), XLSX.CellRange("D12:G12"), XLSX.CellRange("D14:E14"), XLSX.CellRange("D16:E16"), XLSX.CellRange("D32:F32"), XLSX.CellRange("D38:J38"), XLSX.CellRange("D34:J34"), XLSX.CellRange("D18:E18"), XLSX.CellRange("D20:E20"), XLSX.CellRange("D13:G13")]) + s=f["Mock-up"] + @test XLSX.isMergedCell(f, "Mock-up!D47") + @test XLSX.isMergedCell(f, "Mock-up!D49"; mergedCells=mc) + @test XLSX.isMergedCell(s, "H84") + @test XLSX.isMergedCell(s, "G84"; mergedCells=mc) + @test XLSX.isMergedCell(s, "Short_Description") + @test !XLSX.isMergedCell(f, "Mock-up!B2") + @test !XLSX.isMergedCell(s, "H40"; mergedCells=mc) + @test !XLSX.isMergedCell(s, "ID"; mergedCells=mc) + @test_throws AssertionError XLSX.isMergedCell(s, "Contiguous"; mergedCells=mc) # Can't test a range + @test_throws AssertionError XLSX.getMergedBaseCell(s, "Location") + + @test XLSX.getMergedBaseCell(f[1], "F72") == (baseCell = CellRef("D72"), baseValue = Dates.Date("2025-03-24")) + @test XLSX.getMergedBaseCell(f, "Mock-up!G72") == (baseCell = CellRef("D72"), baseValue = Dates.Date("2025-03-24")) + @test XLSX.getMergedBaseCell(s, "H53") == (baseCell = CellRef("D51"), baseValue = "Hello World") + @test XLSX.getMergedBaseCell(s, "G52") == (baseCell = CellRef("D51"), baseValue = "Hello World") + @test XLSX.getMergedBaseCell(s, "Short_Description") == (baseCell = CellRef("D51"), baseValue = "Hello World") + @test isnothing(XLSX.getMergedBaseCell(s, "F73")) + @test isnothing(XLSX.getMergedBaseCell(f, "Mock-up!H73")) + @test_throws AssertionError XLSX.getMergedBaseCell(s, "Location") # Can't get base cell for a range + + @test isnothing(XLSX.getMergedCells(f["Document History"])) + s=f["Document History"] + @test !XLSX.isMergedCell(f, "Document History!B2") + @test !XLSX.isMergedCell(s, "C5"; mergedCells=XLSX.getMergedCells(f["Document History"])) + + end +end @testset "filemodes" begin sheetname = "New Sheet"