diff --git a/Project.toml b/Project.toml index 102f1b1..f5ae221 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ChunkedCSV" uuid = "c0d0730e-6432-44b2-a51e-6ec55e1c8b99" authors = ["Tomáš Drvoštěp "] -version = "0.1.3" +version = "0.2.0" [deps] ChunkedBase = "a380dd43-0ebf-4429-88d6-6f06ea920732" diff --git a/src/consume_contexts.jl b/src/consume_contexts.jl index 194f881..fac5cc0 100644 --- a/src/consume_contexts.jl +++ b/src/consume_contexts.jl @@ -71,8 +71,8 @@ function ChunkedBase.consume!(consume_ctx::DebugContext, payload::ParsedPayload) write(io, debug(col, j, parsing_ctx, consume_ctx, chunking_ctx)) n != 1 && print(io, ", ") n -= 1 - elseif task_buf.row_statuses[j] == RowStatus.HasColumnIndicators - write(io, k in task_buf.column_indicators[c] ? "?" : debug(col, j, parsing_ctx, consume_ctx, chunking_ctx)) + elseif task_buf.row_statuses[j] == RowStatus.MissingValues + write(io, k in task_buf.missing_values[c] ? "?" : debug(col, j, parsing_ctx, consume_ctx, chunking_ctx)) n != 1 && print(io, ", ") c += 1 n -= 1 @@ -97,7 +97,7 @@ function ChunkedBase.consume!(consume_ctx::DebugContext, payload::ParsedPayload) consume_ctx.show_values && print(io, "\t$(name): [") for j in 1:length(task_buf.row_statuses) if (task_buf.row_statuses[j] & S) > 0 - has_missing = task_buf.row_statuses[j] > RowStatus.Ok && task_buf.column_indicators[c, k] + has_missing = task_buf.row_statuses[j] & RowStatus.MissingValues > 0 && task_buf.missing_values[c, k] consume_ctx.show_values && write(io, has_missing ? "?" : debug(col, j, parsing_ctx, consume_ctx, chunking_ctx)) consume_ctx.show_values && n != 1 && print(io, ", ") has_missing && (c += 1) @@ -154,9 +154,9 @@ function ChunkedBase.consume!(ctx::TestContext, payload::ParsedPayload) str_col = String[] push!(strings, str_col) if T === Parsers.PosLen31 - col_iter = ColumnIterator(cols[i]::BufferedVector{Parsers.PosLen31}, i, task_buf.row_statuses, task_buf.column_indicators) - for (value, isinvalidrow, ismissingvalue) in col_iter - if ismissingvalue + col_iter = ColumnIterator(cols[i]::BufferedVector{Parsers.PosLen31}, i, task_buf.row_statuses, task_buf.missing_values, task_buf.errored_values) + for (value, isinvalidrow, iserroredvalue, ismissingvalue) in col_iter + if ismissingvalue || iserroredvalue push!(str_col, "") else push!(str_col, Parsers.getstring(chunking_ctx.bytes, value, parsing_ctx.escapechar)) diff --git a/src/populate_result_buffer.jl b/src/populate_result_buffer.jl index 9f36891..2a80fdb 100644 --- a/src/populate_result_buffer.jl +++ b/src/populate_result_buffer.jl @@ -21,8 +21,8 @@ function skip_row!(result_buf::AbstractResultBuffer, row_bytes, comment::Vector{ end function skip_row!(result_buf::AbstractResultBuffer) foreach(skip_element!, result_buf.cols) - unsafe_push!(result_buf.row_statuses, RowStatus.HasColumnIndicators | RowStatus.SkippedRow) - addrows!(result_buf.column_indicators, 1, true) + unsafe_push!(result_buf.row_statuses, RowStatus.MissingValues | RowStatus.SkippedRow) + addrows!(result_buf.missing_values, 1, true) return true end @@ -70,28 +70,31 @@ function ChunkedBase.populate_result_buffer!( empty!(result_buf) enum_schema = parsing_ctx.enum_schema schema = parsing_ctx.schema - colinds_row_idx = 1 + missing_idx = 1 + errored_idx = 1 options = parsing_ctx.options Base.ensureroom(result_buf, ceil(Int, length(newlines_segment) * 1.01)) ignorerepeated = options.ignorerepeated::Bool ignoreemptyrows = options.ignoreemptylines::Bool - colinds = result_buf.column_indicators + missing_values = 
result_buf.missing_values + errored_values = result_buf.errored_values cols = result_buf.cols N = length(schema) for row_idx in 2:length(newlines_segment) # We only grow the column indicators when we need to, this flag tacks whether we # already added one for this row - added_collind_row = false + added_missing_row = false + added_errored_row = false @inbounds prev_newline = newlines_segment[row_idx - 1] @inbounds curr_newline = newlines_segment[row_idx] isemptyrow = ChunkedBase._isemptyrow(prev_newline, curr_newline, buf) - (ignoreemptyrows && isemptyrow) && skip_row!(result_buf) && (colinds_row_idx += 1; continue) + (ignoreemptyrows && isemptyrow) && skip_row!(result_buf) && (missing_idx += 1; continue) # +1 -1 to exclude newline chars @inbounds row_bytes = view(buf, prev_newline+Int32(1):curr_newline-Int32(1)) - skip_row!(result_buf, row_bytes, comment) && (colinds_row_idx += 1; continue) + skip_row!(result_buf, row_bytes, comment) && (missing_idx += 1; continue) len = length(row_bytes) pos = 1 @@ -120,11 +123,10 @@ function ChunkedBase.populate_result_buffer!( if Parsers.eof(code) && !(col_idx == N && Parsers.delimited(code)) row_status |= RowStatus.TooFewColumns - row_status |= RowStatus.HasColumnIndicators - added_collind_row || (added_collind_row = true; addrows!(colinds)) + added_errored_row || (added_errored_row = true; addrows!(errored_values)) for _col_idx in col_idx:N skip_element!(cols[_col_idx]) - colinds[colinds_row_idx, _col_idx] = true + errored_values[errored_idx, _col_idx] = true end break end @@ -164,14 +166,13 @@ function ChunkedBase.populate_result_buffer!( (val, tlen, code) = parsecustom!(CT, row_bytes, pos, len, col_idx, cols, options, schema[col_idx]) end if Parsers.sentinel(code) - row_status |= RowStatus.HasColumnIndicators - added_collind_row || (added_collind_row = true; addrows!(colinds)) - @inbounds colinds[colinds_row_idx, col_idx] = true + row_status |= RowStatus.MissingValues + added_missing_row || (added_missing_row = true; addrows!(missing_values)) + @inbounds missing_values[missing_idx, col_idx] = true elseif !Parsers.ok(code) row_status |= RowStatus.ValueParsingError - row_status |= RowStatus.HasColumnIndicators - added_collind_row || (added_collind_row = true; addrows!(colinds)) - @inbounds colinds[colinds_row_idx, col_idx] = true + added_errored_row || (added_errored_row = true; addrows!(errored_values)) + @inbounds errored_values[errored_idx, col_idx] = true end pos += tlen end # for col_idx @@ -179,7 +180,8 @@ function ChunkedBase.populate_result_buffer!( row_status |= RowStatus.TooManyColumns end unsafe_push!(result_buf.row_statuses, row_status) - colinds_row_idx += (row_status & RowStatus.HasColumnIndicators) > 0 + missing_idx += added_missing_row + errored_idx += added_errored_row end # for row_idx return nothing end diff --git a/src/result_buffer.jl b/src/result_buffer.jl index 35a04a5..35e1872 100644 --- a/src/result_buffer.jl +++ b/src/result_buffer.jl @@ -8,16 +8,14 @@ A module implementing a bitflag type used to indicate the status of a row in a `TaskResultBuffer`. - `0x00` -- `Ok`: All fields were parsed successfully. -- `0x01` -- `HasColumnIndicators`: Some fields have missing values. -- `0x02` -- `TooFewColumns`: The row has fewer fields than expected according to the schema. Implies `HasColumnIndicators`. +- `0x01` -- `MissingValues`: Some fields have missing values. +- `0x02` -- `TooFewColumns`: The row has fewer fields than expected according to the schema. Implies `MissingValues`. 
- `0x04` -- `TooManyColumns`: The row has more fields than expected according to the schema. -- `0x08` -- `ValueParsingError`: Some fields could not be parsed due to an unknown instance of a particular type. Implies `HasColumnIndicators`. -- `0x10` -- `SkippedRow`: The row contains no valid values, e.g. it was a comment. Implies `HasColumnIndicators`. +- `0x08` -- `ValueParsingError`: Some fields could not be parsed due to an unknown instance of a particular type. Implies `MissingValues`. +- `0x10` -- `SkippedRow`: The row contains no valid values, e.g. it was a comment. Implies `MissingValues`. -Multiple flags can be set at the same time, e.g. `HasColumnIndicators | TooFewColumns` means that at least column in the row does not have a known value and that there were not enough fields in this row. -If a row has `HasColumnIndicators` flag set, then the `column_indicators` field of the `TaskResultBuffer` will contain a bitset indicating which columns have missing values. - -Distinguishing which values are missing due (i.e. successfully parsed `sentinel` values) and which failed to parse is currently unsupported, as we assume the integrity of the entire row is required. +Multiple flags can be set at the same time, e.g. `MissingValues | TooFewColumns` means that at least column in the row does not have a known value and that there were not enough fields in this row. +If a row has `MissingValues` flag set, then the `missing_values` field of the `TaskResultBuffer` will contain a bitset indicating which columns have missing values. # See also: - [`TaskResultBuffer`](#TaskResultBuffer) @@ -26,7 +24,7 @@ module RowStatus const T = UInt8 # Type of the row status flags const Ok = 0x00 # All ok - const HasColumnIndicators = 0x01 # Some fields have missing values + const MissingValues = 0x01 # Some fields have missing values const TooFewColumns = 0x02 # Some fields have missing values due field count mismatch with the schema const TooManyColumns = 0x04 # We have a valid record according to schema, but we didn't parse some fields due to missing schema info const ValueParsingError = 0x08 # We couldn't parse some fields because we don't know how to parse that particular instance of that type @@ -34,7 +32,7 @@ module RowStatus # Used in DebugContext const Marks = ('✓', '?', '<', '>', '!', '#') - const Names = ("Ok", "HasColumnIndicators", "TooFewColumns", "TooManyColumns", "ValueParsingError", "SkippedRow") + const Names = ("Ok", "MissingValues", "TooFewColumns", "TooManyColumns", "ValueParsingError", "SkippedRow") const Flags = (0x00, 0x01, 0x02, 0x04, 0x08, 0x10) end @@ -46,7 +44,7 @@ The number of rows in the matrix is equal the number of rows with at least one m The number of columns in the matrix is equal to the number of columns in the results buffer. When consuming a `TaskResultBuffer` it is this recommended to iterate it from start to finish -and note the `RowStatus` for the `HasColumnIndicators` which signals that the row contains missing values. +and note the `RowStatus` for the `MissingValues` which signals that the row contains missing values. Using `ColumnIterator`s is the easiest way to do this. For example: ```julia @@ -128,7 +126,8 @@ Holds the parsed results in columnar buffers. - `id::Int`: The unique identifier of the buffer object, in range of 1 to two times `nworkers` arg to the `parse_file` function. - `cols::Vector{BufferedVector}`: A vector of vectors, each corresponding to a column in the CSV file. Note this field is abstractly typed. 
- `row_statuses::BufferedVector{RowStatus.T}`: Contains a $(RowStatus.T) status flag for each row. -- `column_indicators::BitSetMatrix`: a special type of `BitMatrix` where each row is a bitset signalling missing column values. Number of rows corresponds to the number of row statuses where `HasColumnIndicators` flag is set. +- `missing_values::BitSetMatrix`: a special type of `BitMatrix` where each row is a bitset signalling missing column values. Number of rows corresponds to the number of row statuses where `MissingValues` or `SkippedRow` flag is set. +- `errored_values::BitSetMatrix`: a special type of `BitMatrix` where each row is a bitset signalling errored column values. Number of rows corresponds to the number of row statuses where `TooFewColumns or `ValueParsingError` flag is set. # Notes - Each column in the `cols` field is a `BufferedVector` of the same type as the corresponding column in the `ParsingContext` schema. @@ -141,31 +140,32 @@ Holds the parsed results in columnar buffers. The following shows the structure of a `TaskResultBuffer` storing results for a messy CSV file which we parsed expecting 3 `Int` columns and while skipping over comments: ``` -+-------------------------+-------------------------------------------------------------------------------+ -| INPUT CSV | TASK_RESULT_BUFFER | -+-------------------------+---------------------------+--------------------+----------+---------+---------+ -| head,er,row | row_statuses | column_indicators | cols[1] | cols[2] | cols[3] | -+-------------------------+---------------------------+--------------------+----------+---------+---------+ -| 1,1,1 | Ok | No value | 1 | 1 | 1 | -| 2,,2 | HasCI | 0 1 0 #=[1,:]=# | 2 | undef | 2 | -| 2,, | HasCI | 0 1 1 #=[2,:]=# | 2 | undef | undef | -| 3,3 | HasCI | TooFewColumns | 0 0 1 #=[3,:]=# | 3 | 3 | undef | -| 3 | HasCI | TooFewColumns | 0 1 1 #=[4,:]=# | 3 | undef | undef | -| 4,4,4,4 | TooManyColumns | No value | 4 | 4 | 4 | -| 4,4,4,4,4 | TooManyColumns | No value | 4 | 4 | 4 | -| garbage,garbage,garbage | HasCI | ValueParsingError | 1 1 1 #=[5,:]=# | undef | undef | undef | -| garbage,5,garbage | HasCI | ValueParsingError | 1 0 1 #=[6,:]=# | undef | 5 | undef | -| garbage,,garbage | HasCI | ValueParsingError | 1 1 1 #=[7,:]=# | undef | undef | undef | -| # comment | HasCI | SkippedRow | 1 1 1 #=[8,:]=# | undef | undef | undef | -+-------------------------+---------------------------+--------------------+----------+---------+---------+ -HasCI = HasColumnIndicators ++-------------------------+------------------------------------------------------------------------------+ +| INPUT CSV | TASK_RESULT_BUFFER | ++-------------------------+--------------------------+----------+----------+---------+---------+---------+ +| head,er,row | row_statuses | missing | errored | cols[1] | cols[2] | cols[3] | ++-------------------------+--------------------------+----------+----------+---------+---------+---------+ +| 1,1,1 | Ok | No value | No value | 1 | 1 | 1 | +| 2,,2 | Miss | 0 1 0 | No value | 2 | undef | 2 | +| 2,, | Miss | 0 1 1 | No value | 2 | undef | undef | +| 3,3 | TooFewColumns | No value | 0 0 1 | 3 | 3 | undef | +| 3 | TooFewColumns | No value | 0 1 1 | 3 | undef | undef | +| 4,4,4,4 | TooManyColumns | No value | No value | 4 | 4 | 4 | +| 4,4,4,4,4 | TooManyColumns | No value | No value | 4 | 4 | 4 | +| garbage,garbage,garbage | ValueParsingError | No value | 1 1 1 | undef | undef | undef | +| garbage,5,garbage | ValueParsingError | No value | 1 0 1 | undef | 5 | undef | +| 
garbage,,garbage | Miss | ValueParsingError | 0 1 0 | 1 0 1 | undef | undef | undef | +| # comment | Miss | SkippedRow | 1 1 1 | No value | undef | undef | undef | ++-------------------------+--------------------------+----------+----------+---------+---------+---------+ +Miss = MissingValues ``` """ struct TaskResultBuffer <: AbstractResultBuffer id::Int cols::Vector{BufferedVector} row_statuses::BufferedVector{RowStatus.T} - column_indicators::BitSetMatrix + missing_values::BitSetMatrix + errored_values::BitSetMatrix end # Since the chunk size if always <= 2GiB, we can never never overflow a PosLen31 @@ -179,6 +179,7 @@ _translate_to_buffer_type(::Type{T}) where {T} = T TaskResultBuffer(id, schema) = TaskResultBuffer(id, schema, 0) # Assumes `schema` has been `_translate_to_buffer_type`'d function TaskResultBuffer(id, schema::Vector{DataType}, n::Int) + ncols = count(x->x !== Nothing, schema) TaskResultBuffer( id, BufferedVector[ @@ -188,19 +189,22 @@ function TaskResultBuffer(id, schema::Vector{DataType}, n::Int) if T !== Nothing ], BufferedVector{RowStatus.T}(Vector{RowStatus.T}(undef, n), 0), - BitSetMatrix(0, count(x->x !== Nothing, schema)), + BitSetMatrix(0, ncols), + BitSetMatrix(0, ncols), ) end # Assumes `schema` has been `_translate_to_buffer_type`'d function _make_result_buffers(num_buffers::Integer, schema, n) out = Vector{TaskResultBuffer}(undef, num_buffers) + ncols = count(x->x !== Nothing, schema) for i in 1:num_buffers @inbounds out[i] = TaskResultBuffer( i, Vector{BufferedVector}(undef, length(schema)), BufferedVector{RowStatus.T}(Vector{RowStatus.T}(undef, n), 0), - BitSetMatrix(0, count(x->x !== Nothing, schema)), + BitSetMatrix(0, ncols), + BitSetMatrix(0, ncols), ) end for (j, T) in enumerate(schema) @@ -220,7 +224,8 @@ Base.length(buf::TaskResultBuffer) = length(buf.row_statuses) function Base.empty!(buf::TaskResultBuffer) foreach(empty!, buf.cols) empty!(buf.row_statuses) - empty!(buf.column_indicators) + empty!(buf.missing_values) + empty!(buf.errored_values) return nothing end @@ -240,11 +245,12 @@ struct ColumnIterator{T} x::BufferedVector{T} idx::Int statuses::BufferedVector{RowStatus.T} - colinds::BitSetMatrix + missing_values::BitSetMatrix + errored_values::BitSetMatrix end function ColumnIterator{T}(buf::TaskResultBuffer, column_position::Int) where {T} col = (buf.cols[column_position])::BufferedVector{T} - return ColumnIterator{T}(col, column_position, buf.row_statuses, buf.column_indicators) + return ColumnIterator{T}(col, column_position, buf.row_statuses, buf.missing_values, buf.errored_values) end Base.length(itr::ColumnIterator) = length(itr.statuses) @@ -252,23 +258,27 @@ Base.length(itr::ColumnIterator) = length(itr.statuses) struct ParsedField{T} value::T # The parsed value, garbage if `ismissingvalue` is true isinvalidrow::Bool # True if the row didn't match the schema - ismissingvalue::Bool # True if the value was missing or invalid + iserroredvalue::Bool # True if the value is invalid + ismissingvalue::Bool # True if the value is missing end Base.iterate(t::ParsedField, iter=1) = iter > nfields(t) ? 
nothing : (getfield(t, iter), iter + 1) -function Base.iterate(itr::ColumnIterator{T}, state=(row=1, indicator_idx=0)) where {T} - row, indicator_idx = state.row, state.indicator_idx +function Base.iterate(itr::ColumnIterator{T}, state=(row=1, missing_idx=0, errored_idx=0)) where {T} + row, missing_idx, errored_idx = state.row, state.missing_idx, state.errored_idx if row > length(itr.x) return nothing end s = @inbounds itr.statuses[row] value = @inbounds itr.x[row]::T - isinvalidrow = s > RowStatus.HasColumnIndicators - - has_indicators = (s & RowStatus.HasColumnIndicators) != 0 - indicator_idx += has_indicators - ismissingvalue = has_indicators && @inbounds(itr.colinds[indicator_idx, itr.idx]) + has_errored_values_row = (s & (RowStatus.TooFewColumns | RowStatus.ValueParsingError)) != 0 + isinvalidrow = has_errored_values_row | ((s & RowStatus.TooManyColumns) != 0) + errored_idx += has_errored_values_row + + has_missing_values_row = (s & RowStatus.MissingValues) != 0 # All SkippedRows have the MissingValues tag + missing_idx += has_missing_values_row + ismissingvalue = has_missing_values_row && @inbounds(itr.missing_values[missing_idx, itr.idx]) + iserroredvalue = has_errored_values_row && @inbounds(itr.errored_values[errored_idx, itr.idx]) row += 1 - return ParsedField(value, isinvalidrow, ismissingvalue), (; row, indicator_idx) + return ParsedField(value, isinvalidrow, iserroredvalue, ismissingvalue), (; row, missing_idx, errored_idx) end diff --git a/test/simple_file_parsing.jl b/test/simple_file_parsing.jl index 62c8e49..5f6b674 100644 --- a/test/simple_file_parsing.jl +++ b/test/simple_file_parsing.jl @@ -258,8 +258,8 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results[1].cols[1]) == 1 @test length(testctx.results[1].cols[2]) == 1 @test length(testctx.results[1].cols[3]) == 1 - @test testctx.results[1].row_statuses[1] == RowStatus.HasColumnIndicators - @test vec(collect(testctx.results[1].column_indicators)) == [true, true, true] + @test testctx.results[1].row_statuses[1] == RowStatus.MissingValues + @test vec(collect(testctx.results[1].missing_values)) == [true, true, true] end @testset "char" begin @@ -631,7 +631,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[2][[1, 3]] == [2, 6] @test length(testctx.results) == 1 - @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[1].cols[1]) == 4 @test length(testctx.results[1].cols[2]) == 4 end @@ -663,9 +663,9 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 4 @test testctx.results[1].row_statuses == [RowStatus.Ok] - @test testctx.results[2].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[2].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test testctx.results[3].row_statuses == [RowStatus.Ok] - @test testctx.results[4].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[4].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[2].cols[1]) == 1 @test length(testctx.results[2].cols[2]) == 1 
@test length(testctx.results[4].cols[1]) == 1 @@ -698,7 +698,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[2][[1, 3]] == [2, 6] @test length(testctx.results) == 1 - @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[1].cols[1]) == 4 @test length(testctx.results[1].cols[2]) == 4 end @@ -733,9 +733,9 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 4 @test testctx.results[1].row_statuses == [RowStatus.Ok] - @test testctx.results[2].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[2].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test testctx.results[3].row_statuses == [RowStatus.Ok] - @test testctx.results[4].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[4].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[2].cols[1]) == 1 @test length(testctx.results[2].cols[2]) == 1 @test length(testctx.results[4].cols[1]) == 1 @@ -771,7 +771,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[2][[1, 3]] == [2, 6] @test length(testctx.results) == 1 - @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[1].row_statuses == [RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow, RowStatus.Ok, RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[1].cols[1]) == 4 @test length(testctx.results[1].cols[2]) == 4 end @@ -808,9 +808,9 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 4 @test testctx.results[1].row_statuses == [RowStatus.Ok] - @test testctx.results[2].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[2].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test testctx.results[3].row_statuses == [RowStatus.Ok] - @test testctx.results[4].row_statuses == [RowStatus.HasColumnIndicators | RowStatus.SkippedRow] + @test testctx.results[4].row_statuses == [RowStatus.MissingValues | RowStatus.SkippedRow] @test length(testctx.results[2].cols[1]) == 1 @test length(testctx.results[2].cols[2]) == 1 @test length(testctx.results[4].cols[1]) == 1 @@ -1074,28 +1074,28 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial testctx = TestContext() parse_file(io_t("a,b,c\n\n"), nothing, testctx, force=alg, ignoreemptyrows=false) @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.TooFewColumns | RowStatus.HasColumnIndicators] + @test testctx.results[1].row_statuses == [RowStatus.TooFewColumns] testctx = TestContext() parse_file(io_t("a,b,c\n\n"), nothing, testctx, force=alg, ignoreemptyrows=true) @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators] + @test testctx.results[1].row_statuses == 
[RowStatus.SkippedRow | RowStatus.MissingValues] testctx = TestContext() parse_file(io_t("a,b,c\r\r"), nothing, testctx, force=alg, ignoreemptyrows=true, newlinechar='\r') @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues] testctx = TestContext() parse_file(io_t("a,b,c\r\n\r\n"), nothing, testctx, force=alg, ignoreemptyrows=true, newlinechar='\n') @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues] testctx = TestContext() parse_file(io_t("a,b,c\r\n\r\n1,2,3\r\n4,5,6"), [Int,Int,Int], testctx, force=alg, ignoreemptyrows=true) @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators, RowStatus.Ok, RowStatus.Ok] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues, RowStatus.Ok, RowStatus.Ok] @test testctx.results[1].cols[1][2:3] == [1, 4] @test testctx.results[1].cols[2][2:3] == [2, 5] @test testctx.results[1].cols[3][2:3] == [3, 6] @@ -1103,7 +1103,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial testctx = TestContext() parse_file(io_t("a,b,c\n\n1,2,3\n4,5,6"), [Int,Int,Int], testctx, force=alg, ignoreemptyrows=true) @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators, RowStatus.Ok, RowStatus.Ok] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues, RowStatus.Ok, RowStatus.Ok] @test testctx.results[1].cols[1][2:3] == [1, 4] @test testctx.results[1].cols[2][2:3] == [2, 5] @test testctx.results[1].cols[3][2:3] == [3, 6] @@ -1111,7 +1111,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial testctx = TestContext() parse_file(io_t("a,b,c\r\r1,2,3\r4,5,6"), [Int,Int,Int], testctx, force=alg, ignoreemptyrows=true, newlinechar='\r') @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators, RowStatus.Ok, RowStatus.Ok] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues, RowStatus.Ok, RowStatus.Ok] @test testctx.results[1].cols[1][2:3] == [1, 4] @test testctx.results[1].cols[2][2:3] == [2, 5] @test testctx.results[1].cols[3][2:3] == [3, 6] @@ -1119,7 +1119,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial testctx = TestContext() parse_file(io_t("a,b,c\r\n\r\n1,2,3\r\n\r\n4,5,6"), [Int,Int,Int], testctx, force=alg, ignoreemptyrows=true) @test testctx.header == [:a, :b, :c] - @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.HasColumnIndicators, RowStatus.Ok, RowStatus.SkippedRow | RowStatus.HasColumnIndicators, RowStatus.Ok] + @test testctx.results[1].row_statuses == [RowStatus.SkippedRow | RowStatus.MissingValues, RowStatus.Ok, RowStatus.SkippedRow | RowStatus.MissingValues, RowStatus.Ok] @test testctx.results[1].cols[1][[2,4]] == [1, 4] @test testctx.results[1].cols[2][[2,4]] == [2, 5] @test testctx.results[1].cols[3][[2,4]] == [3, 6] @@ -1447,7 +1447,7 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:1] == 
[Parsers.PosLen31(7, 3)] @test testctx.strings[1][1][1:1] == ["foo"] @test testctx.results[1].row_statuses[1] & ChunkedCSV.RowStatus.ValueParsingError > 0 - @test testctx.results[1].column_indicators[1, 2] + @test testctx.results[1].errored_values[1, 2] @test length(testctx.results[1].cols[1]) == 1 @test length(testctx.results[1].cols[2]) == 1 end @@ -1471,8 +1471,8 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 1 @test testctx.results[1].cols[1][1:4] == 0:3 @test testctx.results[1].cols[2][1:3] == 1:3 - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] end @testset "decimals" begin @@ -1493,8 +1493,8 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 1 @test testctx.results[1].cols[1][1:4] == 0:3 @test testctx.results[1].cols[2][1:3] == fill(FixedDecimal{Int,4}(1), 3) - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] end @testset "guess datetimes" begin @@ -1518,8 +1518,8 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test length(testctx.results) == 1 @test testctx.results[1].cols[1][1:7] == 0:6 @test testctx.results[1].cols[2][1:6] == fill(DateTime(1969, 7, 20), 6) - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] end end @@ -2080,14 +2080,14 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:2] == [1,3] @test testctx.results[1].cols[2][2] == 4 @test testctx.results[1].cols[3][1] == 3 - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 1] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[3, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 1] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[3, 3] @test length(testctx.results[1].cols[1]) == 3 @test length(testctx.results[1].cols[2]) == 3 @test length(testctx.results[1].cols[3]) == 3 @@ -2111,14 +2111,14 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:2] == ['a','c'] @test testctx.results[1].cols[2][2] == 'd' @test 
testctx.results[1].cols[3][1] == 'b' - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 1] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[3, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 1] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[3, 3] @test length(testctx.results[1].cols[1]) == 3 @test length(testctx.results[1].cols[2]) == 3 @test length(testctx.results[1].cols[3]) == 3 @@ -2146,22 +2146,22 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:6] == [FixedDecimal{Int32,1}(1.0),FixedDecimal{Int32,1}(0.3),FixedDecimal{Int32,1}(0),FixedDecimal{Int32,1}(0),FixedDecimal{Int32,1}(0),FixedDecimal{Int32,1}(0)] @test testctx.results[1].cols[2][[2,4,6]] == [FixedDecimal{UInt32,2}(40), FixedDecimal{UInt32,2}(0), FixedDecimal{UInt32,2}(0)] @test testctx.results[1].cols[3][[1,3,5]] == [FixedDecimal{Int64,3}(30), FixedDecimal{Int64,3}(0), FixedDecimal{Int64,3}(0)] - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[4, 3] - @test testctx.results[1].column_indicators[5, 2] - @test testctx.results[1].column_indicators[6, 3] - @test testctx.results[1].column_indicators[7, 1] - @test testctx.results[1].column_indicators[7, 2] - @test testctx.results[1].column_indicators[7, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[4, 
3] + @test testctx.results[1].missing_values[5, 2] + @test testctx.results[1].missing_values[6, 3] + @test testctx.results[1].missing_values[7, 1] + @test testctx.results[1].missing_values[7, 2] + @test testctx.results[1].missing_values[7, 3] @test length(testctx.results[1].cols[1]) == 7 @test length(testctx.results[1].cols[2]) == 7 @test length(testctx.results[1].cols[3]) == 7 @@ -2189,22 +2189,22 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:6] == [DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4)] @test testctx.results[1].cols[2][[2, 4, 6]] == [DateTime(1990, 3, 6), DateTime(1990, 3, 6), DateTime(1990, 3, 6)] @test testctx.results[1].cols[3][[1, 3, 5]] == [DateTime(1990, 3, 5), DateTime(1990, 3, 5), DateTime(1990, 3, 5)] - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[4, 3] - @test testctx.results[1].column_indicators[5, 2] - @test testctx.results[1].column_indicators[6, 3] - @test testctx.results[1].column_indicators[7, 1] - @test testctx.results[1].column_indicators[7, 2] - @test testctx.results[1].column_indicators[7, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[4, 3] + @test testctx.results[1].missing_values[5, 2] + @test testctx.results[1].missing_values[6, 3] + @test testctx.results[1].missing_values[7, 1] + @test testctx.results[1].missing_values[7, 2] + @test testctx.results[1].missing_values[7, 3] @test length(testctx.results[1].cols[1]) == 7 @test length(testctx.results[1].cols[2]) == 7 @test length(testctx.results[1].cols[3]) == 7 @@ -2232,22 +2232,22 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:6] == [DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4), DateTime(1990, 3, 4)] @test testctx.results[1].cols[2][[2, 4, 6]] == [DateTime(1990, 3, 6), DateTime(1990, 3, 6), DateTime(1990, 3, 6)] @test testctx.results[1].cols[3][[1, 3, 5]] == [DateTime(1990, 3, 
5), DateTime(1990, 3, 5), DateTime(1990, 3, 5)] - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[4, 3] - @test testctx.results[1].column_indicators[5, 2] - @test testctx.results[1].column_indicators[6, 3] - @test testctx.results[1].column_indicators[7, 1] - @test testctx.results[1].column_indicators[7, 2] - @test testctx.results[1].column_indicators[7, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[4, 3] + @test testctx.results[1].missing_values[5, 2] + @test testctx.results[1].missing_values[6, 3] + @test testctx.results[1].missing_values[7, 1] + @test testctx.results[1].missing_values[7, 2] + @test testctx.results[1].missing_values[7, 3] @test length(testctx.results[1].cols[1]) == 7 @test length(testctx.results[1].cols[2]) == 7 @test length(testctx.results[1].cols[3]) == 7 @@ -2277,22 +2277,22 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:6] == [s, s, s, s, s, s] @test testctx.results[1].cols[2][[2, 4, 6]] == [s, s, s] @test testctx.results[1].cols[3][[1, 3, 5]] == [s, s, s] - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[4, 3] - @test testctx.results[1].column_indicators[5, 2] - @test testctx.results[1].column_indicators[6, 3] - @test testctx.results[1].column_indicators[7, 1] - @test testctx.results[1].column_indicators[7, 2] - 
@test testctx.results[1].column_indicators[7, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[4, 3] + @test testctx.results[1].missing_values[5, 2] + @test testctx.results[1].missing_values[6, 3] + @test testctx.results[1].missing_values[7, 1] + @test testctx.results[1].missing_values[7, 2] + @test testctx.results[1].missing_values[7, 3] @test length(testctx.results[1].cols[1]) == 7 @test length(testctx.results[1].cols[2]) == 7 @test length(testctx.results[1].cols[3]) == 7 @@ -2321,22 +2321,22 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][1:6] == [u, u, u, u, u, u] @test testctx.results[1].cols[2][[2, 4, 6]] == [u, u, u] @test testctx.results[1].cols[3][[1, 3, 5]] == [u, u, u] - @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.HasColumnIndicators - @test testctx.results[1].column_indicators[1, 2] - @test testctx.results[1].column_indicators[2, 3] - @test testctx.results[1].column_indicators[3, 2] - @test testctx.results[1].column_indicators[4, 3] - @test testctx.results[1].column_indicators[5, 2] - @test testctx.results[1].column_indicators[6, 3] - @test testctx.results[1].column_indicators[7, 1] - @test testctx.results[1].column_indicators[7, 2] - @test testctx.results[1].column_indicators[7, 3] + @test testctx.results[1].row_statuses[1] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[2] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[3] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[4] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[5] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[6] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].row_statuses[7] == ChunkedCSV.RowStatus.MissingValues + @test testctx.results[1].missing_values[1, 2] + @test testctx.results[1].missing_values[2, 3] + @test testctx.results[1].missing_values[3, 2] + @test testctx.results[1].missing_values[4, 3] + @test testctx.results[1].missing_values[5, 2] + @test testctx.results[1].missing_values[6, 3] + @test testctx.results[1].missing_values[7, 1] + @test testctx.results[1].missing_values[7, 2] + @test testctx.results[1].missing_values[7, 3] @test 
length(testctx.results[1].cols[1]) == 7 @test length(testctx.results[1].cols[2]) == 7 @test length(testctx.results[1].cols[3]) == 7 @@ -2397,25 +2397,26 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[2][[1, 4, 6, 7, 9]] == [1, 3, 4, 4, 5] @test testctx.results[1].cols[3][[1, 2, 6, 7]] == [1, 2, 4, 4] @test testctx.results[1].row_statuses[1] == RS.Ok - @test testctx.results[1].row_statuses[2] == RS.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == RS.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == RS.HasColumnIndicators | RS.TooFewColumns - @test testctx.results[1].row_statuses[5] == RS.HasColumnIndicators | RS.TooFewColumns + @test testctx.results[1].row_statuses[2] == RS.MissingValues + @test testctx.results[1].row_statuses[3] == RS.MissingValues + @test testctx.results[1].row_statuses[4] == RS.TooFewColumns + @test testctx.results[1].row_statuses[5] == RS.TooFewColumns @test testctx.results[1].row_statuses[6] == RS.TooManyColumns @test testctx.results[1].row_statuses[7] == RS.TooManyColumns - @test testctx.results[1].row_statuses[8] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[9] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[10] == RS.HasColumnIndicators | RS.SkippedRow - @test testctx.results[1].row_statuses[11] == RS.HasColumnIndicators - colinds = collect(testctx.results[1].column_indicators) - @test colinds[1, 2] # 2,,2 - @test colinds[2,:] == [false, true, true] # 2,, - @test colinds[3,:] == [false, false, true] # 3,3 - @test colinds[4,:] == [false, true, true] # 3 - @test all(colinds[5,:]) # garbage,garbage,garbage - @test colinds[6,:] == [true, false, true] # garbage,1,garbage - @test all(colinds[7,:]) # # comment - @test all(colinds[8,:]) # ,, + @test testctx.results[1].row_statuses[8] == RS.ValueParsingError + @test testctx.results[1].row_statuses[9] == RS.ValueParsingError + @test testctx.results[1].row_statuses[10] == RS.MissingValues | RS.SkippedRow + @test testctx.results[1].row_statuses[11] == RS.MissingValues + missing_values = collect(testctx.results[1].missing_values) + @test missing_values[1, 2] # 2,,2 + @test missing_values[2,:] == [false, true, true] # 2,, + @test all(missing_values[3,:]) # # comment + @test all(missing_values[4,:]) # ,, + errored_values = collect(testctx.results[1].errored_values) + @test errored_values[1,:] == [false, false, true] # 3,3 + @test errored_values[2,:] == [false, true, true] # 3 + @test all(errored_values[3,:]) # garbage,garbage,garbage + @test errored_values[4,:] == [true, false, true] # garbage,1,garbage end end @@ -2448,25 +2449,26 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[1][[1, 4, 6, 7, 9]] == [1, 3, 4, 4, 5] @test testctx.results[1].cols[2][[1, 2, 6, 7]] == [1, 2, 4, 4] @test testctx.results[1].row_statuses[1] == RS.Ok - @test testctx.results[1].row_statuses[2] == RS.HasColumnIndicators - @test testctx.results[1].row_statuses[3] == RS.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == RS.HasColumnIndicators | RS.TooFewColumns - @test testctx.results[1].row_statuses[5] == RS.HasColumnIndicators | RS.TooFewColumns + @test testctx.results[1].row_statuses[2] == RS.MissingValues + @test testctx.results[1].row_statuses[3] == RS.MissingValues + @test testctx.results[1].row_statuses[4] == RS.TooFewColumns + @test testctx.results[1].row_statuses[5] == RS.TooFewColumns 
@test testctx.results[1].row_statuses[6] == RS.TooManyColumns @test testctx.results[1].row_statuses[7] == RS.TooManyColumns - @test testctx.results[1].row_statuses[8] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[9] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[10] == RS.HasColumnIndicators | RS.SkippedRow - @test testctx.results[1].row_statuses[11] == RS.HasColumnIndicators - colinds = collect(testctx.results[1].column_indicators) - @test colinds[1,:] == [true, false] # 2,,2 - @test colinds[2,:] == [true,true] # 2,, - @test colinds[3,:] == [false,true] # 3,3 - @test colinds[4,:] == [true,true] # 3 - @test colinds[5,:] == [true,true] # garbage,garbage,garbage - @test colinds[6,:] == [false,true] # garbage,1,garbage - @test colinds[7,:] == [true,true] # # comment - @test colinds[8,:] == [true,true] # ,, + @test testctx.results[1].row_statuses[8] == RS.ValueParsingError + @test testctx.results[1].row_statuses[9] == RS.ValueParsingError + @test testctx.results[1].row_statuses[10] == RS.MissingValues | RS.SkippedRow + @test testctx.results[1].row_statuses[11] == RS.MissingValues + missing_values = collect(testctx.results[1].missing_values) + @test missing_values[1,:] == [true, false] # 2,,2 + @test missing_values[2,:] == [true,true] # 2,, + @test missing_values[3,:] == [true,true] # # comment + @test missing_values[4,:] == [true,true] # ,, + errored_values = collect(testctx.results[1].errored_values) + @test errored_values[1,:] == [false,true] # 3,3 + @test errored_values[2,:] == [true,true] # 3 + @test errored_values[3,:] == [true,true] # garbage,garbage,garbage + @test errored_values[4,:] == [false,true] # garbage,1,garbage RS = ChunkedCSV.RowStatus testctx = TestContext() @@ -2495,23 +2497,24 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial @test testctx.results[1].cols[2][[1, 2, 6, 7]] == [1, 2, 4, 4] @test testctx.results[1].row_statuses[1] == RS.Ok @test testctx.results[1].row_statuses[2] == RS.Ok - @test testctx.results[1].row_statuses[3] == RS.HasColumnIndicators - @test testctx.results[1].row_statuses[4] == RS.HasColumnIndicators | RS.TooFewColumns - @test testctx.results[1].row_statuses[5] == RS.HasColumnIndicators | RS.TooFewColumns + @test testctx.results[1].row_statuses[3] == RS.MissingValues + @test testctx.results[1].row_statuses[4] == RS.TooFewColumns + @test testctx.results[1].row_statuses[5] == RS.TooFewColumns @test testctx.results[1].row_statuses[6] == RS.TooManyColumns @test testctx.results[1].row_statuses[7] == RS.TooManyColumns - @test testctx.results[1].row_statuses[8] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[9] == RS.HasColumnIndicators | RS.ValueParsingError - @test testctx.results[1].row_statuses[10] == RS.HasColumnIndicators | RS.SkippedRow - @test testctx.results[1].row_statuses[11] == RS.HasColumnIndicators - colinds = collect(testctx.results[1].column_indicators) - @test colinds[1,:] == [false, true] # 2,, - @test colinds[2,:] == [false, true] # 3,3 - @test colinds[3,:] == [false, true] # 3 - @test colinds[4,:] == [true, true] # garbage,garbage,garbage - @test colinds[5,:] == [true, true] # garbage,1,garbage - @test colinds[6,:] == [true, true] # # comment - @test colinds[7,:] == [true, true] # ,, + @test testctx.results[1].row_statuses[8] == RS.ValueParsingError + @test testctx.results[1].row_statuses[9] == RS.ValueParsingError + @test testctx.results[1].row_statuses[10] == RS.MissingValues | 
RS.SkippedRow
+            @test testctx.results[1].row_statuses[11] == RS.MissingValues
+            missing_values = collect(testctx.results[1].missing_values)
+            @test missing_values[1,:] == [false, true] # 2,,
+            @test missing_values[2,:] == [true, true] # # comment
+            @test missing_values[3,:] == [true, true] # ,,
+            errored_values = collect(testctx.results[1].errored_values)
+            @test errored_values[1,:] == [false, true] # 3,3
+            @test errored_values[2,:] == [false, true] # 3
+            @test errored_values[3,:] == [true, true] # garbage,garbage,garbage
+            @test errored_values[4,:] == [true, true] # garbage,1,garbage

             RS = ChunkedCSV.RowStatus
             testctx = TestContext()
@@ -2539,24 +2542,25 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial
             @test testctx.results[1].cols[1][1:7] == [1, 2, 2, 3, 3, 4, 4]
             @test testctx.results[1].cols[2][[1, 4, 6, 7, 9]] == [1, 3, 4, 4, 5]
             @test testctx.results[1].row_statuses[1] == RS.Ok
-            @test testctx.results[1].row_statuses[2] == RS.HasColumnIndicators
-            @test testctx.results[1].row_statuses[3] == RS.HasColumnIndicators
+            @test testctx.results[1].row_statuses[2] == RS.MissingValues
+            @test testctx.results[1].row_statuses[3] == RS.MissingValues
             @test testctx.results[1].row_statuses[4] == RS.Ok
-            @test testctx.results[1].row_statuses[5] == RS.HasColumnIndicators | RS.TooFewColumns
+            @test testctx.results[1].row_statuses[5] == RS.TooFewColumns
             @test testctx.results[1].row_statuses[6] == RS.TooManyColumns
             @test testctx.results[1].row_statuses[7] == RS.TooManyColumns
-            @test testctx.results[1].row_statuses[8] == RS.HasColumnIndicators | RS.ValueParsingError
-            @test testctx.results[1].row_statuses[9] == RS.HasColumnIndicators | RS.ValueParsingError
-            @test testctx.results[1].row_statuses[10] == RS.HasColumnIndicators | RS.SkippedRow
-            @test testctx.results[1].row_statuses[11] == RS.HasColumnIndicators
-            colinds = collect(testctx.results[1].column_indicators)
-            @test colinds[1,:] == [false, true] # 2,,
-            @test colinds[2,:] == [false, true] # 2,,2
-            @test colinds[3,:] == [false, true] # 3
-            @test colinds[4,:] == [true, true] # garbage,garbage,garbage
-            @test colinds[5,:] == [true, false] # garbage,1,garbage
-            @test colinds[6,:] == [true, true] # # comment
-            @test colinds[7,:] == [true, true] # ,,
+            @test testctx.results[1].row_statuses[8] == RS.ValueParsingError
+            @test testctx.results[1].row_statuses[9] == RS.ValueParsingError
+            @test testctx.results[1].row_statuses[10] == RS.MissingValues | RS.SkippedRow
+            @test testctx.results[1].row_statuses[11] == RS.MissingValues
+            missing_values = collect(testctx.results[1].missing_values)
+            @test missing_values[1,:] == [false, true] # 2,,
+            @test missing_values[2,:] == [false, true] # 2,,2
+            @test missing_values[3,:] == [true, true] # # comment
+            @test missing_values[4,:] == [true, true] # ,,
+            errored_values = collect(testctx.results[1].errored_values)
+            @test errored_values[1,:] == [false, true] # 3
+            @test errored_values[2,:] == [true, true] # garbage,garbage,garbage
+            @test errored_values[3,:] == [true, false] # garbage,1,garbage

             RS = ChunkedCSV.RowStatus
             testctx = TestContext()
@@ -2589,15 +2593,16 @@ for (io_t, alg) in Iterators.product((iobuffer, iostream, gzip_stream), (:serial
             @test testctx.results[1].row_statuses[5] == RS.Ok
             @test testctx.results[1].row_statuses[6] == RS.TooManyColumns
             @test testctx.results[1].row_statuses[7] == RS.TooManyColumns
-            @test testctx.results[1].row_statuses[8] == RS.HasColumnIndicators | RS.ValueParsingError
-            @test testctx.results[1].row_statuses[9] == RS.HasColumnIndicators | RS.ValueParsingError
-            @test testctx.results[1].row_statuses[10] == RS.HasColumnIndicators | RS.SkippedRow
-            @test testctx.results[1].row_statuses[11] == RS.HasColumnIndicators
-            colinds = collect(testctx.results[1].column_indicators)
-            @test colinds[1,:] == [true] # garbage,garbage,garbage
-            @test colinds[2,:] == [true] # garbage,1,garbage
-            @test colinds[3,:] == [true] # # comment
-            @test colinds[4,:] == [true] # ,,
+            @test testctx.results[1].row_statuses[8] == RS.ValueParsingError
+            @test testctx.results[1].row_statuses[9] == RS.ValueParsingError
+            @test testctx.results[1].row_statuses[10] == RS.MissingValues | RS.SkippedRow
+            @test testctx.results[1].row_statuses[11] == RS.MissingValues
+            missing_values = collect(testctx.results[1].missing_values)
+            @test missing_values[1,:] == [true] # # comment
+            @test missing_values[2,:] == [true] # ,,
+            errored_values = collect(testctx.results[1].errored_values)
+            @test errored_values[1,:] == [true] # garbage,garbage,garbage
+            @test errored_values[2,:] == [true] # garbage,1,garbage
         end
     end
 end # for (io_t, alg)
diff --git a/test/task_result_buffer_tests.jl b/test/task_result_buffer_tests.jl
index 7987446..05ca91e 100644
--- a/test/task_result_buffer_tests.jl
+++ b/test/task_result_buffer_tests.jl
@@ -21,7 +21,8 @@ end
     @test buf.cols[2] isa ChunkedCSV.BufferedVector{Float64}
     @test buf.cols[3] isa ChunkedCSV.BufferedVector{Parsers.PosLen31}
     @test buf.row_statuses isa ChunkedCSV.BufferedVector{ChunkedCSV.RowStatus.T}
-    @test buf.column_indicators isa ChunkedCSV.BitSetMatrix
+    @test buf.missing_values isa ChunkedCSV.BitSetMatrix
+    @test buf.errored_values isa ChunkedCSV.BitSetMatrix
     @test length(buf.cols) == 3
     @test length(buf.row_statuses) == 0
     @test length(buf.row_statuses.elements) == 10
@@ -31,7 +32,8 @@
     @test length(buf.cols[2].elements) == 10
     @test length(buf.cols[3]) == 0
     @test length(buf.cols[3].elements) == 10
-    @test size(buf.column_indicators) == (0, 3)
+    @test size(buf.missing_values) == (0, 3)
+    @test size(buf.errored_values) == (0, 3)

     buf = ChunkedCSV.TaskResultBuffer(1, [Int, Float64, Parsers.PosLen31])
     @test buf.id == 1
@@ -40,7 +42,8 @@
     @test buf.cols[2] isa ChunkedCSV.BufferedVector{Float64}
     @test buf.cols[3] isa ChunkedCSV.BufferedVector{Parsers.PosLen31}
     @test buf.row_statuses isa ChunkedCSV.BufferedVector{ChunkedCSV.RowStatus.T}
-    @test buf.column_indicators isa ChunkedCSV.BitSetMatrix
+    @test buf.missing_values isa ChunkedCSV.BitSetMatrix
+    @test buf.errored_values isa ChunkedCSV.BitSetMatrix
     @test length(buf.cols) == 3
     @test length(buf.row_statuses) == 0
     @test length(buf.row_statuses.elements) == 0
@@ -50,7 +53,8 @@
     @test length(buf.cols[2].elements) == 0
     @test length(buf.cols[3]) == 0
     @test length(buf.cols[3].elements) == 0
-    @test size(buf.column_indicators) == (0, 3)
+    @test size(buf.missing_values) == (0, 3)
+    @test size(buf.errored_values) == (0, 3)

     bufs = ChunkedCSV._make_result_buffers(2, [Int, Float64, Parsers.PosLen31], 3)
     @test length(bufs) == 2
@@ -60,7 +64,8 @@
     @test bufs[1].cols[2] isa ChunkedCSV.BufferedVector{Float64}
     @test bufs[1].cols[3] isa ChunkedCSV.BufferedVector{Parsers.PosLen31}
     @test bufs[1].row_statuses isa ChunkedCSV.BufferedVector{ChunkedCSV.RowStatus.T}
-    @test bufs[1].column_indicators isa ChunkedCSV.BitSetMatrix
+    @test bufs[1].missing_values isa ChunkedCSV.BitSetMatrix
+    @test bufs[1].errored_values isa ChunkedCSV.BitSetMatrix
     @test length(bufs[1].cols) == 3
     @test length(bufs[1].row_statuses) == 0
     @test length(bufs[1].row_statuses.elements) == 3
@@ -70,14 +75,16 @@
     @test length(bufs[1].cols[2].elements) == 3
     @test length(bufs[1].cols[3]) == 0
     @test length(bufs[1].cols[3].elements) == 3
-    @test size(bufs[1].column_indicators) == (0, 3)
+    @test size(bufs[1].missing_values) == (0, 3)
+    @test size(bufs[1].errored_values) == (0, 3)
     @test bufs[2].id == 2
     @test bufs[2].cols isa Vector{ChunkedCSV.BufferedVector}
     @test bufs[2].cols[1] isa ChunkedCSV.BufferedVector{Int}
     @test bufs[2].cols[2] isa ChunkedCSV.BufferedVector{Float64}
     @test bufs[2].cols[3] isa ChunkedCSV.BufferedVector{Parsers.PosLen31}
     @test bufs[2].row_statuses isa ChunkedCSV.BufferedVector{ChunkedCSV.RowStatus.T}
-    @test bufs[2].column_indicators isa ChunkedCSV.BitSetMatrix
+    @test bufs[2].missing_values isa ChunkedCSV.BitSetMatrix
+    @test bufs[2].errored_values isa ChunkedCSV.BitSetMatrix
     @test length(bufs[2].cols) == 3
     @test length(bufs[2].row_statuses) == 0
     @test length(bufs[2].row_statuses.elements) == 3
@@ -87,7 +94,8 @@
     @test length(bufs[2].cols[2].elements) == 3
     @test length(bufs[2].cols[3]) == 0
     @test length(bufs[2].cols[3].elements) == 3
-    @test size(bufs[2].column_indicators) == (0, 3)
+    @test size(bufs[2].missing_values) == (0, 3)
+    @test size(bufs[2].errored_values) == (0, 3)
 end

 @testset "TaskResultBuffer empty! and ensureroom" begin
@@ -96,7 +104,8 @@ end
     push!(buf.cols[2], 1.0)
     push!(buf.cols[3], Parsers.PosLen31(1, 1))
     push!(buf.row_statuses, ChunkedCSV.RowStatus.Ok)
-    ChunkedCSV.addrows!(buf.column_indicators, 1)
+    ChunkedCSV.addrows!(buf.missing_values, 1)
+    ChunkedCSV.addrows!(buf.errored_values, 1)

     @test length(buf.cols[1]) == 1
     @test length(buf.cols[1].elements) == 10
@@ -107,7 +116,8 @@
     @test length(buf) == 1
     @test length(buf.row_statuses) == 1
     @test length(buf.row_statuses.elements) == 10
-    @test size(buf.column_indicators) == (1, 3)
+    @test size(buf.missing_values) == (1, 3)
+    @test size(buf.errored_values) == (1, 3)
     ChunkedCSV.empty!(buf)
     @test length(buf.cols[1]) == 0
     @test length(buf.cols[1].elements) == 10
@@ -118,7 +128,8 @@
     @test length(buf) == 0
     @test length(buf.row_statuses) == 0
     @test length(buf.row_statuses.elements) == 10
-    @test size(buf.column_indicators) == (0, 3)
+    @test size(buf.missing_values) == (0, 3)
+    @test size(buf.errored_values) == (0, 3)
     Base.ensureroom(buf, 20)
     @test length(buf.cols[1]) == 0
     @test length(buf.cols[1].elements) == 20
@@ -128,33 +139,63 @@
     @test length(buf.cols[3].elements) == 20
     @test length(buf.row_statuses) == 0
     @test length(buf.row_statuses.elements) == 20
-    @test size(buf.column_indicators) == (0, 3)
+    @test size(buf.missing_values) == (0, 3)
+    @test size(buf.errored_values) == (0, 3)
 end

 @testset "ColumnIterator" begin
     buf = ChunkedCSV.TaskResultBuffer(1, [Int, Float64], 10)
-    for i in 1:4; push!(buf.cols[1], i); end
-    for i in 1:4; push!(buf.cols[2], Float64(i)); end
+
+    # +------------------------------------------------------------------+
+    # |                        TASK_RESULT_BUFFER                         |
+    # +--------------------------+---------+---------+---------+---------+
+    # | row_statuses             | missing | errored | cols[1] | cols[2] |
+    # +--------------------------+---------+---------+---------+---------+
+    # | Ok                       |   ---   |   ---   |    1    |   1.0   |
+    # | Miss                     |   1 0   |   ---   |  undef  |   2.0   |
+    # | TooManyColumns           |   ---   |   ---   |    3    |   3.0   |
+    # | Miss | ValueParsingError |   0 1   |   1 0   |  undef  |  undef  |
+    # | Miss | TooFewColumns     |   1 0   |   0 1   |  undef  |  undef  |
+    # | Miss | SkippedRow        |   1 1   |   ---   |  undef  |  undef  |
+    # +--------------------------+---------+---------+---------+---------+
+    for i in 1:6; push!(buf.cols[1], i); end
+    for i in 1:6; push!(buf.cols[2], Float64(i)); end
     push!(buf.row_statuses, ChunkedCSV.RowStatus.Ok)
-    push!(buf.row_statuses, ChunkedCSV.RowStatus.HasColumnIndicators)
+    push!(buf.row_statuses, ChunkedCSV.RowStatus.MissingValues)
     push!(buf.row_statuses, ChunkedCSV.RowStatus.TooManyColumns)
-    push!(buf.row_statuses, ChunkedCSV.RowStatus.HasColumnIndicators | ChunkedCSV.RowStatus.ValueParsingError)
-    ChunkedCSV.addrows!(buf.column_indicators, 2)
-    buf.column_indicators[1, 1] = true
-    buf.column_indicators[2, 2] = true
+    push!(buf.row_statuses, ChunkedCSV.RowStatus.MissingValues | ChunkedCSV.RowStatus.ValueParsingError)
+    push!(buf.row_statuses, ChunkedCSV.RowStatus.MissingValues | ChunkedCSV.RowStatus.TooFewColumns)
+    push!(buf.row_statuses, ChunkedCSV.RowStatus.MissingValues | ChunkedCSV.RowStatus.SkippedRow)
+    ChunkedCSV.addrows!(buf.missing_values, 4)
+    ChunkedCSV.addrows!(buf.errored_values, 2)
+
+    buf.missing_values[1, 1] = true # ~ data row 2, col 1, "Miss"
+    buf.missing_values[2, 2] = true # ~ data row 4, col 2, "Miss | ValueParsingError"
+    buf.missing_values[3, 1] = true # ~ data row 5, col 1, "Miss | TooFewColumns"
+    buf.missing_values[4, 1] = true # ~ data row 6, col 1, "Miss | SkippedRow"
+    buf.missing_values[4, 2] = true # ~ data row 6, col 2, "Miss | SkippedRow"
+
+    buf.errored_values[1, 1] = true # ~ data row 4, col 1, "Miss | ValueParsingError"
+    buf.errored_values[2, 2] = true # ~ data row 5, col 2, "Miss | TooFewColumns"

     iter_data = collect(ChunkedCSV.ColumnIterator{Int}(buf, 1))
-    @test iter_data[1] == ChunkedCSV.ParsedField(1, false, false)
-    @test iter_data[2] == ChunkedCSV.ParsedField(2, false, true)
-    @test iter_data[3] == ChunkedCSV.ParsedField(3, true, false)
-    @test iter_data[4] == ChunkedCSV.ParsedField(4, true, false)
+    # val, errrow, errval, missval
+    @test iter_data[1] == ChunkedCSV.ParsedField(1, false, false, false)
+    @test iter_data[2] == ChunkedCSV.ParsedField(2, false, false, true)
+    @test iter_data[3] == ChunkedCSV.ParsedField(3, true, false, false)
+    @test iter_data[4] == ChunkedCSV.ParsedField(4, true, true, false)
+    @test iter_data[5] == ChunkedCSV.ParsedField(5, true, false, true)
+    @test iter_data[6] == ChunkedCSV.ParsedField(6, false, false, true)

     iter_data = collect(ChunkedCSV.ColumnIterator{Float64}(buf, 2))
-    @test iter_data[1] == ChunkedCSV.ParsedField(1.0, false, false)
-    @test iter_data[2] == ChunkedCSV.ParsedField(2.0, false, false)
-    @test iter_data[3] == ChunkedCSV.ParsedField(3.0, true, false)
-    @test iter_data[4] == ChunkedCSV.ParsedField(4.0, true, true)
+    # val, errrow, errval, missval
+    @test iter_data[1] == ChunkedCSV.ParsedField(1.0, false, false, false)
+    @test iter_data[2] == ChunkedCSV.ParsedField(2.0, false, false, false)
+    @test iter_data[3] == ChunkedCSV.ParsedField(3.0, true, false, false)
+    @test iter_data[4] == ChunkedCSV.ParsedField(4.0, true, false, true)
+    @test iter_data[5] == ChunkedCSV.ParsedField(5.0, true, true, false)
+    @test iter_data[6] == ChunkedCSV.ParsedField(6.0, false, false, true)
 end