Skip to content

Commit 40498f3

Browse files
authored
Merge pull request #13 from whatsthecraic/non-text-bytes-in-the-header
Non text bytes in the header
2 parents a7837ec + 1bde0d6 commit 40498f3

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

src/init_parsing.jl

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,14 @@ function process_header_and_schema_and_finish_row_skip!(
9797
elseif !Parsers.ok(code)
9898
throw(HeaderParsingError("Error parsing header for column $i at $(lines_skipped_total+1):$(pos) (row:pos)."))
9999
else
100-
push!(parsing_ctx.header, Symbol(strip(Parsers.getstring(row_bytes, val, options.e))))
100+
identifier_s = strip(Parsers.getstring(row_bytes, val, options.e))
101+
try
102+
push!(parsing_ctx.header, Symbol(identifier_s))
103+
catch
104+
# defensively truncate identifier_s to 2000 characters so that a malformed header cannot produce an unbounded error message
105+
throw(HeaderParsingError("Error parsing header for column $i ('$(first(identifier_s, 2000))') at " *
106+
"$(lines_skipped_total+1):$pos (row:pos): presence of invalid non text bytes in the CSV snippet"))
107+
end
101108
end
102109
pos += tlen
103110
end
@@ -166,7 +173,14 @@ function process_header_and_schema_and_finish_row_skip!(
166173
elseif !Parsers.ok(code)
167174
throw(HeaderParsingError("Error parsing header for column $i at $(lines_skipped_total+1):$pos (row:pos)."))
168175
else
169-
push!(parsing_ctx.header, Symbol(strip(Parsers.getstring(row_bytes, val, options.e))))
176+
identifier_s = strip(Parsers.getstring(row_bytes, val, options.e))
177+
try
178+
push!(parsing_ctx.header, Symbol(identifier_s))
179+
catch
180+
# defensively truncate identifier_s to 2000 characters so that a malformed header cannot produce an unbounded error message
181+
throw(HeaderParsingError("Error parsing header for column $i ('$(first(identifier_s, 2000))') at " *
182+
"$(lines_skipped_total+1):$pos (row:pos): presence of invalid non text bytes in the CSV snippet"))
183+
end
170184
end
171185
pos += tlen
172186
i += 1

test/exception_handling.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,14 @@ end
228228
header=true,
229229
)
230230

231+
@test_throws ChunkedCSV.HeaderParsingError("Error parsing header for column 1 ('a\0') at 1:1 (row:pos): presence of invalid non text bytes in the CSV snippet") parse_file(IOBuffer("""
232+
a\0,b
233+
1,2
234+
"""),
235+
[Int,Int],
236+
header=true,
237+
)
238+
231239
@test_throws ArgumentError("Provided header and schema names don't match. In schema, not in header: [:q]. In header, not in schema: [:a, :b, :c]") parse_file(IOBuffer("""
232240
a,b,c
233241
1,2,3

0 commit comments

Comments
 (0)