Skip to content

Commit 41c909b

Browse files
authored
Add options/0 (#87)
1 parent 2fc3cbf commit 41c909b

File tree

2 files changed

+80
-5
lines changed

2 files changed

+80
-5
lines changed

lib/nimble_csv.ex

Lines changed: 38 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,27 @@ defmodule NimbleCSV do
9191
defexception [:message]
9292
end
9393

94+
@doc """
95+
Returns the options used to define this parser/dumper module.
96+
97+
This function allows you to retrieve the original options and use them
98+
as a base for defining new modules with modified options.
99+
100+
## Examples
101+
102+
# Create a new parser based on RFC4180 but with formula escaping
103+
NimbleCSV.define(
104+
MyApp.CSV,
105+
NimbleCSV.RFC4180.options()
106+
|> Keyword.merge(
107+
escape_formula: %{~w(@ + - = \\t \\r) => "'"},
108+
moduledoc: "RFC4180 with formula escaping"
109+
)
110+
)
111+
112+
"""
113+
@callback options() :: keyword()
114+
94115
@doc """
95116
Eagerly dumps an enumerable into iodata (a list of binaries and bytes and other lists).
96117
"""
@@ -207,10 +228,10 @@ defmodule NimbleCSV do
207228
* `:dump_bom` - includes BOM (byte order marker) in the dumped document
208229
* `:reserved` - the list of characters to be escaped, defaults to the
209230
`:separator`, `:newlines`, and `:escape` characters above
210-
* `:escape_formula` - the formula prefix(es) and formula escape sequence,
211-
defaults to `nil`, which disabled formula escaping
212-
`%{["@", "+", "-", "=", "\t", "\r"] => "'"}` would escape all fields starting
213-
with `@`, `+`, `-`, `=`, tab or carriage return using the `'` character.
231+
* `:escape_formula` - an optional map of formula prefixes to escape sequences.
232+
When `nil` (the default), formula escaping is disabled. For example,
233+
`%{~w(@ + - = \t \r) => "'"}` escapes fields starting with `@`, `+`, `-`, `=`,
234+
tab, or carriage return by prefixing them with `'`
214235
215236
Although parsing may support multiple newline delimiters, when
216237
dumping, only one of them must be picked, which is controlled by
@@ -239,17 +260,27 @@ defmodule NimbleCSV do
239260
`@`, `+`, `-`, `=`, tab or carriage return). Use the following config to
240261
follow the [OWASP recommendations](https://owasp.org/www-community/attacks/CSV_Injection):
241262
242-
escape_formula: %{["@", "+", "-", "=", "\t", "\r"] => "'"}
263+
escape_formula: %{~w(@ + - = \t \r) => "'"}
243264
244265
Applications that want more control over this process, to allow formulas in specific
245266
cases, or possibly minimize false positives, should leave this option disabled and
246267
escape the value, as necessary, within their code.
268+
269+
## Extending existing CSV modules
270+
271+
Each module defined with `define/2` includes an `c:options/0` function that
272+
returns the original options used to create the CSV module. This allows you
273+
to easily create new modules based on existing ones. For example, you can
274+
extend an existing CSV module to add formula escaping or customize other
275+
options as needed.
247276
"""
248277
def define(module, options) do
249278
defmodule module do
250279
@behaviour NimbleCSV
251280
@moduledoc Keyword.get(options, :moduledoc)
252281

282+
@original_options options
283+
253284
@escape Keyword.get(options, :escape, "\"")
254285
# `:escape_formula` is documented as `nil` when disabled. `Keyword.get/3` only
# applies its default when the key is absent, so an explicit `escape_formula: nil`
# (e.g. merged over another module's `options/0`) would reach `Enum.to_list/1`
# and raise. Normalize a missing or `nil` value to an empty list instead.
@escape_formula Enum.to_list(Keyword.get(options, :escape_formula) || [])
255286

@@ -342,6 +373,8 @@ defmodule NimbleCSV do
342373
@compile {:inline,
343374
maybe_dump_bom: 1, maybe_trim_bom: 1, maybe_to_utf8: 1, maybe_to_encoding: 1}
344375

376+
def options, do: @original_options
377+
345378
## Parser
346379

347380
def parse_stream(stream, opts \\ []) when is_list(opts) do

test/nimble_csv_test.exs

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,15 @@ defmodule NimbleCSVTest do
99
line_separator: "\r\n"
1010
)
1111

12+
NimbleCSV.define(
13+
DerivedParser,
14+
CSV.options()
15+
|> Keyword.merge(
16+
escape_formula: %{~w(@ + - = \t \r) => "'"},
17+
moduledoc: "Test parser based on RFC4180"
18+
)
19+
)
20+
1221
test "parse_string/2 without headers" do
1322
assert CSV.parse_string("""
1423
name,last,year
@@ -488,6 +497,39 @@ defmodule NimbleCSVTest do
488497
Spreadsheet.to_line_stream(stream) |> Spreadsheet.parse_stream() |> Enum.to_list()
489498
end
490499

500+
test "options/0 returns the original options" do
501+
# Test that RFC4180 has the expected options
502+
rfc4180_options = CSV.options()
503+
assert Keyword.get(rfc4180_options, :separator) == ","
504+
assert Keyword.get(rfc4180_options, :escape) == "\""
505+
assert Keyword.get(rfc4180_options, :line_separator) == "\r\n"
506+
507+
# Test that Spreadsheet has the expected options
508+
spreadsheet_options = Spreadsheet.options()
509+
assert Keyword.get(spreadsheet_options, :separator) == "\t"
510+
assert Keyword.get(spreadsheet_options, :encoding) == {:utf16, :little}
511+
assert Keyword.get(spreadsheet_options, :trim_bom) == true
512+
assert Keyword.get(spreadsheet_options, :dump_bom) == true
513+
end
514+
515+
test "creating a new parser based on existing options" do
516+
# Verify the new parser has the combined options
517+
test_options = DerivedParser.options()
518+
assert Keyword.get(test_options, :separator) == ","
519+
assert Keyword.get(test_options, :escape) == "\""
520+
assert Keyword.get(test_options, :escape_formula) == %{~w(@ + - = \t \r) => "'"}
521+
assert Keyword.get(test_options, :moduledoc) == "Test parser based on RFC4180"
522+
523+
# Test that the new parser works
524+
assert DerivedParser.parse_string("name,value\njohn,123") == [~w(john 123)]
525+
526+
# Test that formula escaping is applied
527+
data = [~w(name formula), ["test", "@SUM(A1:A2)"]]
528+
result = DerivedParser.dump_to_iodata(data)
529+
dumped = IO.iodata_to_binary(result)
530+
assert dumped == "name,formula\r\ntest,'@SUM(A1:A2)\r\n"
531+
end
532+
491533
defp utf16le(binary), do: :unicode.characters_to_binary(binary, :utf8, {:utf16, :little})
492534
defp utf16le_bom(), do: :unicode.encoding_to_bom({:utf16, :little})
493535
end

0 commit comments

Comments
 (0)