Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 44 additions & 5 deletions lib/csv-safe.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ def initialize(data, converters: nil, **options)
# Appends +row+ to the CSV, first passing it through sanitize_row so the
# parent implementation only ever receives the sanitised fields.
def <<(row)
  cleaned = sanitize_row(row)
  super(cleaned)
end
alias_method :add_row, :<<
alias_method :puts, :<<
alias add_row <<
alias puts <<

private

# Reports whether +str+ begins with a character this class treats as special
# (presumably because spreadsheet applications may evaluate such a field as a
# formula -- confirm against the project README).
#
# "=", "+", "@", "%", "|", carriage return and tab are always special.
# A leading "-" is special only when the whole string is not recognised by
# numeric_or_currency?, so values such as "-24.34" are left alone.
#
# @param str [String] field text to inspect
# @return [Boolean] true when the field starts with a special character
def starts_with_special_character?(str)
  return true if str.start_with?('=', '+', '@', '%', '|', "\r", "\t")

  # "-" gets its own check: negative numbers and amounts are legitimate data.
  str.start_with?('-') && !numeric_or_currency?(str)
end

def prefix(field)
Expand All @@ -48,12 +49,50 @@ def sanitize_field(field)
end
end

# Reports whether +str+ is, in its entirety, a number or a currency amount.
#
# Every pattern accepts an optional leading "-" and is anchored with \A and
# \z, so no trailing content is ever ignored. Recognised shapes:
# * plain integers and decimals (this also covers -0, -0.0, -0.00)
# * thousands-grouped numbers using comma, dot, whitespace or apostrophe
# * an amount introduced by a currency symbol, by a three-letter upper-case
#   code, or by a code followed by a symbol
#
# There is deliberately no looser fallback such as "currency symbol followed
# by anything containing a digit": that would classify text like
# "-$A$1+cmd|' /C calc'!A0" as a currency amount, and
# starts_with_special_character? would then leave it unsanitised.
#
# @param str [String] field text to classify
# @return [Boolean] true when the whole string matches one of the shapes
def numeric_or_currency?(str)
  symbol = /[\$€¥£₹₽₣₦₩₱₲₴₺₼₸₾₿฿₫₭₮₯₧₨₪₢₡₰₳₥₠₤]/
  code = /[A-Z]{3}/
  # Digit runs joined by , . whitespace or ', with an optional decimal tail.
  amount = /\d+(?:[,.\s']\d+)*(?:[,.]\d+)?/

  shapes = [
    /\A-?\d+(?:\.\d+)?\z/,                    # -24.34, -0.00
    /\A-?\d{1,3}(?:,\d{3})+(?:\.\d+)?\z/,     # US format: 1,234.56
    /\A-?\d{1,3}(?:\.\d{3})+(?:,\d+)?\z/,     # European format: 1.234,56
    /\A-?\d{1,3}(?:\s\d{3})+(?:[,.]\d+)?\z/,  # Space separator: 1 234.56
    /\A-?\d{1,3}(?:'\d{3})+(?:\.\d+)?\z/,     # Apostrophe separator: 1'234.56
    /\A-?#{symbol}\s*#{amount}\z/,            # $1,234.56, € 1.234,56, $0.00
    /\A-?#{code}\s+#{symbol}\s*#{amount}\z/,  # USD $1,234.56
    /\A-?#{code}#{symbol}#{amount}\z/,        # USD$1,234.56
    /\A-?#{code}\s+#{amount}\z/               # USD 1,234.56
  ]

  shapes.any? { |shape| shape.match?(str) }
end

def sanitize_row(row)
case row
when self.class::Row
then row.fields.map { |field| sanitize_field(field) }
row.fields.map { |field| sanitize_field(field) }
when Hash
then @headers.map { |header| sanitize_field(row[header]) }
@headers.map { |header| sanitize_field(row[header]) }
else
row.map { |field| sanitize_field(field) }
end
Expand Down
93 changes: 91 additions & 2 deletions spec/csv_safe_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,95 @@
end
end

# Negative-value handling. Fields that begin with "-" are expected to pass
# through without the "'" prefix when they read as a number or a currency
# amount, and to receive the prefix otherwise. In the expected CSV lines,
# fields that contain a comma appear wrapped in double quotes.
context 'with negative numbers' do
# Each example appends `row` to an empty CSVSafe and compares the resulting string.
subject { (CSVSafe.new('') << row).string }

context 'with a simple negative number' do
let(:row) { ['-24.34', '2'] }
it { should eq "-24.34,2\n" }
end

context 'with basic negative currency values (USD, EUR, GBP, JPY)' do
let(:row) { ['-$2,000.00', '-€1,000.00', '-£3,500.75', '-¥5,000'] }
it { should eq "\"-$2,000.00\",\"-€1,000.00\",\"-£3,500.75\",\"-¥5,000\"\n" }
end

context 'with complex currency formats using various separators' do
let(:row) { ['-$2,000,000.00', '-€ 1.000,00', '-£3.500,75', '-¥5,000,000'] }
it { should eq "\"-$2,000,000.00\",\"-€ 1.000,00\",\"-£3.500,75\",\"-¥5,000,000\"\n" }
end

context 'with currency codes before symbols' do
let(:row) { ['-USD $1,234.56', '-EUR €1.234,56', '-GBP £1,234.56', '-JPY ¥1,234'] }
it { should eq "\"-USD $1,234.56\",\"-EUR €1.234,56\",\"-GBP £1,234.56\",\"-JPY ¥1,234\"\n" }
end

context 'with currency codes attached to symbols' do
let(:row) { ['-USD$1,234.56', '-EUR€1.234,56', '-GBP£1,234.56', '-JPY¥1,234'] }
it { should eq "\"-USD$1,234.56\",\"-EUR€1.234,56\",\"-GBP£1,234.56\",\"-JPY¥1,234\"\n" }
end

context 'with currency codes alone' do
let(:row) { ['-USD 1,234.56', '-EUR 1.234,56', '-GBP 1,234.56', '-JPY 1,234'] }
it { should eq "\"-USD 1,234.56\",\"-EUR 1.234,56\",\"-GBP 1,234.56\",\"-JPY 1,234\"\n" }
end

context 'with other international currencies' do
let(:row) do
[
'-₹1,00,000.00', # Indian Rupee
'-₽1 234,56', # Russian Ruble
"-₣1'234.56", # Swiss Franc
'-₦1,234.56', # Nigerian Naira
'-₩1,234', # Korean Won
'-₱1,234.56', # Philippine Peso
'-₴1 234,56', # Ukrainian Hryvnia
'-₺1.234,56' # Turkish Lira
]
end
# The Swiss Franc value has no comma, so it is the only field left unquoted.
it {
should eq "\"-₹1,00,000.00\",\"-₽1 234,56\",-₣1'234.56,\"-₦1,234.56\",\"-₩1,234\",\"-₱1,234.56\",\"-₴1 234,56\",\"-₺1.234,56\"\n"
}
end

# Numeric and currency values sit next to fields that must still be prefixed
# ('@dangerous' and the non-numeric '-not-numeric').
context 'with mixed values' do
let(:row) { ['normal text', '-24.34', '-$2,000.00', '@dangerous', '-not-numeric'] }
it { should eq "normal text,-24.34,\"-$2,000.00\",'@dangerous,'-not-numeric\n" }
end

context 'with non-standard numeric formats' do
let(:row) do
[
'-1,234,567.89', # Standard US thousands separator
'-1.234.567,89', # European format
'-1 234 567,89', # Space as thousands separator
"-1'234'567.89" # Apostrophe as thousands separator
]
end
it { should eq "\"-1,234,567.89\",\"-1.234.567,89\",\"-1 234 567,89\",-1'234'567.89\n" }
end

# Exponent notation is expected to be prefixed, i.e. it does not count as numeric.
context 'with scientific notation' do
let(:row) { ['-1.23e4', '-1.23E+4', '-1.23e-4'] }
it { should eq "'-1.23e4,'-1.23E+4,'-1.23e-4\n" }
end

context 'with zero values in different formats' do
let(:row) { ['-0', '-0.0', '-0.00', '-$0.00', '-€0,00'] }
it { should eq "-0,-0.0,-0.00,-$0.00,\"-€0,00\"\n" }
end

# A trailing "%" is expected to keep the value prefixed.
context 'with negative percentages' do
let(:row) { ['-10%', '-10.5%', '-10,5%'] }
it { should eq "'-10%,'-10.5%,\"'-10,5%\"\n" }
end

# Symbolic values (infinity, NaN) are expected to be prefixed like any other text.
context 'with other special negative values' do
let(:row) { ['-∞', '-NaN', '-Inf'] }
it { should eq "'-∞,'-NaN,'-Inf\n" }
end
end

# TODO: this file is too big?

context 'with a field that is a non-String' do
Expand Down Expand Up @@ -136,7 +225,7 @@ def self.to_s

context 'when the fields require sanitization' do
let(:fields) { ['+Jane', '-30'] }
let(:expected) { ["'+Jane", "'-30"] }
let(:expected) { ["'+Jane", '-30'] }
let(:row) { CSV::Row.new(%w[Name Age], fields) }
it { should eq expected }
end
Expand Down Expand Up @@ -168,7 +257,7 @@ def self.to_s

context 'when the fields require sanitization' do
let(:row) { ['+Jane', '-30'] }
let(:expected) { ["'+Jane", "'-30"] }
let(:expected) { ["'+Jane", '-30'] }
it { should eq expected }
end
end
Expand Down