Skip to content

Commit f192d05

Browse files
committed
Remove BOM and escape control characters
1 parent 75fcc17 commit f192d05

File tree

2 files changed

+38
-11
lines changed

2 files changed

+38
-11
lines changed

csv-to-json

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,34 @@ USAGE="$0 <file"
44
declare -a FIELDS
55
declare -a VALUES
66

7+
#######################################
# Translate one character into the text that should be appended to a JSON
# string value.
#
# Control characters are replaced by a JSON escape sequence; any other
# character is returned unchanged. JSON (RFC 8259, section 7) only defines
# the short escapes \b \f \n \r \t (plus \" \\ \/); every other control
# character must be written as \uXXXX. In particular "\a" and "\v" are C
# escapes, not JSON escapes, so BEL and VT are emitted in \uXXXX form.
#
# Arguments: $1 - a single character (may be empty)
# Outputs:   the replacement text on stdout, without a trailing newline
# Returns:   0
#######################################
replace_control_char() {
  local char="${1-}"
  case "$char" in
    $'\a')
      # BEL has no short JSON escape.
      char='\u0007'
      ;;
    $'\b')
      char='\b'
      ;;
    $'\f')
      char='\f'
      ;;
    $'\n')
      char='\n'
      ;;
    $'\r')
      # Carriage returns are dropped entirely rather than escaped.
      char=''
      ;;
    $'\t')
      char='\t'
      ;;
    $'\v')
      # VT has no short JSON escape.
      char='\u000b'
      ;;
    [[:cntrl:]])
      # Any remaining control character: emit its code point as \uXXXX.
      # A leading quote in a printf argument yields the character's
      # numeric value.
      printf -v char '\\u%04x' "'${char}"
      ;;
  esac
  printf '%s' "$char"
}
34+
735
sanitize_field() {
836
local field="${1#\"}"
937
field="${field%\"}"
@@ -15,10 +43,7 @@ sanitize_field() {
1543
((i++))
1644
continue
1745
fi
18-
if [ "$char" = $'\r' ]; then
19-
((i++))
20-
continue
21-
fi
46+
char=$(replace_control_char "$char")
2247
sanitized="${sanitized}${char}"
2348
done
2449
echo "${sanitized}"
@@ -29,9 +54,10 @@ read_csv_fields() {
2954
local -i idx=0
3055
local field=""
3156
local quote_enclosed=false
32-
read -rt 0.1 HEADER_LINE || return 1
33-
while ((idx < ${#HEADER_LINE})); do
34-
local char="${HEADER_LINE:idx:1}"
57+
read -rt 0.1 line || return 1
58+
line=${line#$'\xEF'$'\xBB'$'\xBF'}
59+
while ((idx < ${#line})); do
60+
local char="${line:idx:1}"
3561
if [ "$char" = '"' ]; then
3662
if $quote_enclosed; then
3763
quote_enclosed=false
@@ -47,10 +73,11 @@ read_csv_fields() {
4773
fi
4874
field="${field}${char}"
4975
((idx++))
50-
if (( idx == ${#HEADER_LINE} )); then
76+
if (( idx == ${#line} )); then
5177
if $quote_enclosed; then
52-
read -r HEADER_LINE || return 1
53-
field="${field}\n"
78+
read -r line || return 1
79+
field="${field}
80+
"
5481
idx=0
5582
else
5683
array_name+=("$(sanitize_field "${field}")")

test/complex.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"aaa","b
22
b b","c,cc","d"",d"",d"
3-
"zzz","y y
3+
"zzz","y y
44
y","x,x,x","w"",""w"",.""w"

0 commit comments

Comments
 (0)