Skip to content

Commit ad9d8c0

Browse files
committed
Properly parse quoted CSV fields
1 parent 47c3715 commit ad9d8c0

File tree

3 files changed

+57
-9
lines changed

3 files changed

+57
-9
lines changed

csv-to-json

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,56 @@ USAGE="$0 <file"
44
declare -a FIELDS
55
declare -a VALUES
66

7-
read_csv_fields() {
8-
local IFS=','
9-
read -t 0.1 -a FIELDS || return 1
7+
#######################################
# Convert one raw CSV field into text that can sit inside a JSON string.
# A single leading and a single trailing double quote are stripped, and
# every CSV-escaped quote ("") is emitted as \". A stray single quote is
# also emitted as \" but does not consume the character that follows it.
# Backslashes are passed through untouched.
# Arguments: $1 - raw field text, optionally wrapped in double quotes
# Outputs:   the sanitized field followed by a newline on stdout
#######################################
sanitize_field() {
  local field="${1#\"}"
  field="${field%\"}"
  local sanitized=""
  local char=""
  local -i i

  for ((i = 0; i < ${#field}; i++)); do
    char="${field:i:1}"
    if [[ "$char" == '"' ]]; then
      sanitized+='\"'
      # Only skip the next character when it is the second half of a
      # doubled quote; otherwise real data would be thrown away.
      if [[ "${field:i+1:1}" == '"' ]]; then
        i=$((i + 1))
      fi
      continue
    fi
    sanitized+="$char"
  done

  # printf rather than echo: a field such as -n or -e must be printed
  # literally instead of being parsed as an echo option.
  printf '%s\n' "$sanitized"
}
1122

12-
read_row() {
13-
local IFS=','
14-
read -t 0.1 -a VALUES || return 1
23+
#######################################
# Read one CSV record from stdin into the named array.
# A record normally occupies one line, but a double-quoted field may span
# several physical lines; each embedded line break is stored as the two
# characters \n. Commas inside quotes do not split fields. Every field is
# passed through sanitize_field before being stored.
# The target array is emptied before the record is stored, so it never
# carries values over from a previous call.
# Globals:   HEADER_LINE (written) - the physical line being scanned
# Arguments: $1 - name of the array variable that receives the fields
# Returns:   0 when a record was read (an empty line yields zero fields),
#            1 on end of input, on timeout of the first read, or when
#            input ends inside an unterminated quoted field
#######################################
read_csv_fields() {
  local -n array_name="$1"
  local -i idx=0
  local -i read_status=0
  local field=""
  local char=""
  local quote_enclosed=false

  # IFS= keeps leading/trailing whitespace, which is field data in CSV.
  IFS= read -r -t 0.1 HEADER_LINE || read_status=$?
  if ((read_status > 128)); then
    # Timed out waiting for input.
    return 1
  fi
  if ((read_status != 0)) && [[ -z "$HEADER_LINE" ]]; then
    # Genuine end of input. A non-empty line here is a final record that
    # simply lacks a trailing newline and is still processed below.
    return 1
  fi

  array_name=()
  if [[ -z "$HEADER_LINE" ]]; then
    return 0
  fi

  while true; do
    for ((idx = 0; idx < ${#HEADER_LINE}; idx++)); do
      char="${HEADER_LINE:idx:1}"
      if [[ "$char" == ',' && "$quote_enclosed" == false ]]; then
        array_name+=("$(sanitize_field "$field")")
        field=""
        continue
      fi
      if [[ "$char" == '"' ]]; then
        # Toggling on every quote also copes with doubled quotes: the
        # pair closes and immediately reopens the quoted section.
        if [[ "$quote_enclosed" == true ]]; then
          quote_enclosed=false
        else
          quote_enclosed=true
        fi
      fi
      field+="$char"
    done

    if [[ "$quote_enclosed" == false ]]; then
      break
    fi

    # Still inside quotes at end of line: the field continues on the next
    # physical line, which may legitimately be blank.
    IFS= read -r HEADER_LINE || [[ -n "$HEADER_LINE" ]] || return 1
    field+='\n'
  done

  # Store the last field, including an empty one after a trailing comma.
  array_name+=("$(sanitize_field "$field")")
}
1658

1759
print_row() {
@@ -24,13 +66,13 @@ print_row() {
2466
printf '{%s}' "${row[*]}"
2567
}
2668

27-
if read_csv_fields; then
69+
if read_csv_fields FIELDS; then
2870
printf '['
2971

30-
read_row
72+
read_csv_fields VALUES
3173
print_row
3274

33-
while read_row; do
75+
while read_csv_fields VALUES; do
3476
printf ','
3577
print_row
3678
done

test/complex.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"aaa","b
2+
b b","c,cc","d"",d"",d"
3+
"zzz","y y
4+
y","x,x,x","w"",""w"",.""w"

test/simple.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
aaa,bbb,ccc,ddd
2+
zzz,yyy,xxx,www

0 commit comments

Comments
 (0)