Skip to content

Commit 4c1724b

Browse files
committed
Bug fixed: dialect sniffer
The CSV file dialect could not be detected when the text qualifier (quote character) was not present in the file.
1 parent 0fae2c7 commit 4c1724b

File tree

5 files changed

+38
-10
lines changed

5 files changed

+38
-10
lines changed

src/Access_version.zip

45 Bytes
Binary file not shown.

src/All_Host_version.zip

50 Bytes
Binary file not shown.

src/CSVinterface.cls

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Private Const CHR_APOSTROPHE As String = "'"
2828
Private Const CHR_BACKSLASH As String = "\"
2929
Private Const CHR_DOUBLE_QUOTES As String = """"
3030
Private Const CHR_TILDE As String = "~"
31+
Private Const CHR_CARET As String = "^"
3132
'////////////////////////////////////////////////////////////////////////////////////////////
3233
'#
3334
'////////////////////////////////////////////////////////////////////////////////////////////
@@ -3152,17 +3153,19 @@ Private Function SniffInString(ByRef confObject As CSVparserConfig, _
31523153
For j = LBound(LinesEnds) To UBound(LinesEnds)
31533154
If InStrB(1, TmpCSVstr, LinesEnds(j)) Then
31543155
For k = LBound(QuoteChar) To UBound(QuoteChar)
3156+
'@--------------------------------------------------------------------------------
3157+
'Set CSV dialect
3158+
.dialect.fieldsDelimiter = TmpDelimiters(i)
3159+
.dialect.recordsDelimiter = LinesEnds(j)
3160+
.dialect.quoteToken = QuoteChar(k)
3161+
Set ImportedTable = New CSVArrayList
3162+
ParseCSVstring TmpCSVstr, tmpConfig, ImportedTable, EmptyParam
3163+
'@--------------------------------------------------------------------------------
3164+
'Save results with keys
31553165
If InStrB(1, TmpCSVstr, GetQuoteChar(QuoteChar(k))) Then
3156-
'@--------------------------------------------------------------------------------
3157-
'Set CSV dialect
3158-
.dialect.fieldsDelimiter = TmpDelimiters(i)
3159-
.dialect.recordsDelimiter = LinesEnds(j)
3160-
.dialect.quoteToken = QuoteChar(k)
3161-
Set ImportedTable = New CSVArrayList
3162-
ParseCSVstring TmpCSVstr, tmpConfig, ImportedTable, EmptyParam
3163-
'@--------------------------------------------------------------------------------
3164-
'Save results with keys
31653166
ScoreArray.AddIndexedItem DialectToString(.dialect), GuesserHelper.TableScore(ImportedTable)
3167+
Else
3168+
ScoreArray.AddIndexedItem DialectToString(.dialect) & CHR_CARET, GuesserHelper.TableScore(ImportedTable) / 2
31663169
End If
31673170
Next k
31683171
End If
@@ -3209,7 +3212,11 @@ Private Function StringToDialect(ByRef dialectString As String) As CSVdialect
32093212
With tmpResult
32103213
.fieldsDelimiter = tmpArr(idx)
32113214
.recordsDelimiter = tmpArr(idx + 1)
3212-
.quoteToken = GetQuoteToken(CLng(tmpArr(idx + 2)))
3215+
If InStrB(1, dialectString, CHR_CARET) Then
3216+
.quoteToken = QuoteTokens.DoubleQuotes
3217+
Else
3218+
.quoteToken = GetQuoteToken(CLng(tmpArr(idx + 2)))
3219+
End If
32133220
End With
32143221
Set StringToDialect = tmpResult
32153222
End Function
-641 Bytes
Binary file not shown.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
=== Delimiters guessing test ===
2+
+ Mixed comma and semicolon
3+
+ File with multi-line field
4+
+ Optional quoted fields
5+
+ Mixed comma and semicolon - file B
6+
+ Geometric CSV
7+
+ Table embedded in the last record
8+
+ Table embedded in the second record
9+
+ Multiple commas in fields
10+
+ Uncommon char as field delimiter
11+
+ Wrong delimiters have been added to guessing operation
12+
+ FEC data - [clevercsv issue #15]
13+
+ Mixed comma and colon - [clevercsv issue #35]
14+
+ Json data type - [clevercsv issue #37]
15+
+ Undefined field delimiter
16+
+ Rainbow CSV [issue #92]
17+
+ Pipe character is more frequent than the comma
18+
+ Pipe character is more frequent than the semicolon
19+
+ Short pipe separated table embedded
20+
= PASS (18 of 18 passed) = 2/4/2023 3:31:47 a. m. =
21+

0 commit comments

Comments
 (0)