Skip to content

Commit b006ada

Browse files
committed
Added more delimiter guessing tests. Renamed some auxiliary guessing methods.
1 parent e1c5863 commit b006ada

File tree

7 files changed

+120
-48
lines changed

7 files changed

+120
-48
lines changed

src/Access_version.zip

11 Bytes
Binary file not shown.

src/CSVinterface.cls

Lines changed: 46 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -210,45 +210,6 @@ Private Sub Class_Terminate()
210210
Set P_CSV_DATA = Nothing
211211
Set P_CSV_HEADER = Nothing
212212
End Sub
213-
Private Function ArrayListAVGStdDev(ByRef ArrayList As ECPArrayList) As Double
214-
Dim L0 As Long
215-
Dim StdDevSUM As Double
216-
217-
StdDevSUM = 0
218-
For L0 = 0 To ArrayList.count - 1
219-
StdDevSUM = StdDevSUM + ArrayStrStdDev(ArrayList(L0))
220-
Next L0
221-
ArrayListAVGStdDev = StdDevSUM / ArrayList.count
222-
End Function
223-
Private Function ArrayStrLenAVG(ByRef strArray As Variant) As Double
224-
Dim L0 As Long
225-
Dim tmpSUM As Double
226-
227-
tmpSUM = 0
228-
For L0 = LBound(strArray) To UBound(strArray)
229-
tmpSUM = tmpSUM + LenB(strArray(L0))
230-
Next L0
231-
ArrayStrLenAVG = tmpSUM / (1 + UBound(strArray) - LBound(strArray))
232-
End Function
233-
Private Function ArrayStrStdDev(ByRef strArray As Variant) As Double
234-
Dim arrLB As Long
235-
Dim arrUB As Long
236-
Dim L0 As Long
237-
Dim LengthsAVG As Double
238-
Dim CumulativeAVGsquaredDiff As Double
239-
Dim LengthsSum As Double
240-
241-
On Error GoTo ArrStrStdDev_ErrHandler
242-
LengthsAVG = ArrayStrLenAVG(strArray)
243-
CumulativeAVGsquaredDiff = 0
244-
For L0 = LBound(strArray) To UBound(strArray)
245-
CumulativeAVGsquaredDiff = CumulativeAVGsquaredDiff + (LenB(strArray(L0)) - LengthsAVG) ^ 2
246-
Next L0
247-
ArrayStrStdDev = (CumulativeAVGsquaredDiff / (UBound(strArray) - LBound(strArray))) ^ 0.5
248-
Exit Function
249-
ArrStrStdDev_ErrHandler:
250-
ArrayStrStdDev = 1.6E+308
251-
End Function
252213
Public Sub ClearData()
253214
Set P_CSV_DATA = New ECPArrayList
254215
Set P_CSV_HEADER = New ECPArrayList
@@ -661,6 +622,16 @@ ErrHandler_ExportToCSV:
661622
P_SUCCESSFUL_EXPORT = False
662623
P_ERROR_DESC = "[CSV file Export]: " & P_ERROR_DESC
663624
End Sub
625+
Private Function FieldScore(ByRef strArray As Variant) As Double
626+
Dim L0 As Long
627+
Dim tmpSUM As Double
628+
629+
tmpSUM = 0
630+
For L0 = LBound(strArray) To UBound(strArray)
631+
tmpSUM = tmpSUM + LenB(strArray(L0))
632+
Next L0
633+
FieldScore = tmpSUM / (1 + UBound(strArray) - LBound(strArray))
634+
End Function
664635
Private Function FileExists(ByVal FilePath As String) As Boolean
665636
FileExists = CBool(LenB(Dir(FilePath, vbHidden + vbNormal + vbSystem + vbReadOnly + vbArchive)))
666637
End Function
@@ -1305,7 +1276,7 @@ Private Sub GuessInString(ByRef confObject As parserConfig, _
13051276
Dim emptyParam() As Variant
13061277
Dim StartRec As Long
13071278
Dim EndRec As Long
1308-
Dim FieldDelStandardDEV() As Double
1279+
Dim CtableScore() As Double
13091280
Dim QuoteChar() As EscapeTokens
13101281
Dim L0_FieldDelimiter As Long
13111282
Dim L1_FieldDelimiter As Long
@@ -1332,7 +1303,7 @@ Private Sub GuessInString(ByRef confObject As parserConfig, _
13321303
tmpDelimiters() = confObject.DelimitersToGuess
13331304
'@--------------------------------------------------------------------------------
13341305
'StdDev array
1335-
ReDim FieldDelStandardDEV(LBound(tmpDelimiters) To UBound(tmpDelimiters))
1306+
ReDim CtableScore(LBound(tmpDelimiters) To UBound(tmpDelimiters))
13361307
'@--------------------------------------------------------------------------------
13371308
'String to guess
13381309
If Not Buffered Then
@@ -1376,23 +1347,23 @@ Private Sub GuessInString(ByRef confObject As parserConfig, _
13761347
Set guessResults = New ECPArrayList
13771348
ParseCSVstring CSVstring, confObject, guessResults, emptyParam
13781349
If Not guessResults Is Nothing Then
1379-
FieldDelStandardDEV(delIdx) = ArrayListAVGStdDev(guessResults)
1350+
CtableScore(delIdx) = TableScore(guessResults)
13801351
Else
1381-
FieldDelStandardDEV(delIdx) = 1.6E+308
1352+
CtableScore(delIdx) = 1.6E+308
13821353
End If
13831354
Else
1384-
FieldDelStandardDEV(delIdx) = 1.6E+308
1355+
CtableScore(delIdx) = 1.6E+308
13851356
End If
13861357
Next delIdx
13871358
'@--------------------------------------------------------------------------------
13881359
'Delimiters discrimination
1389-
L1_FieldDelimiter = MinIndexVal(FieldDelStandardDEV)
1360+
L1_FieldDelimiter = MinIndexVal(CtableScore)
13901361
.fieldsDelimiter = tmpDelimiters(L1_FieldDelimiter)
13911362
.startingRecord = StartRec
13921363
.endingRecord = EndRec
13931364
End With
13941365
Erase emptyParam
1395-
Erase FieldDelStandardDEV
1366+
Erase CtableScore
13961367
Erase LinesEnds
13971368
Erase QuoteChar
13981369
Erase tmpDelimiters
@@ -2446,6 +2417,24 @@ ParseCSVstring_NoSignificantData:
24462417
"the String has only empty or commented lines that can be omitted."
24472418
Resume ParseCSVstring_Error_Handler
24482419
End Sub
2420+
Private Function RecordScore(ByRef strArray As Variant) As Double
2421+
Dim arrLB As Long
2422+
Dim arrUB As Long
2423+
Dim L0 As Long
2424+
Dim CfieldScore As Double
2425+
Dim CumulativeScore As Double
2426+
2427+
On Error GoTo RecordScore_ErrHandler
2428+
CfieldScore = FieldScore(strArray)
2429+
CumulativeScore = 0
2430+
For L0 = LBound(strArray) To UBound(strArray)
2431+
CumulativeScore = CumulativeScore + (LenB(strArray(L0)) - CfieldScore) ^ 2
2432+
Next L0
2433+
RecordScore = (CumulativeScore / (UBound(strArray) - LBound(strArray))) ^ 0.5
2434+
Exit Function
2435+
RecordScore_ErrHandler:
2436+
RecordScore = 1.6E+308
2437+
End Function
24492438
Private Function RequestedField(ByVal FieldIndex As Long, _
24502439
ByVal mxReq As Long) As Boolean
24512440
Select Case RequestedFieldsArray(0)
@@ -3129,6 +3118,16 @@ AdvanceLine:
31293118
End If
31303119
End If
31313120
End Sub
3121+
Private Function TableScore(ByRef ArrayList As ECPArrayList) As Double
3122+
Dim L0 As Long
3123+
Dim StdDevSUM As Double
3124+
3125+
StdDevSUM = 0
3126+
For L0 = 0 To ArrayList.count - 1
3127+
StdDevSUM = StdDevSUM + RecordScore(ArrayList(L0))
3128+
Next L0
3129+
TableScore = StdDevSUM / ArrayList.count
3130+
End Function
31323131
Public Sub TwoDimToJaggedArray(ByRef TwoDimArray() As Variant, ByRef JaggedArray() As Variant)
31333132
Dim UBj1 As Long, LBj1 As Long
31343133
Dim UBj2 As Long, LBj2 As Long
@@ -3240,5 +3239,4 @@ Private Function UnixToStandardEscapeSeq(ByRef UnixEscapedString As String, _
32403239
Else
32413240
UnixToStandardEscapeSeq = UnixEscapedString
32423241
End If
3243-
End Function
3244-
3242+
End Function

src/Tests/CSVDelimitersGuessingTESTS.bas

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,30 @@ Function DelimitersGuessingTests(FullFileName As String) As TestSuite
142142
"Expected: (" & "[" & ExpectedResult(0) & "]" & " & " & "[" & ExpectedResult(2) & "]" & ")" & _
143143
"Actual: (" & "[" & ActualResult(0) & "]" & " & " & "[" & ActualResult(2) & "]" & ")"
144144
End With
145+
'@--------------------------------------------------------------------------------
146+
'FEC data - [clevercsv issue #15]
147+
With DelimitersGuessingTests.test("FEC data - [clevercsv issue #15]")
148+
FECdata_clevercsvIssue15
149+
.IsEqual ActualResult, ExpectedResult, _
150+
"Expected: (" & "[" & ExpectedResult(0) & "]" & " & " & "[" & ExpectedResult(2) & "]" & ")" & _
151+
"Actual: (" & "[" & ActualResult(0) & "]" & " & " & "[" & ActualResult(2) & "]" & ")"
152+
End With
153+
'@--------------------------------------------------------------------------------
154+
'Mixed comma and colon - [clevercsv issue #35]
155+
With DelimitersGuessingTests.test("Mixed comma and colon - [clevercsv issue #35]")
156+
MixedCommaAndColon_clevercsvIssue35
157+
.IsEqual ActualResult, ExpectedResult, _
158+
"Expected: (" & "[" & ExpectedResult(0) & "]" & " & " & "[" & ExpectedResult(2) & "]" & ")" & _
159+
"Actual: (" & "[" & ActualResult(0) & "]" & " & " & "[" & ActualResult(2) & "]" & ")"
160+
End With
161+
'@--------------------------------------------------------------------------------
162+
'Json data type - [clevercsv issue #37]
163+
With DelimitersGuessingTests.test("Json data type - [clevercsv issue #37]")
164+
JsonDataType_clevercsvIssue37
165+
.IsEqual ActualResult, ExpectedResult, _
166+
"Expected: (" & "[" & ExpectedResult(0) & "]" & " & " & "[" & ExpectedResult(2) & "]" & ")" & _
167+
"Actual: (" & "[" & ActualResult(0) & "]" & " & " & "[" & ActualResult(2) & "]" & ")"
168+
End With
145169
Set DelimitersGuessingTests = Nothing
146170
End Function
147171
Sub GetActualAndExpectedResults(FileName As String, _
@@ -228,6 +252,21 @@ Sub WrongDelimitersHaveBeenAddedToGuessingOperation()
228252

229253
GetActualAndExpectedResults "Wrong delimiters have been added to guessing operation.csv", ",", vbLf, DoubleQuotes
230254
End Sub
255+
Sub FECdata_clevercsvIssue15()
256+
Set confObj = New parserConfig
257+
258+
GetActualAndExpectedResults "FEC data - [clevercsv issue #15].csv", "|", vbLf, DoubleQuotes
259+
End Sub
260+
Sub MixedCommaAndColon_clevercsvIssue35()
261+
Set confObj = New parserConfig
262+
263+
GetActualAndExpectedResults "Mixed comma and colon - [clevercsv issue #35].csv", ",", vbLf, Apostrophe
264+
End Sub
265+
Sub JsonDataType_clevercsvIssue37()
266+
Set confObj = New parserConfig
267+
268+
GetActualAndExpectedResults "Json data type - [clevercsv issue #37].csv", ",", vbLf, DoubleQuotes
269+
End Sub
231270
'/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
232271
'#
233272

2.65 KB
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
C00078279|A|M11|P|80031492155|22Y||MCKENNON, K R|MIDLAND|MI|00000|||10031979|400|||||CONTRIBUTION REF TO INDIVIDUAL|3062020110011466469
2+
C00078279|A|M11||79031415137|15||OREFFICE, P|MIDLAND|MI|00000|DOW CHEMICAL CO||10261979|1500||||||3061920110000382948
3+
C00078279|A|M11||79031415137|15||DOWNEY, J|MIDLAND|MI|00000|DOW CHEMICAL CO||10261979|300||||||3061920110000382949
4+
C00078279|A|M11||79031415137|15||BLAIR, E|MIDLAND|MI|00000|DOW CHEMICAL CO||10261979|1000||||||3061920110000382950
5+
C00078287|A|Q1||79031231889|15||BLANCHARD, JOHN A|CHICAGO|IL|60685|||03201979|200||||||3061920110000383914
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"{""fake"": ""json"", ""fake2"":""json2""}",13:31:38,06:00:04+01:00
2+
"{""fake"": ""json"", ""fake2"":""json2""}",22:13:29,14:20:11+02:00
3+
"{""fake"": ""json"", ""fake2"":""json2""}",04:37:27,22:04:28+03:00
4+
"{""fake"": ""json"", ""fake2"":""json2""}",04:25:28,23:12:53+01:00
5+
"{""fake"": ""json"", ""fake2"":""json2""}",21:04:15,08:23:58+02:00
6+
"{""fake"": ""json"", ""fake2"":""json2""}",10:37:03,11:06:42+05:30
7+
"{""fake"": ""json"", ""fake2"":""json2""}",10:17:24,23:38:47+06:00
8+
"{""fake"": ""json"", ""fake2"":""json2""}",00:02:51,20:04:45-06:00
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
bytearray(b'fake data'),20:53:06,2019-09-01T19:28:21
2+
bytearray(b'fake data'),19:33:15,2005-02-15T19:10:31
3+
bytearray(b'fake data'),10:43:05,1992-10-12T14:49:24
4+
bytearray(b'fake data'),10:36:49,1999-07-18T17:27:55
5+
bytearray(b'fake data'),03:33:35,1982-04-24T17:38:45
6+
bytearray(b'fake data'),14:49:47,1983-01-05T22:17:42
7+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
8+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
9+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
10+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
11+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
12+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
13+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
14+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
15+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
16+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
17+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
18+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
19+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
20+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
21+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45
22+
bytearray(b'fake data'),10:35:30,2006-10-27T02:30:45

0 commit comments

Comments
 (0)