Skip to content

Commit ac18b6f

Browse files
committed
enhancement: Improved performance
Non-quoted CSV files are processed 15% faster.
1 parent 4de3722 commit ac18b6f

File tree

4 files changed

+38
-25
lines changed

4 files changed

+38
-25
lines changed
-15 Bytes
Loading

docs/home/Quoted-vs-NONquoted.png

148 Bytes
Loading

docs/home/getting_started.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ Finally, the below chart shows the overheat for the Sorting and Dynamic Typing o
212212

213213
### Conclusions
214214

215-
- `ImportFromCSV` is the faster one import method when working with files with a great number of quoted fields. The performance of the @sdkn104 solution may be affected if the time to load the file content is not ignored, this because the solution doesn't have a method to pull out the content of CSV files.
215+
- `ImportFromCSV` is the fastest import method.
216216
- The CSV syntax slows down performance. As the number of escaped fields increases, performance decreases; this is especially noticeable for the @sdkn104 solution.
217-
- The Dynamic Typing causes more overheat than the Sort operation. This can be explained by the great performance of the Yaroslavskiy sorting algorithm used.
217+
- Dynamic Typing causes more overhead than the Sort operation. This can be explained by the great performance of the Yaroslavskiy sorting algorithm used.
218218

219219
## Licence
220220
Copyright (C) 2021 [W. García](https://github.com/ws-garcia/VBA-CSV-interface/).

src/CSVinterface.cls

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Private dTTemplate() As Variant '----------Template
5151
Private TemplateDefined As Boolean
5252
Private TargetsDefined As Boolean
5353
Private RequestedFieldsArray() As Long
54+
Private StreamWithQuotes As Boolean
5455
'////////////////////////////////////////////////////////////////////////////////////////////
5556
'#
5657
'////////////////////////////////////////////////////////////////////////////////////////////
@@ -1612,6 +1613,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
16121613
'@----------------------------------------------------------------------------
16131614
'Get CSV stream and populate a temp array
16141615
CSVstream.ReadText
1616+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
16151617
StreamEnd = CSVstream.atEndOfStream
16161618
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
16171619
'@----------------------------------------------------------------------------
@@ -1620,6 +1622,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
16201622
CSVstream.bufferSize = 2 * configObj.bufferSize
16211623
CSVstream.RestartPointer
16221624
CSVstream.ReadText
1625+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
16231626
StreamEnd = CSVstream.atEndOfStream
16241627
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
16251628
Loop
@@ -1652,29 +1655,33 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
16521655
'Process the current Token
16531656
TokenEndReached = False
16541657
TokenBeginningPos = OverStringPointer
1655-
Select Case OverStringPointer
1656-
Case Is < LenCurrentIndex
1657-
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1658-
'@----------------------------------------------------------------------------
1659-
'Ignore spaces, tabs and remanents LF
1660-
Select Case FDAscW
1661-
Case Is <> 9
1662-
Do While ASCIIcharw = 32 Or ASCIIcharw = 9 Or ASCIIcharw = 10
1663-
OverStringPointer = OverStringPointer + 2&
1664-
If OverStringPointer > LenCurrentIndex Then Exit Do
1665-
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1666-
Loop
1667-
Case Else
1668-
Do While ASCIIcharw = 32 Or ASCIIcharw = 10
1669-
OverStringPointer = OverStringPointer + 2&
1670-
If OverStringPointer > LenCurrentIndex Then Exit Do
1671-
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1672-
Loop
1673-
End Select
1674-
OpenedToken = (ASCIIcharw = EscapeAscW)
1675-
Case Else 'Avoid over runs
1676-
OpenedToken = False
1677-
End Select
1658+
If StreamWithQuotes Then
1659+
Select Case OverStringPointer
1660+
Case Is < LenCurrentIndex
1661+
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1662+
'@----------------------------------------------------------------------------
1663+
'Ignore spaces, tabs and remanents LF
1664+
Select Case FDAscW
1665+
Case Is <> 9
1666+
Do While ASCIIcharw = 32 Or ASCIIcharw = 9 Or ASCIIcharw = 10
1667+
OverStringPointer = OverStringPointer + 2&
1668+
If OverStringPointer > LenCurrentIndex Then Exit Do
1669+
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1670+
Loop
1671+
Case Else
1672+
Do While ASCIIcharw = 32 Or ASCIIcharw = 10
1673+
OverStringPointer = OverStringPointer + 2&
1674+
If OverStringPointer > LenCurrentIndex Then Exit Do
1675+
ASCIIcharw = AscW(MidB$(tmpCSV(Index), OverStringPointer, 2))
1676+
Loop
1677+
End Select
1678+
OpenedToken = (ASCIIcharw = EscapeAscW)
1679+
Case Else 'Avoid over runs
1680+
OpenedToken = False
1681+
End Select
1682+
Else
1683+
OpenedToken = False
1684+
End If
16781685
If OpenedToken Then
16791686
'@----------------------------------------------------------------------------
16801687
'Try to escape the Token
@@ -1705,6 +1712,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
17051712
'@----------------------------------------------------------------------------
17061713
'Load new stream
17071714
CSVstream.ReadText
1715+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
17081716
StreamEnd = CSVstream.atEndOfStream
17091717
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
17101718
Index = 0
@@ -1846,6 +1854,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
18461854
'@----------------------------------------------
18471855
'Advance stream
18481856
CSVstream.ReadText
1857+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
18491858
StreamEnd = CSVstream.atEndOfStream
18501859
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
18511860
Index = 0
@@ -1924,6 +1933,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
19241933
'@----------------------------------------------
19251934
'Advance stream
19261935
CSVstream.ReadText
1936+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
19271937
StreamEnd = CSVstream.atEndOfStream
19281938
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
19291939
Index = 0
@@ -1977,6 +1987,7 @@ Private Sub StreamParseCSV(configObj As parserConfig, _
19771987
'@----------------------------------------------
19781988
'Advance stream
19791989
CSVstream.ReadText
1990+
StreamWithQuotes = InStrB(1, CSVstream.bufferString, CHR_DOUBLE_QUOTES)
19801991
StreamEnd = CSVstream.atEndOfStream
19811992
tmpCSV() = Split(CSVstream.bufferString, RecordDelimiter)
19821993
Index = 0
@@ -2091,6 +2102,7 @@ AdvanceLine:
20912102
'@----------------------------------------------
20922103
'Advance stream
20932104
Ostream.ReadText
2105+
StreamWithQuotes = InStrB(1, Ostream.bufferString, CHR_DOUBLE_QUOTES)
20942106
Arr() = Split(Ostream.bufferString, recDelimiter)
20952107
idx = 0
20962108
MaxIdx = UBound(Arr)
@@ -2106,6 +2118,7 @@ AdvanceLine:
21062118
'@----------------------------------------------
21072119
'Advance stream
21082120
Ostream.ReadText
2121+
StreamWithQuotes = InStrB(1, Ostream.bufferString, CHR_DOUBLE_QUOTES)
21092122
Arr() = Split(Ostream.bufferString, recDelimiter)
21102123
idx = 0
21112124
MaxIdx = UBound(Arr)

0 commit comments

Comments
 (0)