Skip to content

Commit 6b95d33

Browse files
committed
Release v1.1.5 - Performance improvements in Async Parsing
1 parent bcfeb42 commit 6b95d33

File tree

6 files changed

+79
-87
lines changed

6 files changed

+79
-87
lines changed

GitVersion.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
next-version: 1.1.0
1+
next-version: 1.1.5
22
tag-prefix: '[vV]'
33
mode: ContinuousDeployment
44
branches:

README.md

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# <img src="https://github.com/CodeShayk/parsley.net/blob/master/Images/ninja-icon-16.png" alt="ninja" style="width:30px;"/> Parsley.Net v1.1.0
1+
# <img src="https://github.com/CodeShayk/parsley.net/blob/master/Images/ninja-icon-16.png" alt="ninja" style="width:30px;"/> Parsley.Net v1.1.5
22
[![NuGet version](https://badge.fury.io/nu/Parsley.Net.svg)](https://badge.fury.io/nu/Parsley.Net) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/CodeShayk/Parsley.Net/blob/master/LICENSE.md)
33
[![GitHub Release](https://img.shields.io/github/v/release/CodeShayk/Parsley.Net?logo=github&sort=semver)](https://github.com/CodeShayk/Parsley.Net/releases/latest)
44
[![master-build](https://github.com/CodeShayk/parsley.net/actions/workflows/Master-Build.yml/badge.svg)](https://github.com/CodeShayk/parsley.net/actions/workflows/Master-Build.yml)
@@ -44,9 +44,9 @@ NuGet\Install-Package Parsley.Net
4444

4545
Please see below.
4646
```
47-
public interface IParser
48-
{
49-
/// <summary>
47+
public interface IParser
48+
{
49+
/// <summary>
5050
/// Parses a file at the specified filepath into an array of objects of type T.
5151
/// </summary>
5252
/// <typeparam name="T"></typeparam>
@@ -63,20 +63,20 @@ Please see below.
6363
T[] Parse<T>(string[] lines) where T : IFileLine, new();
6464
6565
/// <summary>
66-
/// Parses a stream of delimiter separated records into an array of objects of type T.
66+
/// Parses an array of bytes of delimiter separated records into an array of objects of type T.
6767
/// </summary>
6868
/// <typeparam name="T"></typeparam>
69-
/// <param name="stream"></param>
69+
/// <param name="bytes"></param>
7070
/// <returns></returns>
71-
T[] Parse<T>(Stream stream) where T : IFileLine, new();
71+
T[] Parse<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new();
7272
7373
/// <summary>
74-
/// Parses an array of bytes of delimiter separated records into an array of objects of type T.
74+
/// Parses a stream of delimiter separated records into an array of objects of type T.
7575
/// </summary>
7676
/// <typeparam name="T"></typeparam>
77-
/// <param name="bytes"></param>
77+
/// <param name="stream"></param>
7878
/// <returns></returns>
79-
T[] Parse<T>(byte[] bytes) where T : IFileLine, new();
79+
T[] Parse<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new();
8080
8181
/// <summary>
8282
/// Asynchronously parses a file at the specified filepath into an array of objects of type T.
@@ -95,20 +95,21 @@ Please see below.
9595
Task<T[]> ParseAsync<T>(string[] lines) where T : IFileLine, new();
9696
9797
/// <summary>
98-
/// Asynchronously parses a stream of delimiter separated strings into an array of objects of type T.
98+
/// Asynchronously parses an array of bytes of delimiter separated records into an array of objects of type T.
9999
/// </summary>
100100
/// <typeparam name="T"></typeparam>
101-
/// <param name="stream"></param>
101+
/// <param name="bytes"></param>
102102
/// <returns></returns>
103-
Task<T[]> ParseAsync<T>(Stream stream) where T : IFileLine, new();
103+
Task<T[]> ParseAsync<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new();
104+
104105
/// <summary>
105-
/// Asynchronously parses an array of bytes of delimiter separated records into an array of objects of type T.
106+
/// Asynchronously parses a stream of delimiter separated records into an array of objects of type T.
106107
/// </summary>
107108
/// <typeparam name="T"></typeparam>
108-
/// <param name="bytes"></param>
109+
/// <param name="stream"></param>
109110
/// <returns></returns>
110-
Task<T[]> ParseAsync<T>(byte[] bytes) where T : IFileLine, new();
111-
}
111+
Task<T[]> ParseAsync<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new();
112+
}
112113
```
113114
You can initialise the `Parser` class manually or via dependency injection, as shown below. The `Parser` class has a parameterised constructor that takes the delimiter character used to initialise the instance. The default delimiter is ',' (comma), which initialises the parser for CSV file parsing.
114115

@@ -338,6 +339,7 @@ The main branch is now on .NET 9.0.
338339
| -------- | --------|
339340
| [`v1.0.0`](https://github.com/CodeShayk/parsley.net/tree/v1.0.0) | [Notes](https://github.com/CodeShayk/Parsley.Net/releases/tag/v1.0.0) |
340341
| [`v1.1.0`](https://github.com/CodeShayk/parsley.net/tree/v1.1.0) | [Notes](https://github.com/CodeShayk/Parsley.Net/releases/tag/v1.1.0) |
342+
| [`v1.1.5`](https://github.com/CodeShayk/parsley.net/tree/v1.1.5) | [Notes](https://github.com/CodeShayk/Parsley.Net/releases/tag/v1.1.5) |
341343

342344
## Credits
343345
Thank you for reading. Please fork, explore, contribute and report. Happy Coding !! :)

src/Parsley/IParser.cs

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.IO;
2+
using System.Text;
23
using System.Threading.Tasks;
34

45
namespace parsley
@@ -19,23 +20,23 @@ public interface IParser
1920
/// <typeparam name="T"></typeparam>
2021
/// <param name="lines"></param>
2122
/// <returns></returns>
22-
T[] Parse<T>(string[] lines) where T : IFileLine, new();
23-
23+
T[] Parse<T>(string[] lines) where T : IFileLine, new();
24+
2425
/// <summary>
25-
/// Parses a stream of delimiter separated records into an array of objects of type T.
26+
/// Parses an array of bytes of delimiter separated records into an array of objects of type T.
2627
/// </summary>
2728
/// <typeparam name="T"></typeparam>
28-
/// <param name="stream"></param>
29+
/// <param name="bytes"></param>
2930
/// <returns></returns>
30-
T[] Parse<T>(Stream stream) where T : IFileLine, new();
31-
31+
T[] Parse<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new();
32+
3233
/// <summary>
33-
/// Parses an array of bytes of delimiter separated records into an array of objects of type T.
34+
/// Parses a stream of delimiter separated records into an array of objects of type T.
3435
/// </summary>
3536
/// <typeparam name="T"></typeparam>
36-
/// <param name="bytes"></param>
37+
/// <param name="stream"></param>
3738
/// <returns></returns>
38-
T[] Parse<T>(byte[] bytes) where T : IFileLine, new();
39+
T[] Parse<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new();
3940

4041
/// <summary>
4142
/// Asynchronously parses a file at the specified filepath into an array of objects of type T.
@@ -52,21 +53,21 @@ public interface IParser
5253
/// <param name="lines"></param>
5354
/// <returns></returns>
5455
Task<T[]> ParseAsync<T>(string[] lines) where T : IFileLine, new();
55-
56+
5657
/// <summary>
57-
/// Asynchronously parses a stream of delimiter separated strings into an array of objects of type T.
58+
/// Asynchronously parses an array of bytes of delimiter separated records into an array of objects of type T.
5859
/// </summary>
5960
/// <typeparam name="T"></typeparam>
60-
/// <param name="stream"></param>
61+
/// <param name="bytes"></param>
6162
/// <returns></returns>
62-
Task<T[]> ParseAsync<T>(Stream stream) where T : IFileLine, new();
63-
63+
Task<T[]> ParseAsync<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new();
64+
6465
/// <summary>
65-
/// Asynchronously parses an array of bytes of delimiter separated records into an array of objects of type T.
66+
/// Asynchronously parses a stream of delimiter separated records into an array of objects of type T.
6667
/// </summary>
6768
/// <typeparam name="T"></typeparam>
68-
/// <param name="bytes"></param>
69+
/// <param name="stream"></param>
6970
/// <returns></returns>
70-
Task<T[]> ParseAsync<T>(byte[] bytes) where T : IFileLine, new();
71+
Task<T[]> ParseAsync<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new();
7172
}
7273
}

src/Parsley/Parser.cs

Lines changed: 35 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,8 @@ public Parser(char delimiter)
6868
}
6969

7070
private string[] ReadToLines(string path)
71-
{
72-
var lines = new List<string>();
73-
74-
foreach (var line in File.ReadLines(path))
75-
{
76-
if (line != null)
77-
lines.Add(line);
78-
}
79-
80-
return lines.ToArray<string>();
71+
{
72+
return File.ReadAllLines(path);
8173
}
8274

8375
private T ParseLine<T>(string line) where T : IFileLine, new()
@@ -165,13 +157,13 @@ private string[] GetDelimiterSeparatedValues(string line)
165157
return values;
166158
}
167159

168-
public T[] Parse<T>(Stream stream) where T : IFileLine, new()
160+
public T[] Parse<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new()
169161
{
170162
if (stream == null || stream.Length == 0)
171163
return Array.Empty<T>();
172164

173165
var lines = new List<string>();
174-
using (var reader = new StreamReader(stream, Encoding.UTF8))
166+
using (var reader = new StreamReader(stream, encoding ?? Encoding.UTF8))
175167
{
176168
string line;
177169
while ((line = reader.ReadLine()) != null)
@@ -186,53 +178,36 @@ private string[] GetDelimiterSeparatedValues(string line)
186178
return lines.Any() ? Parse<T>(lines.ToArray()) : Array.Empty<T>();
187179
}
188180

189-
public T[] Parse<T>(byte[] bytes) where T : IFileLine, new()
181+
public T[] Parse<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new()
190182
{
191183
if (bytes == null || bytes.Length == 0)
192184
return Array.Empty<T>();
193185

194-
return Parse<T>(new MemoryStream(bytes));
186+
return Parse<T>(new MemoryStream(bytes), encoding);
195187
}
196188

197189
public async Task<T[]> ParseAsync<T>(string filepath) where T : IFileLine, new()
198190
{
199191
if (string.IsNullOrEmpty(filepath) || !File.Exists(filepath))
200192
return Array.Empty<T>();
201193

202-
var lines = await Task.Run(() => ReadToLines(filepath));
194+
var lines = await Task.FromResult(ReadToLines(filepath));
203195

204196
return await ParseAsync<T>(lines);
205-
}
206-
207-
public async Task<T[]> ParseAsync<T>(string[] lines) where T : IFileLine, new()
197+
}
198+
199+
public async Task<T[]> ParseAsync<T>(byte[] bytes, Encoding encoding = null) where T : IFileLine, new()
208200
{
209-
if (lines == null || lines.Length == 0)
201+
if (bytes == null || bytes.Length == 0)
210202
return Array.Empty<T>();
211203

212-
var list = new T[lines.Length];
213-
var index = 0;
214-
var inputs = lines.Select(line => new { Line = line, Index = index++ });
215-
216-
foreach (var input in inputs)
217-
{
218-
if (string.IsNullOrWhiteSpace(input.Line))
219-
continue;
220-
221-
var parsedLine = await Task.Run(() => ParseLine<T>(input.Line));
204+
return await ParseAsync<T>(new MemoryStream(bytes), encoding);
205+
}
222206

223-
if (parsedLine != null)
224-
{
225-
parsedLine.Index = input.Index;
226-
list[parsedLine.Index] = parsedLine;
227-
}
228-
}
229-
return list;
230-
}
231-
232-
public async Task<T[]> ParseAsync<T>(Stream stream) where T : IFileLine, new()
207+
public async Task<T[]> ParseAsync<T>(Stream stream, Encoding encoding = null) where T : IFileLine, new()
233208
{
234209
var lines = new List<string>();
235-
using (var reader = new StreamReader(stream, Encoding.UTF8))
210+
using (var reader = new StreamReader(stream, encoding ?? Encoding.UTF8))
236211
{
237212
string line;
238213
while ((line = await reader.ReadLineAsync()) != null)
@@ -244,14 +219,30 @@ private string[] GetDelimiterSeparatedValues(string line)
244219
}
245220

246221
return lines.Any() ? await ParseAsync<T>(lines.ToArray()) : Array.Empty<T>();
247-
}
222+
}
248223

249-
public async Task<T[]> ParseAsync<T>(byte[] bytes) where T : IFileLine, new()
224+
public async Task<T[]> ParseAsync<T>(string[] lines) where T : IFileLine, new()
250225
{
251-
if (bytes == null || bytes.Length == 0)
226+
if (lines == null || lines.Length == 0)
252227
return Array.Empty<T>();
253228

254-
return await ParseAsync<T>(new MemoryStream(bytes));
229+
var index = 0;
230+
var indexedLines = lines
231+
.Select((line) => new { Line = line, Index = index++ })
232+
.Where(x => !string.IsNullOrWhiteSpace(x.Line))
233+
.ToArray();
234+
235+
var tasks = indexedLines
236+
.Select(x => Task.Run(() => new { x.Index, Parsed = ParseLine<T>(x.Line) }))
237+
.ToArray();
238+
239+
var results = await Task.WhenAll(tasks);
240+
241+
var list = new T[tasks.Length];
242+
foreach (var result in results)
243+
list[result.Index] = result.Parsed;
244+
245+
return list;
255246
}
256247
}
257248
}

src/Parsley/Parsley.csproj

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@
1919
<GenerateDocumentationFile>True</GenerateDocumentationFile>
2020
<PackageProjectUrl>https://github.com/CodeShayk/Parsley.Net/wiki</PackageProjectUrl>
2121
<RepositoryUrl>https://github.com/CodeShayk/Parsley.Net</RepositoryUrl>
22-
<PackageReleaseNotes>
23-
v1.1.0 - Targets .Net9.0, .NetStandard2.1, .NetStandard2.0, and .NetFramework4.6.4. <br/>
24-
* Includes core functionality for parsing delimiter separated files.
25-
* Provided Sync and Async parsing methods</PackageReleaseNotes>
26-
<Version>1.1.0</Version>
22+
<PackageReleaseNotes>v1.1.5 - performance improvements in async parsing.</PackageReleaseNotes>
23+
<Version>1.1.5</Version>
2724
<PackageRequireLicenseAcceptance>True</PackageRequireLicenseAcceptance>
2825
<AssemblyName>Parsley.Net</AssemblyName>
2926
</PropertyGroup>

tests/Parsley.Tests/ParserFixture.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public void TestParseForDependencyInjectionShouldReturnInitialisedInstance()
3939
}
4040

4141
[Test]
42-
public void TestParseWithFileInputShouldReturnCorrectlyParsedArray()
42+
public void TestParseWithFilePathShouldReturnCorrectlyParsedArray()
4343
{
4444
var filePath = Path.Combine(Environment.CurrentDirectory, "TestFile.txt");
4545

@@ -221,14 +221,15 @@ public async Task TestParseAsyncWithStringArrayInputShouldReturnCorrectlyParsedA
221221
var lines = new[]
222222
{
223223
"GB-01|Bob Marley|True|Free",
224-
"UH-02|John Walsh McKinsey|False|Paid"
224+
"UH-02|John Walsh McKinsey|False|Paid",
225+
"UH-03|Fred Wigg|False|Paid",
225226
};
226227

227228
parser = new Parser('|');
228229

229230
var parsed = await parser.ParseAsync<FileLine>(lines);
230231

231-
Assert.That(parsed.Length, Is.EqualTo(2));
232+
Assert.That(parsed.Length, Is.EqualTo(3));
232233

233234
Assert.That(parsed[0].Code.Batch, Is.EqualTo("GB"));
234235
Assert.That(parsed[0].Code.SerialNo, Is.EqualTo(1));

0 commit comments

Comments
 (0)