Skip to content

Commit 4d743e9

Browse files
authored
feat: definition of dialect profile for CSV parser (#85)
1 parent 4a1908a commit 4d743e9

28 files changed

+720
-50
lines changed

docs/_data/navigation_docs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
- basic-usage
2020
- use-stdin-stdout
2121
- specify-engine-parser
22+
- specify-parser-parameters
2223
- multiple-sources
2324

2425
- title: Parsers
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
---
2+
title: Specify parser's parameters
3+
tags: [cli-usage]
4+
---
5+
## Direct specification of data parser's parameters
6+
7+
- `-P, --parser-parameter`: Defines the parameters (or configuration) to apply to a parser. Each value has the form `file extension`, followed by `@`, followed by the `parameter name`, followed by `:` and finally the `parameter value`. This option is always optional; it is only supported by a few specific parsers, each with its own predefined set of parameters.
8+
9+
```bash
10+
didot -t template.hbs -s data.tsv -X tsv:csv -P tsv@delimiter:Semicolon;tsv@commentChar:#
11+
```
12+
13+
In this example:
14+
15+
- `template.hbs` is the Handlebars template file.
16+
- `data.tsv` is the source file.
17+
- `tsv:csv` associates the extension `.tsv` with the CSV parser.
18+
- `tsv@delimiter:Semicolon;tsv@commentChar:#` defines two parameters for the parser associated with the `.tsv` file extension. The first one sets the parameter `delimiter` to a semicolon (`;`) and the second one sets the parameter `commentChar` to a hash (`#`).
19+
20+
## Parameters for CSV parsers
21+
22+
The following parameters are accepted by Didot to define the behavior of a CSV parser:
23+
24+
- `delimiter`: Specifies the delimiter between fields.
25+
- `lineTerminator`: Specifies the delimiter between records.
26+
- `quoteChar`: Character used to quote fields.
27+
- `doubleQuote`: Whether double quotes are used to escape quotes within quoted fields.
28+
- `escapeChar`: Character used for escaping.
29+
- `header`: Indicates if the first row contains headers.
30+
- `skipInitialSpace`: Whether spaces after delimiters are skipped.
31+
- `commentChar`: Character used to denote comments.
32+
33+
More information about these parameters can be found in [the documentation of PocketCsvReader](https://seddryck.github.io/PocketCsvReader/docs/csv-dialect-descriptor/).
34+
35+
### Synonyms for parameter values
36+
37+
To avoid conflicts with other parts of the command line, Didot supports the following synonyms for parameter values:
38+
39+
#### **delimiter**
40+
41+
- `Comma` = `,`
42+
- `Semicolon` = `;`
43+
- `Tab` = `\t`
44+
- `Pipe` = `|`
45+
46+
#### **lineTerminator**
47+
48+
- `CarriageReturnLineFeed` = `\r\n`
49+
- `LineFeed` = `\n`
50+
- `CarriageReturn` = `\r`
51+
52+
#### **quoteChar**
53+
54+
- `DoubleQuote` = `"`
55+
- `SingleQuote` = `'`
56+
57+
#### **escapeChar**
58+
59+
- `BackSlash` = `\`
60+
- `ForwardSlash` = `/`
61+
62+
#### **commentChar**
63+
64+
- `Hash` = `#`
65+
- `Semicolon` = `;`
66+
- `ForwardSlash` = `/`
67+
- `Dash` = `-`

src/Didot.Cli/RenderCommand.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ public RenderCommand(RenderOptions options, ILogger<RenderCommand>? logger = nul
1717
{
1818
options.EngineExtensions.SetDefaultValue(new Dictionary<string, string>());
1919
options.ParserExtensions.SetDefaultValue(new Dictionary<string, string>());
20+
options.ParserParams.SetDefaultValue(new Dictionary<string, string>());
2021
options.Sources.SetDefaultValue(new Dictionary<string, string>());
2122

2223
AddOption(options.Template);
@@ -26,6 +27,7 @@ public RenderCommand(RenderOptions options, ILogger<RenderCommand>? logger = nul
2627
AddOption(options.StdIn);
2728
AddOption(options.Parser);
2829
AddOption(options.ParserExtensions);
30+
AddOption(options.ParserParams);
2931
AddOption(options.Output);
3032

3133
AddValidator(result =>
@@ -71,6 +73,7 @@ public RenderCommand(RenderOptions options, ILogger<RenderCommand>? logger = nul
7173
, options.Sources
7274
, options.Parser
7375
, options.ParserExtensions
76+
, options.ParserParams
7477
, options.Output);
7578
}
7679
}

src/Didot.Cli/RenderCommandHandler.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ string template
2424
, IDictionary<string, string> sources
2525
, string parser
2626
, IDictionary<string, string> parserExtensions
27+
, IDictionary<string, string> parserParams
2728
, string output
2829
)
2930
{
@@ -33,7 +34,10 @@ string template
3334
try
3435
{
3536
Logger?.LogInformation("Configured {ParserExtensionCount} parser associations to file extensions.", parserExtensions.Count());
36-
var parserFactory = GetSourceParserFactory(parserExtensions);
37+
Logger?.LogInformation("Configured {ParserExtensionCount} parser parameters.", parserParams.Count());
38+
foreach (var param in parserParams)
39+
Logger?.LogInformation("Parser parameter: {ParserParamKey}={ParserParamValue}.", param.Key, param.Value);
40+
var parserFactory = GetSourceParserFactory(parserExtensions, parserParams);
3741

3842
Logger?.LogInformation("Configured {EnginerExtensionCount} template engine associations to file extensions.", engineExtensions.Count());
3943
var engineFactory = GetTemplateEngineFactory(engineExtensions);
@@ -71,10 +75,11 @@ string template
7175
}
7276
}
7377

74-
protected virtual FileBasedSourceParserFactory GetSourceParserFactory(IDictionary<string, string> keyValues)
78+
protected virtual FileBasedSourceParserFactory GetSourceParserFactory(IDictionary<string, string> extensions, IDictionary<string, string> parameters)
7579
{
76-
var factory = new FileBasedSourceParserFactory();
77-
factory.AddOrReplace(keyValues);
80+
var factory = new FileBasedSourceParserFactory(parameters);
81+
factory.AddOrReplace(extensions);
82+
7883
return factory;
7984
}
8085

src/Didot.Cli/RenderOptions.cs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
using System.Threading.Tasks;
66
using System.CommandLine;
77
using System.CommandLine.Parsing;
8+
using System.Runtime.CompilerServices;
9+
using Didot.Core;
810

911
namespace Didot.Cli;
1012
public class RenderOptions
@@ -74,6 +76,16 @@ public class RenderOptions
7476
AllowMultipleArgumentsPerToken = true
7577
};
7678

79+
public Option<Dictionary<string, string>> ParserParams { get; } = new Option<Dictionary<string, string>>(
80+
new[] { "-P", "--parser-parameter" },
81+
description: "Provide key-value parameters for parsers, such as configuration or dialect settings.",
82+
parseArgument: result => ParseKeyValuePairs(result, ':', ';')
83+
)
84+
{
85+
Arity = ArgumentArity.ZeroOrMore,
86+
AllowMultipleArgumentsPerToken = true
87+
};
88+
7789
public Option<string> Output { get; } = new Option<string>(
7890
new[] { "-o", "--output" },
7991
description: "Path to the generated file."
@@ -103,7 +115,10 @@ private static Dictionary<string, string> ParseKeyValuePairs(ArgumentResult resu
103115
}
104116
}
105117
else if (keyValue.Length == 2)
106-
dictionary[normalizeKey?.Invoke(keyValue[0]) ?? keyValue[0]] = keyValue[1].Trim();
118+
{
119+
var value = keyValue[1].Length > 0 && keyValue[1].Trim().Length == 0 ? keyValue[1] : keyValue[1].Trim();
120+
dictionary[normalizeKey?.Invoke(keyValue[0]) ?? keyValue[0]] = value;
121+
}
107122
else
108123
{
109124
result.ErrorMessage = $"Invalid key-value pair: {pair}";
@@ -114,11 +129,6 @@ private static Dictionary<string, string> ParseKeyValuePairs(ArgumentResult resu
114129
return dictionary;
115130
}
116131

117-
private static string NormalizeExtension(string extension)
118-
{
119-
extension = extension.Trim().ToLowerInvariant();
120-
if (extension.StartsWith('.'))
121-
return extension;
122-
return $".{extension}";
123-
}
132+
private static string NormalizeExtension(string value)
133+
=> value.NormalizeExtension();
124134
}

src/Didot.Core/BaseFactory.cs

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,29 @@
33
using System.Linq;
44
using System.Text;
55
using System.Threading.Tasks;
6+
using Didot.Core.SourceParsers;
67

7-
namespace Didot.Core.SourceParsers;
8+
namespace Didot.Core;
89
public abstract class BaseFactory<T>
910
{
1011
protected Dictionary<string, T> items = new();
1112

1213
protected abstract string ClassToken {get;}
1314

14-
public BaseFactory()
15-
{
16-
Initialize();
17-
}
15+
protected BaseFactory()
16+
=> Initialize(new Dictionary<string, string>());
1817

19-
protected abstract void Initialize();
18+
protected BaseFactory(IDictionary<string, string> parameters)
19+
=> Initialize(parameters);
20+
21+
protected abstract void Initialize(IDictionary<string, string> parameters);
2022

2123
public virtual void AddOrReplace(string extension, T item)
2224
{
23-
extension = NormalizeExtension(extension);
25+
extension = extension.NormalizeExtension();
2426

25-
if (items.ContainsKey(extension))
27+
if (!items.TryAdd(extension, item))
2628
items[extension] = item;
27-
else
28-
items.Add(extension, item);
2929
}
3030

3131
public virtual void AddOrReplace(IEnumerable<KeyValuePair<string, T>> keyValues)
@@ -34,23 +34,15 @@ public virtual void AddOrReplace(IEnumerable<KeyValuePair<string, T>> keyValues)
3434
public virtual void AddOrReplace(IEnumerable<KeyValuePair<string, string>> keyValues)
3535
=> keyValues.ToList().ForEach(kv => AddOrReplace(kv.Key, GetByTag(kv.Value)));
3636

37-
protected virtual string NormalizeExtension(string extension)
38-
{
39-
extension = extension.Trim().ToLowerInvariant();
40-
if (!extension.StartsWith('.'))
41-
extension = $".{extension}";
42-
return extension;
43-
}
44-
4537
public T GetByExtension(string extension)
4638
{
47-
extension = NormalizeExtension(extension);
39+
extension = extension.NormalizeExtension(); ;
4840
if (items.TryGetValue(extension, out var engine))
4941
return engine;
5042
throw new NotSupportedException(nameof(extension));
5143
}
5244

53-
public Dictionary<string, Type> ListByTags()
45+
protected Dictionary<string, Type> ListByTags()
5446
{
5547
var asm = typeof(T).Assembly;
5648
var types = asm.GetTypes()
@@ -68,11 +60,14 @@ public Dictionary<string, Type> ListByTags()
6860
return dict;
6961
}
7062

71-
public T GetByTag(string tag)
63+
public virtual T GetByTag(string tag)
7264
{
7365
tag = tag.Trim().ToLowerInvariant();
7466
if (!ListByTags().TryGetValue(tag, out var engineType))
7567
throw new Exception(tag);
76-
return (T)Activator.CreateInstance(engineType)!;
68+
69+
var engine = items.Values.FirstOrDefault(x => x is not null && x.GetType() == engineType)
70+
?? (T)Activator.CreateInstance(engineType)!;
71+
return engine;
7772
}
7873
}

src/Didot.Core/Didot.Core.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
<PackageReference Include="Handlebars.Net" Version="2.1.6" />
1313
<PackageReference Include="Handlebars.Net.Helpers" Version="2.4.7" />
1414
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
15-
<PackageReference Include="PocketCsvReader" Version="2.11.1" />
15+
<PackageReference Include="PocketCsvReader" Version="2.12.0" />
1616
<PackageReference Include="Scriban" Version="5.12.1" />
1717
<PackageReference Include="SmartFormat.NET" Version="3.5.1" />
1818
<PackageReference Include="YamlDotNet" Version="16.2.1" />
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace Didot.Core.SourceParsers;
8+
public abstract class BaseSourceBuilder<T> : ISourceBuilder<T> where T : ISourceParser
9+
{
10+
public bool CanHandle(Type type)
11+
=> typeof(T) == type;
12+
13+
public abstract ISourceParser Build(IDictionary<string, string> parameters, string extension);
14+
}

src/Didot.Core/SourceParsers/CsvSource.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@
99
namespace Didot.Core.SourceParsers;
1010
public class CsvSource : ISourceParser
1111
{
12+
private CsvReader CsvReader { get; }
13+
14+
public CsvDialectDescriptor Dialect { get => CsvReader.Dialect; }
15+
16+
public CsvSource()
17+
=> CsvReader = new CsvReader(new CsvProfile(true));
18+
19+
public CsvSource(CsvReader csvReader)
20+
=> CsvReader = csvReader;
1221

1322
public virtual object Parse(string content)
1423
{
@@ -20,7 +29,7 @@ public virtual object Parse(Stream stream)
2029
{
2130
var list = new List<object>();
2231

23-
using var reader = new CsvReader(new CsvProfile(true)).ToDataReader(stream);
32+
using var reader = CsvReader.ToDataReader(stream);
2433
var seenRecords = new Dictionary<string, Dictionary<string, object>>();
2534

2635
while (reader.Read())
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Reflection.Metadata.Ecma335;
5+
using System.Runtime.CompilerServices;
6+
using System.Text;
7+
using System.Threading.Tasks;
8+
using PocketCsvReader.Configuration;
9+
10+
namespace Didot.Core.SourceParsers;
11+
12+
[Extension(".csv")]
13+
internal class CsvSourceBuilder : BaseSourceBuilder<CsvSource>
14+
{
15+
protected Dictionary<string, Action<string>> Actions = [];
16+
17+
private DialectDescriptorBuilder _dialect = new();
18+
19+
public CsvSourceBuilder()
20+
{
21+
Initialize();
22+
}
23+
24+
protected virtual void Initialize()
25+
{
26+
Actions.Add("delimiter", (delimiter) => _dialect.WithDelimiter(MapEnumToChar<Delimiter>(delimiter)));
27+
Actions.Add("lineTerminator", (lineTerminator) => _dialect.WithLineTerminator(MapLineTerminator(lineTerminator)));
28+
Actions.Add("quoteChar", (quoteChar) => _dialect.WithQuoteChar(MapEnumToChar<QuoteChar>(quoteChar)));
29+
Actions.Add("doubleQuote", (doubleQuote) => _dialect.WithDoubleQuote(doubleQuote.ToBoolean()));
30+
Actions.Add("escapeChar", (escapeChar) => _dialect.WithEscapeChar(MapEnumToChar<EscapeChar>(escapeChar)));
31+
Actions.Add("nullSequence", (nullSequence) => _dialect.WithNullSequence(nullSequence));
32+
Actions.Add("skipInitialSpace", (skipInitialSpace) => _dialect.WithSkipInitialSpace(skipInitialSpace.ToBoolean()));
33+
Actions.Add("commentChar", (commentChar) => _dialect.WithCommentChar(MapEnumToChar<CommentChar>(commentChar)));
34+
Actions.Add("header", (header) => _dialect.WithHeader(header.ToBoolean()));
35+
Actions.Add("caseSensitiveHeader", (caseSensitiveHeader) => _dialect.WithCaseSensitiveHeader(caseSensitiveHeader.ToBoolean()));
36+
}
37+
38+
public override ISourceParser Build(IDictionary<string, string> parameters, string extension)
39+
{
40+
extension = $"{extension.NormalizeExtension()}@".Substring(1);
41+
foreach (var kv in parameters.Where(x => x.Key.StartsWith(extension)))
42+
if (Actions.TryGetValue(kv.Key.Split('@')[1], out var action))
43+
action(kv.Value);
44+
45+
var builder = new CsvReaderBuilder();
46+
var csvReader = builder.WithDialectDescriptor((_) => _dialect).Build();
47+
return new CsvSource(csvReader);
48+
}
49+
50+
private char MapEnumToChar<T>(string value) where T : Enum
51+
{
52+
if (value.Length == 1)
53+
return value[0];
54+
55+
return (char)(int)Enum.Parse(typeof(T),
56+
Enum.GetNames(typeof(T)).FirstOrDefault(x => x.Equals(value, StringComparison.InvariantCultureIgnoreCase))
57+
?? throw new NotSupportedException(value)
58+
);
59+
}
60+
61+
private string MapLineTerminator(string value)
62+
{
63+
if (value.Length >= 1 && value.Length <= 2)
64+
return value;
65+
66+
var name = Enum.Parse<LineTerminator>(
67+
Enum.GetNames<LineTerminator>().FirstOrDefault(x => x.Equals(value, StringComparison.InvariantCultureIgnoreCase))
68+
?? throw new NotSupportedException(value));
69+
70+
return name switch
71+
{
72+
LineTerminator.CarriageReturn => "\r",
73+
LineTerminator.LineFeed => "\n",
74+
LineTerminator.CarriageReturnLineFeed => "\r\n",
75+
_ => throw new NotSupportedException(value),
76+
};
77+
}
78+
}

0 commit comments

Comments
 (0)