Skip to content

Commit 42c8d83

Browse files
committed
Began adding logic for token post processing. Simplified definition and use of Token.
1 parent 456da60 commit 42c8d83

16 files changed

+560
-213
lines changed

Binder.Tests/new-alloc/TokenizerTests.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ public void Simple( TokenizerConfig config )
1717
{
1818
var cmdLogger = new CommandLineLogger();
1919
var tokenColl = TokenCollection.GetDefault( CommandLineStyle.Windows, cmdLogger );
20+
var tokenizer = new Tokenizer( tokenColl, cmdLogger );
2021

21-
var tokens = tokenColl.Tokenize( config.CommandLine );
22+
var tokens = tokenizer.Tokenize( config.CommandLine );
2223

2324
tokens.Count.Should().Be( config.Data.Count + 1 );
2425

Binder.Tests/test-data/TokenizerConfig.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Collections.Generic;
2+
#pragma warning disable 8618
23

34
namespace J4JSoftware.Binder.Tests
45
{

Binder.Tests/test-data/TokenizerData.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using J4JSoftware.CommandLine;
2+
#pragma warning disable 8618
23

34
namespace J4JSoftware.Binder.Tests
45
{

Binder/grammar/ITokenizer.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
using System.Collections.Generic;
2+
3+
namespace J4JSoftware.CommandLine
4+
{
5+
public interface ITokenizer
6+
{
7+
List<Token> Tokenize( string cmdLine );
8+
}
9+
}

Binder/grammar/Token.cs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,8 @@ string text
1414
Text = text;
1515
}
1616

17-
private Token( Token toCopy )
18-
{
19-
Type = toCopy.Type;
20-
Text = toCopy.Text;
21-
}
22-
23-
public Token Copy() => new Token( this );
24-
2517
public TokenType Type { get; }
2618
public string Text { get; }
2719
public int Length => Text.Length;
2820
}
29-
30-
public class TokenMatch
31-
{
32-
public TokenMatch( Token token, int startChar )
33-
{
34-
Token = token;
35-
StartChar = startChar;
36-
}
37-
38-
public Token Token { get; }
39-
public int StartChar { get; }
40-
}
4121
}

Binder/grammar/TokenCollection.cs

Lines changed: 28 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,9 @@
77

88
namespace J4JSoftware.CommandLine
99
{
10-
public class TokenCollection
10+
public partial class TokenCollection
1111
{
12-
public static TokenCollection GetDefault(
13-
CommandLineStyle style,
14-
CommandLineLogger logger,
15-
StringComparison? textComparison = null )
16-
{
17-
textComparison ??= style == CommandLineStyle.Linux
18-
? StringComparison.Ordinal
19-
: StringComparison.OrdinalIgnoreCase;
20-
21-
var retVal = new TokenCollection( textComparison.Value, logger );
22-
23-
if( style == CommandLineStyle.UserDefined )
24-
{
25-
logger.LogError( "Requested a user-defined default TokenCollection, which contains no tokens" );
26-
return retVal;
27-
}
28-
29-
retVal.Add( TokenType.Separator, " " );
30-
retVal.Add( TokenType.Separator, "\t" );
31-
retVal.Add( TokenType.ValuePrefix, "=" );
32-
33-
switch( style )
34-
{
35-
case CommandLineStyle.Linux:
36-
retVal.Add( TokenType.Quoter, "\"" );
37-
retVal.Add( TokenType.Quoter, "'" );
38-
retVal.Add( TokenType.KeyPrefix, "-" );
39-
retVal.Add( TokenType.KeyPrefix, "--" );
40-
41-
break;
42-
43-
case CommandLineStyle.Windows:
44-
retVal.Add( TokenType.Quoter, "\"" );
45-
retVal.Add(TokenType.Quoter, "'");
46-
retVal.Add( TokenType.KeyPrefix, "/" );
47-
48-
break;
49-
50-
case CommandLineStyle.Universal:
51-
retVal.Add( TokenType.Quoter, "\"" );
52-
retVal.Add( TokenType.Quoter, "'" );
53-
retVal.Add( TokenType.KeyPrefix, "-" );
54-
retVal.Add( TokenType.KeyPrefix, "--" );
55-
retVal.Add( TokenType.KeyPrefix, "/" );
56-
57-
break;
58-
}
59-
60-
return retVal;
61-
}
62-
63-
private readonly List<Token> _tokens = new List<Token>();
12+
private readonly Dictionary<TokenType, List<string>> _available = new Dictionary<TokenType, List<string>>();
6413
private readonly CommandLineLogger _logger;
6514

6615
public TokenCollection( StringComparison textComp, CommandLineLogger logger )
@@ -71,6 +20,20 @@ public TokenCollection( StringComparison textComp, CommandLineLogger logger )
7120

7221
public StringComparison TextComparison { get; }
7322

23+
public IEnumerable<(string text, TokenType type)> AvailableTokens
24+
{
25+
get
26+
{
27+
foreach( var kvp in _available )
28+
{
29+
foreach( var itemText in kvp.Value )
30+
{
31+
yield return ( itemText, kvp.Key );
32+
}
33+
}
34+
}
35+
}
36+
7437
public bool Add( TokenType type, string text )
7538
{
7639
if( type == TokenType.EndOfInput || type == TokenType.Text )
@@ -79,166 +42,39 @@ public bool Add( TokenType type, string text )
7942
return false;
8043
}
8144

82-
if( _tokens.Any( t => t.Text.Equals( text, TextComparison ) ) )
45+
if( _available.SelectMany( kvp => kvp.Value )
46+
.Any( t => t.Equals( text, TextComparison ) ) )
8347
{
8448
_logger.LogError( $"Duplicate token text '{text}' ({type})" );
8549
return false;
8650
}
8751

88-
_tokens.Add( new Token( type, text ) );
52+
if( _available.ContainsKey( type ) )
53+
_available[ type ].Add( text );
54+
else _available.Add( type, new List<string> { text } );
8955

9056
return true;
9157
}
9258

9359
public bool Remove( string text )
9460
{
95-
var idx = _tokens.FindIndex( t => t.Text.Equals( text, TextComparison ) );
61+
var kvp = _available.FirstOrDefault(
62+
x => x.Value.Any( t => t.Equals( text, TextComparison ) ) );
63+
64+
var idx = kvp.Value.FindIndex( x => x.Equals( text, TextComparison ) );
9665

9766
if( idx < 0 )
9867
{
9968
_logger.LogError( $"Couldn't find '{text}' among tokens to delete" );
10069
return false;
10170
}
10271

103-
_tokens.RemoveAt( idx );
72+
kvp.Value.RemoveAt( idx );
10473

10574
return true;
10675
}
10776

108-
public void Clear() => _tokens.Clear();
109-
public int Count => _tokens.Count;
110-
111-
public List<Token> Tokenize( string cmdLine )
112-
{
113-
var rawTokens = new List<Token>();
114-
TokenMatch? firstMatch = null;
115-
116-
while ( cmdLine.Length > 0 )
117-
{
118-
firstMatch = null;
119-
120-
foreach( var token in _tokens )
121-
{
122-
var tokenStart = cmdLine.IndexOf( token.Text, TextComparison );
123-
124-
// If there was no match, go to next token
125-
if( tokenStart < 0 )
126-
continue;
127-
128-
// We have a match. If this is our first match, store it and
129-
// move on to the next token.
130-
if ( firstMatch == null )
131-
{
132-
firstMatch = new TokenMatch( token.Copy(), tokenStart );
133-
continue;
134-
}
135-
136-
// If the current match starts after the existing first match,
137-
// move on to the next token.
138-
if( tokenStart > firstMatch.StartChar )
139-
continue;
140-
141-
// if the current match starts before the existing first match,
142-
// make it the first match and move on to the next token
143-
if( tokenStart < firstMatch.StartChar)
144-
{
145-
firstMatch = new TokenMatch( token.Copy(), tokenStart );
146-
continue;
147-
}
148-
149-
// if the current match and the existing first match both start
150-
// at the same location, make the longest match the first match.
151-
if( firstMatch.Token.Length < token.Length )
152-
{
153-
firstMatch = new TokenMatch( token.Copy(), tokenStart );
154-
}
155-
}
156-
157-
// if no first match was defined, there are no tokens remaining in the
158-
// command line, so embed it all in a single, final token
159-
firstMatch ??= new TokenMatch( new Token( TokenType.Text, cmdLine ), 0 );
160-
161-
// if the first match doesn't start at index 0 there's some text ahead
162-
// of the first recognized token, so output it as such
163-
if( firstMatch.StartChar > 0 )
164-
rawTokens.Add( new Token( TokenType.Text, cmdLine[ ..firstMatch.StartChar ] ) );
165-
166-
rawTokens.Add( firstMatch.Token );
167-
168-
cmdLine = cmdLine.Substring( firstMatch.StartChar + firstMatch.Token.Length );
169-
}
170-
171-
return CleanupTokens( rawTokens );
172-
}
173-
174-
private List<Token> CleanupTokens( List<Token> tokens )
175-
{
176-
var retVal = new List<Token>();
177-
178-
TokenType? prevType = null;
179-
string? quoter = null;
180-
var sb = new StringBuilder();
181-
182-
foreach( var token in tokens )
183-
{
184-
// ignore 2nd, 3rd, etc., consecutive separator tokens
185-
if (prevType == TokenType.Separator
186-
&& token.Type == TokenType.Separator)
187-
continue;
188-
189-
prevType = token.Type;
190-
191-
// how we handle quoters depends on whether we've
192-
// already encountered one
193-
if( token.Type == TokenType.Quoter )
194-
{
195-
// if this is our first quoter, store the quoter text
196-
// so we'll know when we encounter the end-of-quoted text
197-
if( quoter == null )
198-
quoter = token.Text;
199-
else
200-
{
201-
// if this is our second/closing quoter, check to see if it's
202-
// the same text as the first/opening quoter (multiple types
203-
// of quoters are allowed so we have to match them up)
204-
if( token.Text.Equals( quoter, TextComparison ) )
205-
{
206-
// we have a match, meaning we're at the end of the quoted
207-
// tokens, so create a single Text token based on the accumulated
208-
// text of all the intervening tokens
209-
retVal.Add( new Token( TokenType.Text, sb.ToString() ) );
210-
211-
// reset the active quoter in preparation for encountering
212-
// another set of quoted tokens, and reset the StringBuilder
213-
// used to accumulate the quoted text.
214-
quoter = null;
215-
sb.Clear();
216-
}
217-
// it's not the same type of quoter so store it as plain text
218-
else sb.Append( token.Text );
219-
}
220-
}
221-
else
222-
{
223-
// if we're in the midst of quoted tokens accumulate the
224-
// current token's text preparatory to outputting it when we
225-
// encounter the closing quoter.
226-
if( quoter != null )
227-
sb.Append( token.Text );
228-
// otherwise just output the token
229-
else retVal.Add( token );
230-
}
231-
}
232-
233-
// if the StringBuilder has content we must have an unmatched pair
234-
// of quoters, so output what's left as a text token
235-
if( sb.Length > 0 )
236-
retVal.Add( new Token(TokenType.Text, sb.ToString()) );
237-
238-
// add a closing token
239-
retVal.Add( new Token( TokenType.EndOfInput, string.Empty ) );
240-
241-
return retVal;
242-
}
77+
public void Clear() => _available.Clear();
78+
public int Count => _available.Count;
24379
}
24480
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
using System;
2+
3+
namespace J4JSoftware.CommandLine
4+
{
5+
public partial class TokenCollection
6+
{
7+
public static TokenCollection GetDefault(
8+
CommandLineStyle style,
9+
CommandLineLogger logger,
10+
StringComparison? textComparison = null )
11+
{
12+
textComparison ??= style == CommandLineStyle.Linux
13+
? StringComparison.Ordinal
14+
: StringComparison.OrdinalIgnoreCase;
15+
16+
var retVal = new TokenCollection( textComparison.Value, logger );
17+
18+
if( style == CommandLineStyle.UserDefined )
19+
{
20+
logger.LogError( "Requested a user-defined default TokenCollection, which contains no tokens" );
21+
return retVal;
22+
}
23+
24+
retVal.Add( TokenType.Separator, " " );
25+
retVal.Add( TokenType.Separator, "\t" );
26+
retVal.Add( TokenType.ValuePrefix, "=" );
27+
28+
switch( style )
29+
{
30+
case CommandLineStyle.Linux:
31+
retVal.Add( TokenType.Quoter, "\"" );
32+
retVal.Add( TokenType.Quoter, "'" );
33+
retVal.Add( TokenType.KeyPrefix, "-" );
34+
retVal.Add( TokenType.KeyPrefix, "--" );
35+
36+
break;
37+
38+
case CommandLineStyle.Windows:
39+
retVal.Add( TokenType.Quoter, "\"" );
40+
retVal.Add( TokenType.Quoter, "'" );
41+
retVal.Add( TokenType.KeyPrefix, "/" );
42+
43+
break;
44+
45+
case CommandLineStyle.Universal:
46+
retVal.Add( TokenType.Quoter, "\"" );
47+
retVal.Add( TokenType.Quoter, "'" );
48+
retVal.Add( TokenType.KeyPrefix, "-" );
49+
retVal.Add( TokenType.KeyPrefix, "--" );
50+
retVal.Add( TokenType.KeyPrefix, "/" );
51+
52+
break;
53+
}
54+
55+
return retVal;
56+
}
57+
}
58+
}

0 commit comments

Comments
 (0)