Skip to content

Commit 3162be4

Browse files
author
Yanan Wang
committed
refactor(FuzzySharp): rename DomainTypes to Sources and DomainTermMapping to SynonymMapping
- Rename DomainTypes → Sources
- Rename DomainTermMapping → SynonymMapping
- Update namespace: BotSharp.Abstraction.FuzzSharp → BotSharp.Plugin.FuzzySharp.FuzzSharp
- Rename DomainTermMatcher.cs → SynonymMatcher.cs
1 parent 1367126 commit 3162be4

File tree

18 files changed

+94
-191
lines changed

18 files changed

+94
-191
lines changed

src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ namespace BotSharp.Abstraction.Knowledges;
33
public interface IPhraseCollection
44
{
55
Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync();
6-
Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadDomainTermMappingAsync();
6+
Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadSynonymMappingAsync();
77
}
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11

2-
namespace BotSharp.Abstraction.Knowledges.Models
2+
namespace BotSharp.Abstraction.Knowledges.Models;
3+
4+
public class SearchPhrasesResult
35
{
4-
public class SearchPhrasesResult
5-
{
6-
public string Token { get; set; } = string.Empty;
7-
public List<string> DomainTypes { get; set; } = new();
8-
public string CanonicalForm { get; set; } = string.Empty;
9-
public double Confidence { get; set; }
10-
}
6+
public string Token { get; set; } = string.Empty;
7+
public List<string> Sources { get; set; } = new();
8+
public string CanonicalForm { get; set; } = string.Empty;
9+
public string MatchType { get; set; } = string.Empty;
10+
public double Confidence { get; set; }
1111
}

src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ namespace BotSharp.Plugin.FuzzySharp.Constants;
44
public static class MatchReason
55
{
66
/// <summary>
7-
/// Token matched a domain term mapping (e.g., HVAC -> Air Conditioning/Heating)
7+
/// Token matched a synonym term (e.g., HVAC -> Air Conditioning/Heating)
88
/// </summary>
9-
public const string DomainTermMapping = "domain_term_mapping";
9+
public const string SynonymMatch = "synonym_match";
1010

1111
/// <summary>
1212
/// Token exactly matched a vocabulary entry

src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ public FuzzySharpController(
2121
}
2222

2323
/// <summary>
24-
/// Analyze text for typos and entities using domain-specific vocabulary.
25-
///
24+
/// Analyze text for typos and entities using vocabulary.
25+
///
2626
/// Returns:
2727
/// - `original`: Original input text
2828
/// - `tokens`: Tokenized text (only included if `include_tokens=true`)
2929
/// - `flagged`: List of flagged items (each with `match_type`):
30-
/// - `domain_term_mapping` - Business abbreviations (confidence=1.0)
30+
/// - `synonym_match` - Business abbreviations (confidence=1.0)
3131
/// - `exact_match` - Exact vocabulary matches (confidence=1.0)
3232
/// - `typo_correction` - Spelling corrections (confidence less than 1.0)
3333
/// - `processing_time_ms`: Processing time in milliseconds
Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,13 @@
1-
using System.ComponentModel.DataAnnotations;
2-
using System.Text.Json.Serialization;
31

4-
namespace BotSharp.Abstraction.FuzzSharp.Arguments;
2+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Arguments;
53

64
public class TextAnalysisRequest
75
{
8-
/// <summary>
9-
/// Text to analyze
10-
/// </summary>
11-
[Required]
12-
[JsonPropertyName("text")]
136
public string Text { get; set; } = string.Empty;
14-
15-
/// <summary>
16-
/// Folder path containing CSV files (will read all .csv files from the folder or its 'output' subfolder)
17-
/// </summary>
18-
[JsonPropertyName("vocabulary_folder_name")]
197
public string? VocabularyFolderName { get; set; }
20-
21-
/// <summary>
22-
/// Domain term mapping CSV file
23-
/// </summary>
24-
[JsonPropertyName("domain_term_mapping_file")]
25-
public string? DomainTermMappingFile { get; set; }
26-
27-
/// <summary>
28-
/// Min score for suggestions (0.0-1.0)
29-
/// </summary>
30-
[JsonPropertyName("cutoff")]
31-
[Range(0.0, 1.0)]
8+
public string? SynonymMappingFile { get; set; }
329
public double Cutoff { get; set; } = 0.82;
33-
34-
/// <summary>
35-
/// Max candidates per domain (1-20)
36-
/// </summary>
37-
[JsonPropertyName("topk")]
38-
[Range(1, 20)]
3910
public int TopK { get; set; } = 5;
40-
41-
/// <summary>
42-
/// Max n-gram size (1-10)
43-
/// </summary>
44-
[JsonPropertyName("max_ngram")]
45-
[Range(1, 10)]
4611
public int MaxNgram { get; set; } = 5;
47-
48-
/// <summary>
49-
/// Include tokens field in response (default: False)
50-
/// </summary>
51-
[JsonPropertyName("include_tokens")]
5212
public bool IncludeTokens { get; set; } = false;
5313
}

src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
1-
using BotSharp.Abstraction.FuzzSharp.Models;
1+
using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
22

3-
namespace BotSharp.Abstraction.FuzzSharp;
3+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
44

55
public interface INgramProcessor
66
{
77
/// <summary>
88
/// Process tokens and generate all possible n-gram match results
99
/// </summary>
1010
/// <param name="tokens">List of tokens to process</param>
11-
/// <param name="vocabulary">Vocabulary (domain type -> vocabulary set)</param>
12-
/// <param name="domainTermMapping">Domain term mapping</param>
13-
/// <param name="lookup">Lookup table (lowercase vocabulary -> (canonical form, domain type list))</param>
11+
/// <param name="vocabulary">Vocabulary (source -> vocabulary set)</param>
12+
/// <param name="synonymMapping">Synonym mapping (synonym term -> (DB path, canonical form))</param>
13+
/// <param name="lookup">Lookup table (lowercase vocabulary -> (canonical form, source list))</param>
1414
/// <param name="maxNgram">Maximum n-gram length</param>
1515
/// <param name="cutoff">Minimum confidence threshold for fuzzy matching</param>
1616
/// <param name="topK">Maximum number of matches to return</param>
1717
/// <returns>List of flagged items</returns>
1818
List<FlaggedItem> ProcessNgrams(
1919
List<string> tokens,
2020
Dictionary<string, HashSet<string>> vocabulary,
21-
Dictionary<string, (string DbPath, string CanonicalForm)> domainTermMapping,
22-
Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> lookup,
21+
Dictionary<string, (string DbPath, string CanonicalForm)> synonymMapping,
22+
Dictionary<string, (string CanonicalForm, List<string> Sources)> lookup,
2323
int maxNgram,
2424
double cutoff,
2525
int topK);

src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
using BotSharp.Abstraction.FuzzSharp.Models;
1+
using BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
22

3-
namespace BotSharp.Abstraction.FuzzSharp;
3+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
44

55
/// <summary>
66
/// Result processor interface

src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
namespace BotSharp.Abstraction.FuzzSharp;
1+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp;
22

33
public interface ITokenMatcher
44
{
@@ -24,8 +24,8 @@ public record MatchContext(
2424
int StartIndex,
2525
int NgramLength,
2626
Dictionary<string, HashSet<string>> Vocabulary,
27-
Dictionary<string, (string DbPath, string CanonicalForm)> DomainTermMapping,
28-
Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> Lookup,
27+
Dictionary<string, (string DbPath, string CanonicalForm)> SynonymMapping,
28+
Dictionary<string, (string CanonicalForm, List<string> Sources)> Lookup,
2929
double Cutoff,
3030
int TopK);
3131

@@ -34,6 +34,6 @@ public record MatchContext(
3434
/// </summary>
3535
public record MatchResult(
3636
string CanonicalForm,
37-
List<string> DomainTypes,
37+
List<string> Sources,
3838
string MatchType,
3939
double Confidence);
Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,13 @@
1-
using System.Text.Json.Serialization;
21

3-
namespace BotSharp.Abstraction.FuzzSharp.Models;
2+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
43

54
public class FlaggedItem
65
{
7-
/// <summary>
8-
/// Token index in the original text
9-
/// </summary>
10-
[JsonPropertyName("index")]
116
public int Index { get; set; }
12-
13-
/// <summary>
14-
/// Original token text
15-
/// </summary>
16-
[JsonPropertyName("token")]
177
public string Token { get; set; } = string.Empty;
18-
19-
/// <summary>
20-
/// Domain types where this token was found (e.g., ['client_Profile.Name', 'data_ServiceType.Name'])
21-
/// </summary>
22-
[JsonPropertyName("domain_types")]
23-
public List<string> DomainTypes { get; set; } = new();
24-
25-
/// <summary>
26-
/// Type of match: 'domain_term_mapping' (business abbreviations, confidence=1.0) |
27-
/// 'exact_match' (vocabulary matches, confidence=1.0) |
28-
/// 'typo_correction' (spelling corrections, confidence less than 1.0)
29-
/// </summary>
30-
[JsonPropertyName("match_type")]
8+
public List<string> Sources { get; set; } = new();
319
public string MatchType { get; set; } = string.Empty;
32-
33-
/// <summary>
34-
/// Canonical form or suggested correction
35-
/// </summary>
36-
[JsonPropertyName("canonical_form")]
3710
public string CanonicalForm { get; set; } = string.Empty;
38-
39-
/// <summary>
40-
/// Confidence score (0.0-1.0, where 1.0 is exact match)
41-
/// </summary>
42-
[JsonPropertyName("confidence")]
4311
public double Confidence { get; set; }
44-
45-
/// <summary>
46-
/// N-gram length (number of tokens in this match). Internal field, not included in JSON output.
47-
/// </summary>
48-
[JsonIgnore]
4912
public int NgramLength { get; set; }
5013
}
Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,10 @@
1-
using System.Text.Json.Serialization;
21

3-
namespace BotSharp.Abstraction.FuzzSharp.Models;
2+
namespace BotSharp.Plugin.FuzzySharp.FuzzSharp.Models;
43

54
public class TextAnalysisResponse
65
{
7-
/// <summary>
8-
/// Original text
9-
/// </summary>
10-
[JsonPropertyName("original")]
116
public string Original { get; set; } = string.Empty;
12-
13-
/// <summary>
14-
/// Tokenized text (only included if include_tokens=true)
15-
/// </summary>
16-
[JsonPropertyName("tokens")]
17-
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
187
public List<string>? Tokens { get; set; }
19-
20-
/// <summary>
21-
/// Flagged items (filter by 'match_type' field: 'domain_term_mapping', 'exact_match', or 'typo_correction')
22-
/// </summary>
23-
[JsonPropertyName("flagged")]
248
public List<FlaggedItem> Flagged { get; set; } = new();
25-
26-
/// <summary>
27-
/// Processing time in milliseconds
28-
/// </summary>
29-
[JsonPropertyName("processing_time_ms")]
309
public double ProcessingTimeMs { get; set; }
3110
}

0 commit comments

Comments
 (0)