1
+ /*
2
+ COPYRIGHT
3
+ Character transliteration tables:
4
+ Copyright 2001, Sean M. Burke <[email protected] >, all rights reserved.
5
+ Python code:
6
+ Copyright 2009, Tomaz Solc <[email protected] >
7
+ CSharp code:
8
+ Copyright 2010, Oleg Usanov <[email protected] >
9
+ Refactorings (2015) - Nikolay Eremin <[email protected] >
10
+ The programs and documentation in this dist are distributed in the
11
+ hope that they will be useful, but without any warranty; without even
12
+ the implied warranty of merchantability or fitness for a particular
13
+ purpose.
14
+ This library is free software; you can redistribute it and/or modify
15
+ it under the same terms as Perl.
16
+ */
17
+
18
+ using System . Text ;
19
+
20
+ namespace DebitExpress . VatRelief . Unidecode ;
21
+
22
public static partial class Unidecoder
{
    /// <summary>
    /// Transliterates a Unicode string into an ASCII string.
    /// </summary>
    /// <remarks>
    /// The original Python example is <c>unidecode(u"\u5317\u4EB0") == "Bei Jing "</c>.
    /// Unlike that example, this method trims leading and trailing spaces from the
    /// result before applying <paramref name="options"/>.
    /// Case conversions use the invariant culture so the result does not depend on
    /// the current thread culture (for example, Turkish casing would otherwise turn
    /// <c>I</c> into the non-ASCII <c>ı</c>).
    /// </remarks>
    /// <param name="input">The text to transliterate. May be null.</param>
    /// <param name="options">
    /// Post-processing applied to the transliterated text (case conversion and/or
    /// removal of space characters). Unrecognised values behave like
    /// <see cref="UnidecodeOptions.Default"/>.
    /// </param>
    /// <returns>
    /// The transliterated string, or an empty string when <paramref name="input"/>
    /// is null, empty, or consists only of white space.
    /// </returns>
    public static string Unidecode(this string input, UnidecodeOptions options = UnidecodeOptions.Default)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return "";
        }

        var output = new StringBuilder(input.Length * 2);

        foreach (var symbol in input)
        {
            // StringBuilder.Append(string) ignores null and empty strings, so
            // characters without a transliteration contribute nothing.
            output.Append(Unidecode(symbol));
        }

        // Every option works on the space-trimmed text, so trim exactly once.
        var text = output.ToString().Trim(' ');

        return options switch
        {
            UnidecodeOptions.ToLower => text.ToLowerInvariant(),
            UnidecodeOptions.ToUpper => text.ToUpperInvariant(),
            UnidecodeOptions.RemoveSpace => text.Replace(" ", ""),
            UnidecodeOptions.RemoveSpaceAndToLower => text.Replace(" ", "").ToLowerInvariant(),
            UnidecodeOptions.RemoveSpaceAndToUpper => text.Replace(" ", "").ToUpperInvariant(),
            _ => text,
        };
    }

    /// <summary>
    /// Transliterates a single UTF-16 code unit into an ASCII string.
    /// </summary>
    /// <param name="c">Character you want to transliterate into ASCII.</param>
    /// <returns>
    /// The character itself when it is below U+0080. Otherwise the entry found in the
    /// transliteration table selected by the character's high byte, indexed by its
    /// low byte. An empty string is returned when no table exists for the high byte.
    /// NOTE(review): the original comment says unknown characters yield "[?]"
    /// (as in the Python code); presumably that comes from the table entries, which
    /// are not visible here - confirm against the table definitions.
    /// </returns>
    public static string Unidecode(this char c)
    {
        // Plain ASCII passes through unchanged.
        if (c < 0x80)
        {
            return c.ToString();
        }

        // Tables are keyed by the high byte; the low byte indexes into the table.
        var high = c >> 8;
        var low = c & 0xff;

        return Characters.Value.TryGetValue(high, out var values) ? values.Value[low] : "";
    }
}
0 commit comments