/**
 * @file Converts pinyin tone numbers to tone marks.
 * @author Kevin K. Yang <[email protected]>
 * @copyright Kevin K. Yang 2017. Licensed under the MIT License.
 */
6+
/**
 * An object holding arrays of Unicode tone marks for each vowel.
 * Each array is indexed by tone number, so the tone-marked form of a
 * vowel is looked up as toneMarks[vowel][tone]. For example, the
 * tone-marked version of a2 is toneMarks["a"][2], which is "\u00e1".
 *
 * @type {Object}
 */
115var toneMarks = {
216 a : [ "a" , "\u0101" , "\u00e1" , "\u01ce" , "\u00e0" , "a" ] ,
317 e : [ "e" , "\u0113" , "\u00e9" , "\u011b" , "\u00e8" , "e" ] ,
@@ -7,16 +21,29 @@ var toneMarks = {
721 v : [ "\u00fc" , "\u01d6" , "\u01d8" , "\u01da" , "\u01dc" , "\u00fc" ]
822} ;
923
/**
 * Tests whether this string consists of exactly one ASCII letter.
 *
 * The pattern is anchored at both ends, so the empty string, strings of
 * two or more characters, digits, whitespace and accented letters all
 * yield false.
 *
 * NOTE: this is installed on String.prototype, so it is visible on every
 * string in the program.
 *
 * @return {Boolean} true if this string is a single letter in A-Z or a-z,
 *                   false otherwise.
 */
String.prototype.isAlpha = function () {
    // String(this) makes the coercion explicit: in sloppy mode `this` is a
    // String wrapper object rather than a primitive.
    return /^[A-Za-z]$/.test(String(this));
};
1431
/**
 * Tests whether this string is exactly one lowercase pinyin vowel.
 *
 * The accepted characters are a, e, i, o, u, v and \u00fc (u with
 * diaeresis). The match is case-sensitive and anchored at both ends, so
 * uppercase vowels, multi-character strings and the empty string all
 * yield false.
 *
 * NOTE: this is installed on String.prototype, so it is visible on every
 * string in the program.
 *
 * @return {Boolean} true if this string is a single pinyin vowel,
 *                   false otherwise.
 */
String.prototype.isPinyinVowel = function () {
    // String(this) makes the coercion explicit: in sloppy mode `this` is a
    // String wrapper object rather than a primitive.
    return /^[aeiouv\u00fc]$/.test(String(this));
};
1939
/**
 * Finds the index of the last occurrence of a regular expression
 * pattern match in this String.
 *
 * @param {RegExp} regExp the pattern to match
 * @return {Number} the index of the last match in this string
 */
2047String . prototype . lastIndexOfRegex = function ( regExp ) {
2148
2249 var lastIndex = - 1 ;
@@ -31,6 +58,11 @@ String.prototype.lastIndexOfRegex = function(regExp) {
3158 return lastIndex ;
3259}
3360
61+ /**
62+ * @param {Number } index The index of the character to replace
63+ * @param {String } replacement The string to insert at the index
64+ * @return {String } this String, with the specified replacement
65+ */
3466String . prototype . replaceAt = function ( index , replacement ) {
3567
3668 if ( index >= 0 && index < this . length && typeof replacement === "string" ) {
@@ -44,34 +76,40 @@ String.prototype.replaceAt = function(index, replacement) {
4476}
4577
4678/**
47- * Takes a single pinyin word using tone numbers and converts to tone symbols.
79+ * Converts this String, which must be a single pinyin word followed by a
80+ * tone number, to the equivalent pinyin word with tone marks.
81+ *
82+ * @return {String } this String, with the tone number removed
83+ * and tone mark inserted.
4884 */
4985String . prototype . convertPinyin = function ( ) {
50-
86+ // convert to lowercase
5187 var str = this . toLocaleLowerCase ( ) ;
52-
88+ // get index of the tone number
5389 var toneNumIndex = str . search ( / [ 1 - 5 ] / ) ;
90+ // get index of the first pinyin vowel
5491 var firstVowelIndex = str . search ( / [ a e i o u v \u00fc ] / ) ;
5592 if ( str . length > 7 || toneNumIndex < 1 ||
5693 toneNumIndex !== str . length - 1 ||
5794 firstVowelIndex < 0 ) {
58-
95+ // this string is either too long to be pinyin, does not contain a \
96+ // correctly placed tone number, or has no pinyin vowels
5997 console . log ( "String.prototype.convertPinyin:" + this +
6098 " is not a valid pinyin word." )
6199 return this ;
62100 }
63-
101+ /** @type { Number } from 1 to 5 */
64102 var toneNum = parseInt ( str [ toneNumIndex ] ) ;
65103 if ( / [ a e ] / . test ( str ) ) {
66-
104+ // str contains an 'a' or an 'e', both of which take precedence
67105 var index = str . search ( / [ a e ] / ) ;
68106 str = str . replaceAt ( index , toneMarks [ str . charAt ( index ) ] [ toneNum ] ) ;
69107 } else if ( / o u / . test ( str ) ) {
70-
108+ // str contains 'ou'. The tone always goes on the 'o'
71109 var index = str . search ( / o u / ) ;
72110 str = str . replaceAt ( index , toneMarks [ str . charAt ( index ) ] [ toneNum ] ) ;
73111 } else {
74-
112+ // place the tone on the last vowel
75113 var index = str . lastIndexOfRegex ( / [ a e i o u v \u00fc ] / ) ;
76114 var vowel = str . charAt ( index ) ;
77115 if ( vowel == "\u00fc" ) {
@@ -80,11 +118,15 @@ String.prototype.convertPinyin = function() {
80118 }
81119 str = str . replaceAt ( index , toneMarks [ vowel ] [ toneNum ] ) ;
82120 }
83-
121+ // strip the tone number
84122 str = str . substring ( 0 , str . length - 1 ) ;
85123 return str ;
86124}
87125
/**
 * @param {String} str the string to convert
 * @return {String} the converted string
 */
88130var pinyinify = function ( str ) {
89131
90132 if ( typeof str !== 'string' ) {
@@ -94,40 +136,46 @@ var pinyinify = function(str) {
94136
95137 var res = "" ;
96138 var i = 0 ;
139+ // parse str character by character
97140 while ( str . length > 0 ) {
98141
99142 var char = str . charAt ( i ) ;
100143 if ( char . isAlpha ( ) ) {
101-
144+ // a letter has been found
102145 if ( i !== 0 ) {
103-
146+ // remove non-letters found up to now, add to res
104147 res += str . substring ( 0 , i ) ;
105148 str = str . substring ( i ) ;
106149 i = 0 ;
107150 }
151+ // get index of next tone number, if it exists
108152 var toneNumIndex = str . search ( / [ 1 - 5 ] / ) ;
153+ // get index of next whitespace, if it exists
109154 var whitespaceIndex = str . search ( / \s / ) ;
155+
110156 if ( toneNumIndex > 0 && toneNumIndex < 7 &&
111157 ( whitespaceIndex < 0 || whitespaceIndex > toneNumIndex ) ) {
112-
158+ // there is a tone number within 6 characters from now, and no \
159+ // whitespaces between this character and the tone number
113160 res += str . substring ( 0 , toneNumIndex + 1 ) . convertPinyin ( ) ;
114161 str = str . substring ( toneNumIndex + 1 ) ;
115162 } else if ( whitespaceIndex < 0 ) {
116-
163+ // no valid tone numbers nor whitespace, add rest of string to res
117164 res += str . substring ( 0 ) ;
118165 str = "" ;
119166 } else {
120-
167+ // whitespace found, remove everything up to and including the \
168+ // whitespace, and add to res
121169 res += str . substring ( 0 , whitespaceIndex + 1 ) ;
122170 str = str . substring ( whitespaceIndex + 1 ) ;
123171 }
124172 } else if ( i >= str . length ) {
125-
173+ // no more characters to parse
126174 res += str . substring ( 0 ) ;
127175 str = "" ;
128176 }
129177 else {
130-
178+ // increment index
131179 i ++ ;
132180 }
133181 }
0 commit comments