Skip to content

Commit 1fc586d

Browse files
Overhaul num2hanzi
1 parent e037825 commit 1fc586d

File tree

1 file changed

+183
-115
lines changed

1 file changed

+183
-115
lines changed

src/hanzi2num.js

Lines changed: 183 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,3 @@
1-
var NUMS = ["零", "一", "二", "三", "四", "五", "六", "七", "八", "九"];
2-
var MULTS1 = ["十", "百", "千"];
3-
var MULTS4 = [
4-
"萬",
5-
"億",
6-
"兆",
7-
"京",
8-
"垓",
9-
"秭",
10-
"穰",
11-
"溝",
12-
"澗",
13-
"正",
14-
"載",
15-
"極",
16-
"恆河沙",
17-
"阿僧祇",
18-
"那由他",
19-
"不可思議",
20-
"無量大數"
21-
];
22-
var FRACS1 = ["分", "釐", "毫", "絲", "忽", "微", "纖", "沙"];
23-
var FRACS4 = [
24-
"塵",
25-
"埃",
26-
"渺",
27-
"漠",
28-
"模糊",
29-
"逡巡",
30-
"須臾",
31-
"瞬息",
32-
"彈指",
33-
"剎那",
34-
"六德",
35-
"虛",
36-
"空",
37-
"清",
38-
"淨"
39-
];
40-
411
const eTokenType = {
422
SIGN: "SIGN", // 負
433
DIGIT: "DIGIT", // 一二三...
@@ -97,6 +57,62 @@ const NUM_TOKENS = {
9757
"漠": { type: eTokenType.FRAC_MULT, exp: -12 }
9858
};
9959

60+
// Sign word placed in front of the readout of a negative number.
const NEG_WORD = "負";
61+
// Readout num2hanzi returns for Infinity (prefixed with NEG_WORD for -Infinity).
const INF_WORD = "無限大數";
62+
// Readout num2hanzi returns for any other non-finite input, e.g. NaN.
const NAN_WORD = "不可算數";
63+
64+
// Separator word, keyed by output format name. num2hanzi appends it after the
// integer part when fractional digits follow; it is omitted when the number
// has no integer part.
const DECIMAL_WORD = {
65+
"readout": "又"
66+
};
67+
68+
// Digit words, keyed first by output format name and then by the ASCII digit
// character ("0".."9") as it appears in a decimal string.
const DIGIT_WORDS = {
69+
"readout": {
70+
"0": "零",
71+
"1": "一",
72+
"2": "二",
73+
"3": "三",
74+
"4": "四",
75+
"5": "五",
76+
"6": "六",
77+
"7": "七",
78+
"8": "八",
79+
"9": "九"
80+
}
81+
};
82+
83+
// Unit words, keyed by output format name. Each entry pairs a unit word with
// the power of ten it stands for (value = 10^exp).
//
// The list must stay sorted by descending exp: num2hanzi uses find() to pick
// the first entry whose exp does not exceed a digit position, and reads the
// LAST entry as the smallest fractional unit it can express.
const MULT_WORDS = {
84+
"readout": [
85+
{ str: "極", exp: 48 },
86+
{ str: "載", exp: 44 },
87+
{ str: "正", exp: 40 },
88+
{ str: "澗", exp: 36 },
89+
{ str: "溝", exp: 32 },
90+
{ str: "穰", exp: 28 },
91+
{ str: "秭", exp: 24 },
92+
{ str: "垓", exp: 20 },
93+
{ str: "京", exp: 16 },
94+
{ str: "兆", exp: 12 },
95+
{ str: "億", exp: 8 },
96+
{ str: "萬", exp: 4 },
97+
{ str: "千", exp: 3 },
98+
{ str: "百", exp: 2 },
99+
{ str: "十", exp: 1 },
100+
// exp 0 is the units position: the digit is written with no unit word.
{ str: "", exp: 0 },
101+
{ str: "分", exp: -1 },
102+
{ str: "釐", exp: -2 },
103+
{ str: "毫", exp: -3 },
104+
{ str: "絲", exp: -4 },
105+
{ str: "忽", exp: -5 },
106+
{ str: "微", exp: -6 },
107+
{ str: "纖", exp: -7 },
108+
{ str: "沙", exp: -8 },
109+
{ str: "塵", exp: -9 },
110+
{ str: "埃", exp: -10 },
111+
{ str: "渺", exp: -11 },
112+
{ str: "漠", exp: -12 }
113+
]
114+
};
115+
100116
const eMultState = {
101117
NONE: "NONE", // <END>, 一 (ambiguous: 一萬一 or 一十一 or 一·一 or 一絲一)
102118
FRAC: "FRAC", // ...微
@@ -666,97 +682,149 @@ function hanzi2numstr(s) {
666682
}
667683

668684
/**
 * Converts a Chinese numeral string to a JavaScript number.
 *
 * Delegates the parsing to hanzi2numstr and converts its result with Number().
 *
 * @param {string} s - text passed unchanged to hanzi2numstr.
 * @returns {number} the numeric value, or NaN when hanzi2numstr yields
 *   null or undefined.
 */
function hanzi2num(s) {
  const str = hanzi2numstr(s);
  // `== null` deliberately matches both null and undefined. Without this
  // guard Number(null) would turn a failed parse into 0.
  if (str == null) {
    return NaN;
  }
  return Number(str);
}
671692

672-
/**
 * Converts a JavaScript number to its Chinese numeral readout.
 *
 * The number is first rendered as a decimal string, split into digits, and
 * then written out digit by digit with the unit words from MULT_WORDS.
 *
 * @param {number} n - value to convert.
 * @param {string} [format=""] - reserved for future extension; currently
 *   ignored, the "readout" word tables are always used.
 * @param {number} [precision] - when given, n is rendered with
 *   n.toFixed(precision); otherwise n.toString() is used.
 * @returns {string} the readout. Non-finite input yields INF_WORD,
 *   NEG_WORD + INF_WORD or NAN_WORD.
 */
function num2hanzi(n, format = "", precision = undefined) {
  if (!Number.isFinite(n)) {
    // Loose equality is kept on purpose: non-number input (for example the
    // string "Infinity") also fails Number.isFinite and is matched here
    // after coercion, exactly as before.
    if (n == Infinity) {
      return INF_WORD;
    }
    if (n == -Infinity) {
      return NEG_WORD + INF_WORD;
    }
    return NAN_WORD;
  }

  // Splits a decimal string such as "-12.5e+3" into sign, exponent and
  // digits (the same format as hanzi2numstr.parse). `digits` is stored least
  // significant digit first, and digits[k] has the weight 10^(exp + k).
  function parseNumStr(str) {
    const indexOrEnd = (text, val) => {
      const idx = text.indexOf(val);
      return idx < 0 ? text.length : idx;
    };
    const sign = str.charAt(0) === "-" ? -1 : 1;
    const digitIndex = "+-".includes(str.charAt(0)) ? 1 : 0;
    const expIndex = indexOrEnd(str, "e");
    const scientificExp = expIndex === str.length ? 0 : Number(str.substring(expIndex + 1));
    const decimalIndex = indexOrEnd(str.substring(0, expIndex), ".");
    const intDigits = str.substring(digitIndex, decimalIndex).split("").reverse();
    const fracDigits = str
      .substring(Math.min(decimalIndex + 1, expIndex), expIndex)
      .split("")
      .reverse();
    return {
      sign: sign,
      exp: scientificExp - fracDigits.length,
      digits: fracDigits.concat(intDigits)
    };
  }

  // Parses numStr and locates the window of significant digits:
  // `low` is the index of the lowest non-zero digit (-1 when every digit is
  // zero) and `high` is one past the highest non-zero digit.
  function analyse(numStr) {
    const parsed = parseNumStr(numStr);
    const low = parsed.digits.findIndex((d) => d !== "0");
    let high = parsed.digits.length;
    if (low >= 0) {
      while (parsed.digits[high - 1] === "0") {
        --high;
      }
    }
    return {
      parsed: parsed,
      signWord: parsed.sign < 0 ? NEG_WORD : "",
      low: low,
      high: high
    };
  }

  // reserved for future extension
  const chineseFormat = "readout";
  const multWords = MULT_WORDS[chineseFormat];
  const digitWords = DIGIT_WORDS[chineseFormat];
  const decimalWord = DECIMAL_WORD[chineseFormat];

  let state = analyse(precision === undefined ? n.toString() : n.toFixed(precision));

  // Is the lowest digit beyond the smallest fractional unit we can write?
  // If so, round to that unit and analyse again.
  const minMultExp = multWords[multWords.length - 1].exp;
  if (state.low >= 0 && state.parsed.exp + state.low < minMultExp) {
    state = analyse(n.toFixed(-minMultExp));
  }
  if (state.low < 0) {
    // every digit is zero
    return state.signWord + digitWords["0"];
  }

  const digits = state.parsed.digits;
  const baseExp = state.parsed.exp;
  const rend = state.low;

  // convert digits to readout format
  let str = state.signWord;
  let pendingZero = false;
  // `i` is one past the index of the next digit to write; it walks from the
  // most significant digit down to `rend` and is shared by both writers.
  let i = state.high;

  // Writes the digits of one group, where the group is counted in units of
  // 10^expBias. Returns true when anything was written.
  const intToReadout = function (expBias = 0) {
    let hasOutput = false;
    while (i !== rend) {
      const mult = multWords.find((x) => x.exp + expBias <= baseExp + (i - 1));
      if (mult === undefined || mult.exp < 0) {
        // done with int part
        break;
      } else if (mult.exp > 0) {
        // needs higher multiplier
        if (intToReadout(expBias + mult.exp)) {
          // 零 is omitted here (100 0000 -> 一百萬)
          pendingZero = false;
          // write the multiplier
          str += mult.str;
          hasOutput = true;
        }
      } else if (mult.exp === 0) {
        if (digits[i - 1] !== "0") {
          // insert 零 if necessary
          if (pendingZero) {
            str += digitWords["0"];
            pendingZero = false;
          }
          // write the digit
          str += digitWords[digits[i - 1]];
          hasOutput = true;
        } else {
          // mark that there are zero(s) not written yet
          // 零 will be added later if necessary
          pendingZero = true;
        }
        --i;
        // a units digit always ends the current group
        break;
      }
    }
    return hasOutput;
  };

  // Writes the remaining (fractional) digits, each followed by its unit word.
  const fracToReadout = function () {
    while (i !== rend) {
      const mult = multWords.find((x) => x.exp <= baseExp + (i - 1));
      if (mult === undefined) {
        break;
      }
      if (intToReadout(mult.exp)) {
        str += mult.str;
        pendingZero = false;
      }
    }
  };

  const hasInt = intToReadout();
  pendingZero = false;
  if (i !== rend) {
    if (hasInt) {
      str += decimalWord;
    }
    // avoid 又零
    while (i !== rend && digits[i - 1] === "0") {
      --i;
    }
    fracToReadout();
  }
  return str;
}
761829

762830
try {

0 commit comments

Comments
 (0)