|
1 | 1 | // LICENSE : MIT |
2 | 2 | "use strict"; |
3 | 3 | import {RuleHelper} from "textlint-rule-helper" |
4 | | -import ObjectAssign from "object-assign" |
5 | | -const defaultOptions = {max: 3}; |
6 | | -function countTen(text) { |
7 | | - return text.split("、").length - 1; |
| 4 | +import {getTokenizer} from "kuromojin"; |
| 5 | +import splitSentences from "sentence-splitter"; |
| 6 | +import Source from "structured-source"; |
/**
 * Fallback values used for any option the caller does not supply.
 */
const defaultOptions = {
    // Maximum number of "、" allowed in a single sentence.
    max: 3,
    // When true, the "、 sandwiched between two nouns" exception is not applied
    // and every "、" is counted.
    strict: false
};
| 11 | + |
/**
 * Tell whether a token sits directly between two tokens whose part of speech
 * (`pos`) is "名詞" (noun).
 *
 * @param {object} [neighbours]
 * @param {object} [neighbours.before] token immediately before `token`
 * @param {object} [neighbours.token] the token being examined
 * @param {object} [neighbours.after] token immediately after `token`
 * @returns {boolean} true only when all three tokens are present and both
 *   neighbours have `pos === "名詞"`
 */
function isSandwichedMeishi({before, token, after} = {}) {
    // `== null` matches both `undefined` (e.g. an out-of-range array index)
    // and `null`, so a missing neighbour never reaches the `.pos` access below.
    if (before == null || token == null || after == null) {
        return false;
    }
    return before.pos === "名詞" && after.pos === "名詞";
}
9 | 22 | /** |
10 | 23 | * @param {RuleContext} context |
11 | 24 | * @param {object} options |
12 | 25 | */ |
13 | 26 | export default function (context, options = {}) { |
14 | | - options = ObjectAssign({}, defaultOptions, options); |
15 | | - const maxLen = options.max; |
16 | | - const punctuation = /[。]/; |
| 27 | + const maxLen = options.max || defaultOptions.max; |
| 28 | + const isStrict = options.strict || defaultOptions.strict; |
17 | 29 | let helper = new RuleHelper(context); |
18 | 30 | let {Syntax, RuleError, report, getSource} = context; |
19 | | - let currentParagraphTexts = []; |
20 | 31 | return { |
21 | | - [Syntax.Paragraph](){ |
22 | | - currentParagraphTexts = [] |
23 | | - }, |
24 | | - [Syntax.Str](node){ |
25 | | - // ignore text from external factor |
26 | | - if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote])) { |
| 32 | + [Syntax.Paragraph](node){ |
| 33 | + if (helper.isChildNode(node, [Syntax.BlockQuote])) { |
27 | 34 | return; |
28 | 35 | } |
29 | | - currentParagraphTexts.push(node); |
30 | | - }, |
31 | | - [Syntax.Paragraph + ":exit"](){ |
32 | | - let currentTenCount = 0; |
| 36 | + let sentences = splitSentences(getSource(node), { |
| 37 | + charRegExp: /[。\?\!?!]/, |
| 38 | + newLineCharacters: "\n\n" |
| 39 | + }); |
33 | 40 | /* |
34 | 41 | <p> |
35 | 42 | <str><code><img><str> |
36 | 43 | <str> |
37 | 44 | </p> |
38 | 45 | */ |
39 | | - currentParagraphTexts.forEach(strNode => { |
40 | | - let paddingLine = 0; |
41 | | - let paddingColumn = 0; |
42 | | - let text = getSource(strNode); |
43 | | - let characters = text.split(""); |
44 | | - characters.forEach(char => { |
45 | | - if (char === "、") { |
46 | | - currentTenCount++; |
47 | | - } |
48 | | - if (char === "。") { |
49 | | - // reset |
50 | | - currentTenCount = 0; |
51 | | - } |
52 | | - // report |
53 | | - if (currentTenCount >= maxLen) { |
54 | | - var ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, { |
55 | | - line: paddingLine, |
56 | | - column: paddingColumn |
57 | | - }); |
58 | | - report(strNode, ruleError); |
59 | | - currentTenCount = 0; |
60 | | - } |
61 | | - // calc padding{line,column} |
62 | | - if (char === "\n") { |
63 | | - paddingLine++; |
64 | | - paddingColumn = 0; |
65 | | - } else { |
66 | | - paddingColumn++; |
67 | | - } |
| 46 | + /* |
| 47 | + # workflow |
| 48 | + 1. split text to sentences |
| 49 | + 2. sentence to tokens |
| 50 | + 3. check tokens |
| 51 | + */ |
| 52 | + return getTokenizer().then(tokenizer => { |
| 53 | + sentences.forEach(sentence => { |
| 54 | + let text = sentence.value; |
| 55 | + let source = new Source(text); |
| 56 | + let currentTenCount = 0; |
| 57 | + let tokens = tokenizer.tokenizeForSentence(text); |
| 58 | + let lastToken = null; |
| 59 | + tokens.forEach((token, index) => { |
| 60 | + let surface = token.surface_form; |
| 61 | + if (surface === "、") { |
| 62 | + // 名詞に過去まわれている場合は例外とする |
| 63 | + let isSandwiched = isSandwichedMeishi({ |
| 64 | + before: tokens[index - 1], |
| 65 | + token: token, |
| 66 | + after: tokens[index + 1] |
| 67 | + }); |
| 68 | + // strictなら例外を例外としない |
| 69 | + if (!isStrict && isSandwiched) { |
| 70 | + return; |
| 71 | + } |
| 72 | + currentTenCount++; |
| 73 | + lastToken = token; |
| 74 | + } |
| 75 | + if (surface === "。") { |
| 76 | + // reset |
| 77 | + currentTenCount = 0; |
| 78 | + } |
| 79 | + // report |
| 80 | + if (currentTenCount >= maxLen) { |
| 81 | + let position = source.indexToPosition(lastToken.word_position - 1); |
| 82 | + let ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, { |
| 83 | + line: position.line - 1, |
| 84 | + column: position.column |
| 85 | + }); |
| 86 | + report(node, ruleError); |
| 87 | + currentTenCount = 0; |
| 88 | + } |
| 89 | + }); |
68 | 90 | }); |
69 | 91 | }); |
70 | 92 | } |
|
0 commit comments