feat(rule): 読点の挿入，"可能"の類似表現に対応 (#11)

shirayu · azu · commit 5185de5b2afd · 2019-01-02T11:22:40.000+09:00
* Added a skippable token

* Added a skippable token

* Designate punctuation marks

* Added skippable tokens

* Added acceptable tokens

* Early return to reduce variables

* Refactoring
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@
 
 ## 表現の一覧
 
-- "すること[助詞]可能"は冗長な表現です。"すること[助詞]可能"を省き簡潔な表現にすると文章が明瞭になります。
+- "すること[助詞](不)可能"は冗長な表現です。"すること[助詞](不)可能"を省き簡潔な表現にすると文章が明瞭になります。
   - 参考: <http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0>
 - "すること[助詞]できる"は冗長な表現です。"すること[助詞]"を省き簡潔な表現にすると文章が明瞭になります。
   - 参考: <http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0>
diff --git a/package.json b/package.json
@@ -34,8 +34,8 @@
   },
   "dependencies": {
     "kuromojin": "^1.3.2",
-    "morpheme-match": "^1.0.1",
-    "morpheme-match-all": "^1.1.0"
+    "morpheme-match": "^1.2.1",
+    "morpheme-match-all": "^1.2.0"
   },
   "keywords": [
     "textlintrule"
diff --git a/src/dictionary.js b/src/dictionary.js
@@ -1,9 +1,11 @@
 // MIT © 2016 azu
 "use strict";
+const punctuations = ["、", "､", "，", ","]; // Comma variants (ideographic, halfwidth ideographic, fullwidth, ASCII) used as the surface_form of the skippable separator tokens below.
+
 module.exports = [
     {
         // https://azu.github.io/morpheme-match/?text=省略(することが可能)。
-        message: `"する$2$3可能$1"は冗長な表現です。"する$2$3可能"を省き簡潔な表現にすると文章が明瞭になります。`,
+        message: `"する$2$3$4$5$1"は冗長な表現です。"する$2$3$4$5"を省き簡潔な表現にすると文章が明瞭になります。`,
         url: "http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0",
         tokens: [
             {
@@ -27,16 +29,13 @@ module.exports = [
                 "_capture": "$3",
                 "_readme": "[助詞]",
             }, {
-                "surface_form": "可能",
-                "pos": "名詞",
-                "pos_detail_1": "形容動詞語幹",
-                "pos_detail_2": "*",
-                "pos_detail_3": "*",
-                "conjugated_type": "*",
-                "conjugated_form": "*",
-                "basic_form": "可能",
-                "reading": "カノウ",
-                "pronunciation": "カノー"
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$4",
+            }, {
+                "basic_form": ["可", "可能", "不可能", "不能", "不可"],
+                "_capture": "$5",
+                "_readme": "(不)可能",
             }, {
                 "pos": "助動詞",
                 "_capture": "$1"
@@ -45,7 +44,7 @@ module.exports = [
     },
     {
         // https://azu.github.io/morpheme-match/?text=解析(することができます)。
-        message: `"する$4$3$1$2"は冗長な表現です。"する$4$3"を省き簡潔な表現にすると文章が明瞭になります。`,
+        message: `"する$4$3$5$1$2"は冗長な表現です。"する$4$3$5"を省き簡潔な表現にすると文章が明瞭になります。`,
         url: "http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0",
         expected: "$3$1$2",
         tokens: [
@@ -80,6 +79,11 @@ module.exports = [
                 },
                 "_readme": "[助詞]",
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$5",
+            },
             {
                 "pos": "動詞",
                 "pos_detail_1": "自立",
@@ -95,7 +99,7 @@ module.exports = [
     },
     {
         // https://azu.github.io/morpheme-match/?text=必要(であると言えます)
-        message: `"で$1と$2ます"は冗長な表現です。"である" または "と言えます"を省き簡潔な表現にすると文章が明瞭になります。`,
+        message: `"で$1$6と$5$2ます"は冗長な表現です。"である$6" または "と$5言えます"を省き簡潔な表現にすると文章が明瞭になります。`,
         url: "http://www.sekaihaasobiba.com/entry/2014/10/24/204024",
         tokens: [
             {
@@ -115,17 +119,19 @@ module.exports = [
                 "_capture": "$1",
                 "_readme": "ある",
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$6",
+            },
             {
                 "surface_form": "と",
                 "pos": "助詞",
-                "pos_detail_1": "格助詞",
-                "pos_detail_2": "引用",
-                "pos_detail_3": "*",
-                "conjugated_type": "*",
-                "conjugated_form": "*",
-                "basic_form": "と",
-                "reading": "ト",
-                "pronunciation": "ト"
+            },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$5",
             },
             {
                 "pos": "動詞",
@@ -155,7 +161,7 @@ module.exports = [
     },
     {
         // https://azu.github.io/morpheme-match/?text=必要(であると考えている)
-        message: `"であると考えている"は冗長な表現です。"である" または "と考えている"を省き簡潔な表現にすると文章が明瞭になります。`,
+        message: `"である$7と$5考えて$6いる"は冗長な表現です。"である$7" または "と$5考えて$6いる"を省き簡潔な表現にすると文章が明瞭になります。`,
         url: "http://www.atmarkit.co.jp/ait/articles/1001/19/news106_2.html",
         expected: "である",
         tokens: [
@@ -176,17 +182,19 @@ module.exports = [
                 "_capture": "$1",
                 "_readme": "ある",
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$7",
+            },
             {
                 "surface_form": "と",
                 "pos": "助詞",
-                "pos_detail_1": "格助詞",
-                "pos_detail_2": "引用",
-                "pos_detail_3": "*",
-                "conjugated_type": "*",
-                "conjugated_form": "*",
-                "basic_form": "と",
-                "reading": "ト",
-                "pronunciation": "ト"
+            },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$5",
             },
             {
                 "surface_form": "考え",
@@ -212,6 +220,11 @@ module.exports = [
                 "reading": "テ",
                 "pronunciation": "テ"
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$6",
+            },
             {
                 "pos": "動詞",
                 "pos_detail_1": "非自立",
@@ -223,7 +236,7 @@ module.exports = [
     },
     {
         // https://azu.github.io/morpheme-match/?text=動作の(確認を行わなければ)ならない
-        message: `"$1を行う"は冗長な表現です。"$1する"など簡潔な表現にすると文章が明瞭になります。`,
+        message: `"$1を$5行う"は冗長な表現です。"$1する"など簡潔な表現にすると文章が明瞭になります。`,
         url: "http://www.atmarkit.co.jp/ait/articles/1001/19/news106_2.html",
         tokens: [
             {
@@ -243,6 +256,11 @@ module.exports = [
                 "reading": "ヲ",
                 "pronunciation": "ヲ"
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$5",
+            },
             {
                 "pos": "動詞",
                 "pos_detail_1": "自立",
@@ -254,7 +272,7 @@ module.exports = [
         ]
     },
     {
-        message: `"$1を実行"は冗長な表現です。"$1する"など簡潔な表現にすると文章が明瞭になります。`,
+        message: `"$1を$5実行"は冗長な表現です。"$1する"など簡潔な表現にすると文章が明瞭になります。`,
         url: "http://www.atmarkit.co.jp/ait/articles/1001/19/news106_2.html",
         tokens: [
             {
@@ -274,6 +292,11 @@ module.exports = [
                 "reading": "ヲ",
                 "pronunciation": "ヲ"
             },
+            {
+                "surface_form": punctuations,
+                "_skippable": true,
+                "_capture": "$5",
+            },
             {
                 "surface_form": "実行",
                 "pos": "名詞",
diff --git a/src/index.js b/src/index.js
@@ -4,33 +4,46 @@ const tokenize = require("kuromojin").tokenize;
 const dictionaryList = require("./dictionary");
 const createMatchAll = require("morpheme-match-all");
 
+const replaceAll = (text, from, to) => { // Returns `text` with every occurrence of `from` replaced by `to`.
+    return text.split(from).join(to); // split/join swaps all occurrences; a string `from` (e.g. "$4") is matched literally, not as a regex
+} // NOTE(review): no terminating semicolon here, unlike the other arrow-function definitions in this hunk
+
 const replaceTokenWith = (matcherToken, actualToken, specialTo) => {
-    // _captureがないのは無視
-    if (!matcherToken._capture) {
-        return null;
-    }
     if (matcherToken[specialTo]) {
         return matcherToken[specialTo](actualToken);
     }
     return actualToken.surface_form;
 };
-const createExpected = ({text, matcherTokens, actualTokens}) => {
+const createExpected = ({text, matcherTokens, skipped, actualTokens}) => { // Fills the capture placeholders of the `expected` template `text`; `skipped` is indexed by matcher-token position.
     let resultText = text;
+    let actualTokenIndex = 0; // cursor into actualTokens; advances only for matcher tokens that were not skipped
     matcherTokens.forEach((token, index) => {
-        const to = replaceTokenWith(token, actualTokens[index], "_capture_to_expected");
-        if (to !== null) {
-            resultText = resultText.split(token._capture).join(to);
+        if (skipped[index]) { // this matcher token was skipped (presumably a `_skippable` token that matched nothing — confirm against morpheme-match-all)
+            resultText = replaceAll(resultText, token._capture, ""); // remove the skipped token's placeholder from the text
+            return; // actualTokenIndex is not advanced for a skipped token
+        }
+        if (token._capture) { // tokens without `_capture` have no placeholder to substitute
+            const to = replaceTokenWith(token, actualTokens[actualTokenIndex], "_capture_to_expected");
+            resultText = replaceAll(resultText, token._capture, to);
         }
+        ++actualTokenIndex ; // step to the next entry of actualTokens
     });
     return resultText;
 };
-const createMessage = ({text, matcherTokens, actualTokens}) => {
+const createMessage = ({text, matcherTokens, skipped, actualTokens}) => { // Fills the capture placeholders of the `message` template `text`; `skipped` is indexed by matcher-token position.
     let resultText = text;
+    let actualTokenIndex = 0; // cursor into actualTokens; advances only for matcher tokens that were not skipped
     matcherTokens.forEach((token, index) => {
-        const to = replaceTokenWith(token, actualTokens[index], "_capture_to_message");
-        if (to !== null) {
-            resultText = resultText.split(token._capture).join(to);
+        if (skipped[index]) { // this matcher token was skipped (presumably a `_skippable` token that matched nothing — confirm against morpheme-match-all)
+            resultText = replaceAll(resultText, token._capture, ""); // remove the skipped token's placeholder from the message
+            return; // actualTokenIndex is not advanced for a skipped token
+        }
+
+        if (token._capture) { // tokens without `_capture` have no placeholder to substitute
+            const to = replaceTokenWith(token, actualTokens[actualTokenIndex], "_capture_to_message");
+            resultText = replaceAll(resultText, token._capture, to);
         }
+        ++actualTokenIndex ; // step to the next entry of actualTokens
     });
     return resultText;
 };
@@ -55,13 +68,15 @@ const reporter = (context) => {
                     const message = createMessage({
                         text: matchResult.dict.message,
                         matcherTokens: matchResult.dict.tokens,
+                        skipped: matchResult.skipped,
                         actualTokens: matchResult.tokens
                     })
                     + (matchResult.dict.url ? `参考: ${matchResult.dict.url}` : "");
                     const expected = matchResult.dict.expected
                         ? createExpected({
                             text: matchResult.dict.expected,
                             matcherTokens: matchResult.dict.tokens,
+                            skipped: matchResult.skipped,
                             actualTokens: matchResult.tokens
                         })
                         : undefined;
diff --git a/test/index-test.js b/test/index-test.js
@@ -48,6 +48,15 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "これは省略することは，可能だ。",
+            errors: [
+                {
+                    message: `"することは，可能だ"は冗長な表現です。"することは，可能"を省き簡潔な表現にすると文章が明瞭になります。参考: http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0`,
+                    index: 5
+                }
+            ]
+        },
         {
             text: "これは省略することは可能だ。",
             errors: [
@@ -57,6 +66,15 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "これは省略することは不可能だ。",
+            errors: [
+                {
+                    message: `"することは不可能だ"は冗長な表現です。"することは不可能"を省き簡潔な表現にすると文章が明瞭になります。参考: http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0`,
+                    index: 5
+                }
+            ]
+        },
         {
             text: "必要なら解析することができます。",
             output: "必要なら解析できます。",
@@ -67,6 +85,26 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "必要なら解析することが、できます。",
+            output: "必要なら解析できます。",
+            errors: [
+                {
+                    message: `"することが、できます"は冗長な表現です。"することが、"を省き簡潔な表現にすると文章が明瞭になります。参考: http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0`,
+                    index: 6
+                }
+            ]
+        },
+        {
+            text: "必要なら解析することが,できます。",
+            output: "必要なら解析できます。",
+            errors: [
+                {
+                    message: `"することが,できます"は冗長な表現です。"することが,"を省き簡潔な表現にすると文章が明瞭になります。参考: http://qiita.com/takahi-i/items/a93dc2ff42af6b93f6e0`,
+                    index: 6
+                }
+            ]
+        },
         {
             text: "解析することもできますよ。",
             output: "解析もできますよ。",
@@ -117,6 +155,15 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "これは必要であると、言えます。",
+            errors: [
+                {
+                    message: `"であると、言えます"は冗長な表現です。"である" または "と、言えます"を省き簡潔な表現にすると文章が明瞭になります。参考: http://www.sekaihaasobiba.com/entry/2014/10/24/204024`,
+                    index: 5
+                }
+            ]
+        },
         {
             text: "これは必要で有るといえます。",
             errors: [
@@ -126,6 +173,15 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "これは必要である,と、考えて,います。",
+            errors: [
+                {
+                    message: `"である,と、考えて,いる"は冗長な表現です。"である," または "と、考えて,いる"を省き簡潔な表現にすると文章が明瞭になります。参考: http://www.atmarkit.co.jp/ait/articles/1001/19/news106_2.html`,
+                    index: 5
+                }
+            ]
+        },
         {
             text: "実験を行えば分かります。",
             errors: [
@@ -135,6 +191,15 @@ tester.run("textlint-rule-ja-no-redundant-expression", rule, {
                 }
             ]
         },
+        {
+            text: "実験を,行えば分かります。",
+            errors: [
+                {
+                    message: `"実験を,行う"は冗長な表現です。"実験する"など簡潔な表現にすると文章が明瞭になります。参考: http://www.atmarkit.co.jp/ait/articles/1001/19/news106_2.html`,
+                    index: 0
+                }
+            ]
+        },
         {
             text: "検査を実行すれば分かります。",
             errors: [