Skip to content

Commit 12a4199

Browse files
committed
Merge pull request #3 from azu/keitaiso
名詞同士で囲まれた、はカウントしない
2 parents 6abeaac + 6fffc91 commit 12a4199

File tree

4 files changed

+91
-54
lines changed

4 files changed

+91
-54
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ Configure `"max"` value of the `.textlintrc` file.
2828
}
2929
```
3030

31+
## Example
32+
33+
> これは、長文の例ですが、読点の数が3つ以上あるので、エラーが報告されます。
34+
35+
=> error 一つの文で"、"を3つ以上使用しています
36+
37+
> ビスケットの主な材料は(1)小麦粉、(2)牛乳、(3)ショートニング、(4)バター、(5)砂糖である。
38+
39+
=> No error: 名詞同士で囲まれている `、` はカウントされない
40+
3141
## Tests
3242

3343
npm test

package.json

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,13 @@
3636
"espower-babel": "^3.3.0",
3737
"mocha": "^2.3.0",
3838
"power-assert": "^1.0.0",
39-
"textlint": "^3.2.0",
40-
"textlint-tester": "^0.2.0"
39+
"textlint": "^5.0.3",
40+
"textlint-tester": "^0.4.1"
4141
},
4242
"dependencies": {
43-
"object-assign": "^4.0.1",
43+
"kuromojin": "^1.0.2",
44+
"sentence-splitter": "^1.2.0",
45+
"structured-source": "^3.0.2",
4446
"textlint-rule-helper": "^1.1.3"
4547
}
4648
}

src/max-ten.js

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,92 @@
11
// LICENSE : MIT
22
"use strict";
33
import {RuleHelper} from "textlint-rule-helper"
4-
import ObjectAssign from "object-assign"
5-
const defaultOptions = {max: 3};
6-
function countTen(text) {
7-
return text.split("、").length - 1;
4+
import {getTokenizer} from "kuromojin";
5+
import splitSentences from "sentence-splitter";
6+
import Source from "structured-source";
7+
const defaultOptions = {
8+
max: 3, // 1文に利用できる最大の、の数
9+
strict: false // true の場合は例外ルール(名詞に挟まれた「、」を数えない)を適用しない
10+
};
11+
12+
function isSandwichedMeishi({
13+
before,
14+
token,
15+
after
16+
}) {
17+
if (before === undefined || after === undefined || token === undefined) {
18+
return false;
19+
}
20+
return before.pos === "名詞" && after.pos === "名詞";
821
}
922
/**
1023
* @param {RuleContext} context
1124
* @param {object} options
1225
*/
1326
export default function (context, options = {}) {
14-
options = ObjectAssign({}, defaultOptions, options);
15-
const maxLen = options.max;
16-
const punctuation = /[]/;
27+
const maxLen = options.max || defaultOptions.max;
28+
const isStrict = options.strict || defaultOptions.strict;
1729
let helper = new RuleHelper(context);
1830
let {Syntax, RuleError, report, getSource} = context;
19-
let currentParagraphTexts = [];
2031
return {
21-
[Syntax.Paragraph](){
22-
currentParagraphTexts = []
23-
},
24-
[Syntax.Str](node){
25-
// ignore text from external factor
26-
if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote])) {
32+
[Syntax.Paragraph](node){
33+
if (helper.isChildNode(node, [Syntax.BlockQuote])) {
2734
return;
2835
}
29-
currentParagraphTexts.push(node);
30-
},
31-
[Syntax.Paragraph + ":exit"](){
32-
let currentTenCount = 0;
36+
let sentences = splitSentences(getSource(node), {
37+
charRegExp: /[\?\!]/,
38+
newLineCharacters: "\n\n"
39+
});
3340
/*
3441
<p>
3542
<str><code><img><str>
3643
<str>
3744
</p>
3845
*/
39-
currentParagraphTexts.forEach(strNode => {
40-
let paddingLine = 0;
41-
let paddingColumn = 0;
42-
let text = getSource(strNode);
43-
let characters = text.split("");
44-
characters.forEach(char => {
45-
if (char === "、") {
46-
currentTenCount++;
47-
}
48-
if (char === "。") {
49-
// reset
50-
currentTenCount = 0;
51-
}
52-
// report
53-
if (currentTenCount >= maxLen) {
54-
var ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, {
55-
line: paddingLine,
56-
column: paddingColumn
57-
});
58-
report(strNode, ruleError);
59-
currentTenCount = 0;
60-
}
61-
// calc padding{line,column}
62-
if (char === "\n") {
63-
paddingLine++;
64-
paddingColumn = 0;
65-
} else {
66-
paddingColumn++;
67-
}
46+
/*
47+
# workflow
48+
1. split text to sentences
49+
2. sentence to tokens
50+
3. check tokens
51+
*/
52+
return getTokenizer().then(tokenizer => {
53+
sentences.forEach(sentence => {
54+
let text = sentence.value;
55+
let source = new Source(text);
56+
let currentTenCount = 0;
57+
let tokens = tokenizer.tokenizeForSentence(text);
58+
let lastToken = null;
59+
tokens.forEach((token, index) => {
60+
let surface = token.surface_form;
61+
if (surface === "、") {
62+
// 名詞に囲まれている場合は例外とする
63+
let isSandwiched = isSandwichedMeishi({
64+
before: tokens[index - 1],
65+
token: token,
66+
after: tokens[index + 1]
67+
});
68+
// strictなら例外を例外としない
69+
if (!isStrict && isSandwiched) {
70+
return;
71+
}
72+
currentTenCount++;
73+
lastToken = token;
74+
}
75+
if (surface === "。") {
76+
// reset
77+
currentTenCount = 0;
78+
}
79+
// report
80+
if (currentTenCount >= maxLen) {
81+
let position = source.indexToPosition(lastToken.word_position - 1);
82+
let ruleError = new context.RuleError(`一つの文で"、"を${maxLen}つ以上使用しています`, {
83+
line: position.line - 1,
84+
column: position.column
85+
});
86+
report(node, ruleError);
87+
currentTenCount = 0;
88+
}
89+
});
6890
});
6991
});
7092
}

test/max-ten-test.js

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import rule from "../src/max-ten"
22
function textIncludeTen(count) {
3-
return (new Array(count + 1)).join("テスト、") + "です";
3+
return (new Array(count + 1)).join("テスト文章において、") + "です";
44
}
55
var TextLintTester = require("textlint-tester");
66
var tester = new TextLintTester();
77
// ruleName, rule, expected[]
88
tester.run("max-ten", rule, {
99
// default max:3
1010
valid: [
11+
"名詞、名詞、名詞、名詞の場合は例外",
12+
"ビスケットの主な材料は(1)小麦粉、(2)牛乳、(3)ショートニング、(4)バター、(5)砂糖である。",
13+
"これは、TaskA、TaskB、TaskC、TaskDが処理するものです。",
1114
{
1215
text: textIncludeTen(3 - 1)
1316
},
@@ -24,7 +27,7 @@ tester.run("max-ten", rule, {
2427
],
2528
invalid: [
2629
{
27-
text: `a、b、 c
30+
text: `これは、これは、これは
2831
、d`
2932
,
3033
errors: [
@@ -47,15 +50,15 @@ tester.run("max-ten", rule, {
4750
]
4851
},
4952
{
50-
text: `これは、長文、columnがちゃんと計算、されてるはずです。`,
53+
text: `これは、長文の例ですが、columnがちゃんと計算、されてるはずです。`,
5154
options: {
5255
"max": 3
5356
},
5457
errors: [
5558
{
5659
message: `一つの文で"、"を3つ以上使用しています`,
5760
line: 1,
58-
column: 21
61+
column: 26
5962
}
6063
]
6164
},

0 commit comments

Comments
 (0)