 import {RuleHelper} from "textlint-rule-helper";
 import {getTokenizer} from "kuromojin";
 import splitSentences, {Syntax as SentenceSyntax} from "sentence-splitter";
+import StringSource from "textlint-util-to-string";
 /**
  * create an object that
  * map = {
@@ -34,31 +35,49 @@ function matchExceptionRule(tokens) {
     }
     return false;
 }
+/*
+    default options
+*/
 const defaultOptions = {
     min_interval: 1,
     strict: false
 };
+
+
+/*
+    1. Paragraph Node -> text
+    2. text -> sentences
+    3. tokenize each sentence
+    4. report an error if a word matching the rule is found
+
+    TODO: need abstraction
+*/
 export default function (context, options = {}) {
     const helper = new RuleHelper(context);
     // 最低間隔値 (minimum interval value)
-    let minInterval = options.min_interval || defaultOptions.min_interval;
-    let isStrict = options.strict || defaultOptions.strict;
-    let {Syntax, report, getSource, RuleError} = context;
+    const minInterval = options.min_interval || defaultOptions.min_interval;
+    const isStrict = options.strict || defaultOptions.strict;
+    const {Syntax, report, getSource, RuleError} = context;
     return {
-        [Syntax.Str](node) {
+        [Syntax.Paragraph](node) {
             if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote, Syntax.Emphasis])) {
                 return;
             }
-            let text = getSource(node);
-            let sentences = splitSentences(text).filter(node => {
+            const source = new StringSource(node);
+            const text = source.toString();
+            const isSentenceNode = node => {
                 return node.type === SentenceSyntax.Sentence;
-            });
+            };
+            let sentences = splitSentences(text, {
+                charRegExp: /[。\?\!?!]/
+            }).filter(isSentenceNode);
             return getTokenizer().then(tokenizer => {
                 const checkSentence = (sentence) => {
                     let tokens = tokenizer.tokenizeForSentence(sentence.raw);
-                    let joshiTokens = tokens.filter(token => {
+                    const isJoshiToken = token => {
                         return token.pos === "助詞";
-                    });
+                    };
+                    let joshiTokens = tokens.filter(isJoshiToken);
                     let joshiTokenSurfaceKeyMap = createSurfaceKeyMap(joshiTokens);
                     /*
                         # Data Structure
@@ -73,26 +92,33 @@ export default function (context, options = {}) {
                         let tokens = joshiTokenSurfaceKeyMap[key];
                         // strict mode ではない時例外を除去する (when not in strict mode, drop the exception patterns)
                         if (!isStrict) {
-                           if (matchExceptionRule(tokens)) {
+                            if (matchExceptionRule(tokens)) {
                                 return;
                             }
                         }
                         if (tokens.length <= 1) {
                             return; // no duplicated token
                         }
                         // if the found differenceIndex is less than or equal to minInterval, report it
+                        // tokens are sorted in ascending order of appearance
                         tokens.reduce((prev, current) => {
                             let startPosition = joshiTokens.indexOf(prev);
                             let otherPosition = joshiTokens.indexOf(current);
                             // if difference
                             let differenceIndex = otherPosition - startPosition;
                             if (differenceIndex <= minInterval) {
-                                report(node, new RuleError(`一文に二回以上利用されている助詞 "${key}" がみつかりました。`, {
-                                    line: sentence.loc.start.line - 1,
+                                let originalPosition = source.originalPositionFor({
+                                    line: sentence.loc.start.line,
+                                    column: sentence.loc.start.column + (current.word_position - 1)
+                                });
+                                // padding position
+                                var padding = {
+                                    line: originalPosition.line - 1,
                                     // matchLastToken.word_position start with 1
                                     // this is padding column start with 0 (== -1)
-                                    column: sentence.loc.start.column + (current.word_position - 1)
-                                }));
+                                    column: originalPosition.column
+                                };
+                                report(node, new RuleError(`一文に二回以上利用されている助詞 "${key}" がみつかりました。`, padding));
                             }
                             return current;
                         });
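
Not part of the commit: the sentence-splitting step (step 2 in the pipeline comment above) can be tried in isolation. This is a minimal sketch that only uses the sentence-splitter API already imported by this file; the sample text is arbitrary.

// Sketch: split plain text into Sentence nodes the way the rule does,
// treating 。?! (and their half-width forms) as sentence terminators.
import splitSentences, {Syntax as SentenceSyntax} from "sentence-splitter";

const text = "材料不足で代替素材で製品を作った。材料が不足した。";
const sentences = splitSentences(text, {
    charRegExp: /[。\?\!?!]/
}).filter(node => node.type === SentenceSyntax.Sentence);
sentences.forEach(sentence => {
    // each Sentence node exposes `raw` (the sentence text) and `loc`
    // (line/column), which checkSentence() feeds to the kuromoji tokenizer
    // and later to source.originalPositionFor() when reporting
    console.log(sentence.raw, sentence.loc.start);
});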
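
Also not part of the commit: a self-contained sketch of the duplicate-joshi interval check performed by the tokens.reduce(...) block above. The helper name findDoubledJoshi and the hard-coded surface list are illustrative assumptions; the real rule takes the 助詞 tokens from kuromoji and measures the distance as the difference of their indexes inside joshiTokens.

// Sketch: duplicated-joshi detection isolated from textlint/kuromoji.
// `joshiSurfaces` stands in for the surface forms of the joshi tokens of one
// sentence, in order of appearance
// (e.g. 「材料不足で代替素材で製品を作った。」 -> ["で", "で", "を"]).
const findDoubledJoshi = (joshiSurfaces, minInterval = 1) => {
    const found = [];
    const positionsByKey = {};
    joshiSurfaces.forEach((surface, index) => {
        (positionsByKey[surface] = positionsByKey[surface] || []).push(index);
    });
    Object.keys(positionsByKey).forEach(key => {
        const positions = positionsByKey[key];
        if (positions.length <= 1) {
            return; // this joshi appears only once -> nothing to report
        }
        // positions are already ascending, so comparing neighbours mirrors
        // the tokens.reduce((prev, current) => ...) walk in the rule
        positions.reduce((prev, current) => {
            if (current - prev <= minInterval) {
                found.push({joshi: key, interval: current - prev});
            }
            return current;
        });
    });
    return found;
};

console.log(findDoubledJoshi(["で", "で", "を"], 1));
// -> [ { joshi: "で", interval: 1 } ]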