@@ -624,3 +624,123 @@ Which results in:
624624 } ]
625625}
626626--------------------------------------------------
627+
628+ [[analysis-kuromoji-hiragana-uppercase]]
629+ ==== `hiragana_uppercase` token filter
630+
631+ The `hiragana_uppercase` token filter normalizes small hiragana letters (捨て仮名), such as `ょ` and `っ`, into their standard full-size forms (`よ` and `つ`).
632+ This filter is useful if you want to search old-style Japanese text, such as
633+ patents, legal documents, and contract policies, where these letters are often written at full size.
634+
635+ For example:
636+
637+ [source,console]
638+ --------------------------------------------------
639+ PUT kuromoji_sample
640+ {
641+ "settings": {
642+ "index": {
643+ "analysis": {
644+ "analyzer": {
645+ "my_analyzer": {
646+ "tokenizer": "kuromoji_tokenizer",
647+ "filter": [
648+ "hiragana_uppercase"
649+ ]
650+ }
651+ }
652+ }
653+ }
654+ }
655+ }
656+
657+ GET kuromoji_sample/_analyze
658+ {
659+ "analyzer": "my_analyzer",
660+ "text": "ちょっとまって"
661+ }
662+ --------------------------------------------------
663+
664+ Which results in:
665+
666+ [source,console-result]
667+ --------------------------------------------------
668+ {
669+ "tokens": [
670+ {
671+ "token": "ちよつと",
672+ "start_offset": 0,
673+ "end_offset": 4,
674+ "type": "word",
675+ "position": 0
676+ },
677+ {
678+ "token": "まつ",
679+ "start_offset": 4,
680+ "end_offset": 6,
681+ "type": "word",
682+ "position": 1
683+ },
684+ {
685+ "token": "て",
686+ "start_offset": 6,
687+ "end_offset": 7,
688+ "type": "word",
689+ "position": 2
690+ }
691+ ]
692+ }
693+ --------------------------------------------------
694+
695+ [[analysis-kuromoji-katakana-uppercase]]
696+ ==== `katakana_uppercase` token filter
697+
698+ The `katakana_uppercase` token filter normalizes small katakana letters (捨て仮名), such as `ッ` and `ォ`, into their standard full-size forms (`ツ` and `オ`).
699+ This filter is useful if you want to search old-style Japanese text, such as
700+ patents, legal documents, and contract policies, where these letters are often written at full size.
701+
702+ For example:
703+
704+ [source,console]
705+ --------------------------------------------------
706+ PUT kuromoji_sample
707+ {
708+ "settings": {
709+ "index": {
710+ "analysis": {
711+ "analyzer": {
712+ "my_analyzer": {
713+ "tokenizer": "kuromoji_tokenizer",
714+ "filter": [
715+ "katakana_uppercase"
716+ ]
717+ }
718+ }
719+ }
720+ }
721+ }
722+ }
723+
724+ GET kuromoji_sample/_analyze
725+ {
726+ "analyzer": "my_analyzer",
727+ "text": "ストップウォッチ"
728+ }
729+ --------------------------------------------------
730+
731+ Which results in:
732+
733+ [source,console-result]
734+ --------------------------------------------------
735+ {
736+ "tokens": [
737+ {
738+ "token": "ストツプウオツチ",
739+ "start_offset": 0,
740+ "end_offset": 8,
741+ "type": "word",
742+ "position": 0
743+ }
744+ ]
745+ }
746+ --------------------------------------------------
0 commit comments