@@ -624,3 +624,123 @@ Which results in:
624
624
} ]
625
625
}
626
626
--------------------------------------------------
627
+
628
+ [[analysis-kuromoji-hiragana-uppercase]]
629
+ ==== `hiragana_uppercase` token filter
630
+
631
+ The `hiragana_uppercase` token filter normalizes small hiragana letters (捨て仮名) to their standard full-size forms; for example, `ょ` becomes `よ` and `っ` becomes `つ`.
632
+ This filter is useful if you want to search against old-style Japanese text such as
633
+ patents, legal documents, contract policies, etc.
634
+
635
+ For example:
636
+
637
+ [source,console]
638
+ --------------------------------------------------
639
+ PUT kuromoji_sample
640
+ {
641
+ "settings": {
642
+ "index": {
643
+ "analysis": {
644
+ "analyzer": {
645
+ "my_analyzer": {
646
+ "tokenizer": "kuromoji_tokenizer",
647
+ "filter": [
648
+ "hiragana_uppercase"
649
+ ]
650
+ }
651
+ }
652
+ }
653
+ }
654
+ }
655
+ }
656
+
657
+ GET kuromoji_sample/_analyze
658
+ {
659
+ "analyzer": "my_analyzer",
660
+ "text": "ちょっとまって"
661
+ }
662
+ --------------------------------------------------
663
+
664
+ Which results in:
665
+
666
+ [source,console-result]
667
+ --------------------------------------------------
668
+ {
669
+ "tokens": [
670
+ {
671
+ "token": "ちよつと",
672
+ "start_offset": 0,
673
+ "end_offset": 4,
674
+ "type": "word",
675
+ "position": 0
676
+ },
677
+ {
678
+ "token": "まつ",
679
+ "start_offset": 4,
680
+ "end_offset": 6,
681
+ "type": "word",
682
+ "position": 1
683
+ },
684
+ {
685
+ "token": "て",
686
+ "start_offset": 6,
687
+ "end_offset": 7,
688
+ "type": "word",
689
+ "position": 2
690
+ }
691
+ ]
692
+ }
693
+ --------------------------------------------------
694
+
695
+ [[analysis-kuromoji-katakana-uppercase]]
696
+ ==== `katakana_uppercase` token filter
697
+
698
+ The `katakana_uppercase` token filter normalizes small katakana letters (捨て仮名) to their standard full-size forms; for example, `ッ` becomes `ツ` and `ォ` becomes `オ`.
699
+ This filter is useful if you want to search against old-style Japanese text such as
700
+ patents, legal documents, contract policies, etc.
701
+
702
+ For example:
703
+
704
+ [source,console]
705
+ --------------------------------------------------
706
+ PUT kuromoji_sample
707
+ {
708
+ "settings": {
709
+ "index": {
710
+ "analysis": {
711
+ "analyzer": {
712
+ "my_analyzer": {
713
+ "tokenizer": "kuromoji_tokenizer",
714
+ "filter": [
715
+ "katakana_uppercase"
716
+ ]
717
+ }
718
+ }
719
+ }
720
+ }
721
+ }
722
+ }
723
+
724
+ GET kuromoji_sample/_analyze
725
+ {
726
+ "analyzer": "my_analyzer",
727
+ "text": "ストップウォッチ"
728
+ }
729
+ --------------------------------------------------
730
+
731
+ Which results in:
732
+
733
+ [source,console-result]
734
+ --------------------------------------------------
735
+ {
736
+ "tokens": [
737
+ {
738
+ "token": "ストツプウオツチ",
739
+ "start_offset": 0,
740
+ "end_offset": 8,
741
+ "type": "word",
742
+ "position": 0
743
+ }
744
+ ]
745
+ }
746
+ --------------------------------------------------
0 commit comments