From 4b166d94219aa54bd5e2e96a35c0579ad207aa1a Mon Sep 17 00:00:00 2001
From: Dai Sugimori
Date: Wed, 4 Sep 2024 19:55:56 +0900
Subject: [PATCH] [DOCS] Add docs for new Lucene's filters for Japanese text. (#112356)

(cherry picked from commit 2982fc61e81fa23ed03f7b51854af9f5352666bb)
---
 docs/plugins/analysis-kuromoji.asciidoc | 120 ++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc
index 1f114e9ad9ed6..b1d1d5a751057 100644
--- a/docs/plugins/analysis-kuromoji.asciidoc
+++ b/docs/plugins/analysis-kuromoji.asciidoc
@@ -624,3 +624,123 @@ Which results in:
   } ]
 }
 --------------------------------------------------
+
+[[analysis-kuromoji-hiragana-uppercase]]
+==== `hiragana_uppercase` token filter
+
+The `hiragana_uppercase` token filter normalizes small letters (捨て仮名) in hiragana into standard letters.
+This filter is useful if you want to search against old style Japanese text such as
+patents, legal documents, contract policies, etc.
+
+For example:
+
+[source,console]
+--------------------------------------------------
+PUT kuromoji_sample
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "analyzer": {
+          "my_analyzer": {
+            "tokenizer": "kuromoji_tokenizer",
+            "filter": [
+              "hiragana_uppercase"
+            ]
+          }
+        }
+      }
+    }
+  }
+}
+
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "ちょっとまって"
+}
+--------------------------------------------------
+
+Which results in:
+
+[source,console-result]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "ちよつと",
+      "start_offset": 0,
+      "end_offset": 4,
+      "type": "word",
+      "position": 0
+    },
+    {
+      "token": "まつ",
+      "start_offset": 4,
+      "end_offset": 6,
+      "type": "word",
+      "position": 1
+    },
+    {
+      "token": "て",
+      "start_offset": 6,
+      "end_offset": 7,
+      "type": "word",
+      "position": 2
+    }
+  ]
+}
+--------------------------------------------------
+
+[[analysis-kuromoji-katakana-uppercase]]
+==== `katakana_uppercase` token filter
+
+The `katakana_uppercase` token filter normalizes small letters (捨て仮名) in katakana into standard letters.
+This filter is useful if you want to search against old style Japanese text such as
+patents, legal documents, contract policies, etc.
+
+For example:
+
+[source,console]
+--------------------------------------------------
+PUT kuromoji_sample
+{
+  "settings": {
+    "index": {
+      "analysis": {
+        "analyzer": {
+          "my_analyzer": {
+            "tokenizer": "kuromoji_tokenizer",
+            "filter": [
+              "katakana_uppercase"
+            ]
+          }
+        }
+      }
+    }
+  }
+}
+
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "ストップウォッチ"
+}
+--------------------------------------------------
+
+Which results in:
+
+[source,console-result]
+--------------------------------------------------
+{
+  "tokens": [
+    {
+      "token": "ストツプウオツチ",
+      "start_offset": 0,
+      "end_offset": 8,
+      "type": "word",
+      "position": 0
+    }
+  ]
+}
+--------------------------------------------------
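
Not part of the patch itself, but as a usage sketch: since `hiragana_uppercase` and `katakana_uppercase` are ordinary token filters, they can be chained in a single analyzer so that small kana are normalized in both scripts. The snippet below reuses the `kuromoji_sample` index and `my_analyzer` names from the examples above; the mixed-script sample text is only illustrative.

[source,console]
--------------------------------------------------
PUT kuromoji_sample
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_analyzer": {
            "tokenizer": "kuromoji_tokenizer",
            "filter": [
              "hiragana_uppercase",
              "katakana_uppercase"
            ]
          }
        }
      }
    }
  }
}

GET kuromoji_sample/_analyze
{
  "analyzer": "my_analyzer",
  "text": "ちょっとストップ"
}
--------------------------------------------------

Going by the examples documented in the patch, the hiragana token should come back normalized as ちよつと and the katakana token as ストツプ.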