textlint-ja
diff --git a/eval/promptfooconfig.yaml b/eval/promptfooconfig.yaml
Lines changed: 105 additions & 0 deletions
diff --git a/eval/prompts/evaluate_error_message.md b/eval/prompts/evaluate_error_message.md
Lines changed: 49 additions & 0 deletions
diff --git a/eval/providers/textlintProvider.mjs b/eval/providers/textlintProvider.mjs
Lines changed: 134 additions & 0 deletions
diff --git a/eval/scripts/generate-dataset.mjs b/eval/scripts/generate-dataset.mjs
Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,105 @@
+description: 'textlint-rule-no-doubled-joshi エラーメッセージ品質評価（小規模テスト）'
+
+prompts:
+  - '{{text}}'  # the prompt is just the test case's `text` variable, which the custom provider lints
+
+providers:
+  - id: file://./providers/textlintProvider.mjs
+    label: 'textlint-no-doubled-joshi'
+
+# Rate-limit mitigation: run one request at a time with a pause between requests (delay presumably in milliseconds - confirm against the promptfoo docs)
+commandLineOptions:
+  maxConcurrency: 1
+  delay: 5000
+
+# Default assertions applied to every test case: five llm-rubric graders (clarity, accuracy, fixability, contextual_fit, fix_quality), each asked for a 0.0-1.0 score and each with its own threshold
+defaultTest:
+  assert:
+    - type: llm-rubric
+      value: |
+        エラーメッセージの明確性を0.0-1.0で評価してください。
+
+        評価基準：
+        - 1.0: 非常に明確で分かりやすい
+        - 0.7-0.9: 明確だが改善の余地がある
+        - 0.4-0.6: やや分かりにくい
+        - 0.0-0.3: 非常に分かりにくい
+
+        ユーザーが何が問題なのかを理解できるか、専門用語が適切に説明されているか、メッセージの構造が分かりやすいかを評価してください。
+      metric: clarity
+      threshold: 0.6
+      provider: ollama:completion:qwen2.5
+
+    - type: llm-rubric
+      value: |
+        エラーメッセージの正確性を0.0-1.0で評価してください。
+
+        評価基準：
+        - 1.0: 技術的に完全に正確
+        - 0.7-0.9: ほぼ正確だが細かい問題がある
+        - 0.4-0.6: やや不正確
+        - 0.0-0.3: 明らかに不正確
+
+        技術的に正しい指摘か、指摘された問題が実際に存在するか、誤検知ではないかを評価してください。
+      metric: accuracy
+      threshold: 0.7
+      provider: ollama:completion:qwen2.5
+
+    - type: llm-rubric
+      value: |
+        エラーメッセージの修正可能性を0.0-1.0で評価してください。
+
+        評価基準：
+        - 1.0: 具体的な修正方法が明示されている
+        - 0.7-0.9: 修正の方向性は示されている
+        - 0.4-0.6: やや抽象的
+        - 0.0-0.3: 修正方法が不明
+
+        具体的な修正方法が示されているか、ユーザーが次のアクションを取れるか、修正例や代替案が提供されているかを評価してください。
+      metric: fixability
+      threshold: 0.6
+      provider: ollama:completion:qwen2.5
+
+    - type: llm-rubric
+      value: |
+        エラーメッセージの文脈適合性を0.0-1.0で評価してください。
+
+        評価基準：
+        - 1.0: 文脈に完全に適合している
+        - 0.7-0.9: 概ね適切
+        - 0.4-0.6: やや不適切
+        - 0.0-0.3: 文脈を無視している
+
+        テキストの文脈を考慮した適切な指摘か、日本語の自然な表現を考慮しているか、過度に厳格すぎないかを評価してください。
+      metric: contextual_fit
+      threshold: 0.6
+      provider: ollama:completion:qwen2.5
+
+    - type: llm-rubric
+      value: |
+        元のテキストとLLMが修正したテキストを比較して、修正の品質を0.0-1.0で評価してください。
+
+        元のテキスト：{{context.originalText}}
+        修正後のテキスト：{{context.fixedText}}
+
+        評価基準：
+        - 1.0: 指摘された問題が完全に解消され、自然で読みやすい日本語になっている
+        - 0.7-0.9: 問題は解消されたが、やや不自然な表現が残っている
+        - 0.4-0.6: 修正が不十分、または元の意味が変わってしまっている
+        - 0.0-0.3: 修正できていない、または文章が破綻している
+
+        指摘された問題が解消されているか、元の意味が保たれているか、自然で読みやすい日本語になっているかを総合的に評価してください。
+      metric: fix_quality
+      threshold: 0.7
+      provider: ollama:completion:qwen2.5
+
+# Test cases: the small dataset file (the generate-dataset script writes the first 5 invalid cases to this name)
+tests: tests/doubled_joshi_cases_small.json
+
+# Derived metric: weighted sum of the five rubric metrics (the weights add up to 1.0)
+derivedMetrics:
+  - name: 'overall_quality'
+    value: 'clarity * 0.2 + accuracy * 0.3 + fixability * 0.2 + contextual_fit * 0.1 + fix_quality * 0.2'
+
+# Output: where the evaluation results are written
+outputPath: './results/evaluation-results.json'
@@ -0,0 +1,49 @@
+以下は、textlintの日本語文章校正ルール「no-doubled-joshi」が出力したエラーメッセージです。
+
+## 対象テキスト
+```
+{{text}}
+```
+
+## エラーメッセージ
+```
+{{output}}
+```
+
+## 評価観点
+
+このエラーメッセージを以下の4つの観点から評価してください：
+
+### 1. 明確性 (Clarity)
+- ユーザーが何が問題なのかを理解できるか
+- 専門用語が適切に説明されているか
+- メッセージの構造が分かりやすいか
+
+### 2. 正確性 (Accuracy)
+- 技術的に正しい指摘か
+- 指摘された問題が実際に存在するか
+- 誤検知ではないか
+
+### 3. 修正可能性 (Fixability)
+- 具体的な修正方法が示されているか
+- ユーザーが次のアクションを取れるか
+- 修正例や代替案が提供されているか
+
+### 4. 文脈適合性 (Contextual Fit)
+- テキストの文脈を考慮した適切な指摘か
+- 日本語の自然な表現を考慮しているか
+- 過度に厳格すぎないか
+
+## 出力形式
+
+各観点について0.0から1.0のスコアで評価し、以下のJSON形式で出力してください：
+
+```json
+{
+  "clarity": 0.8,
+  "accuracy": 0.9,
+  "fixability": 0.7,
+  "contextual_fit": 0.8,
+  "reasoning": "評価の理由を簡潔に説明"
+}
+```
@@ -0,0 +1,134 @@
+import { TextlintKernel } from "@textlint/kernel";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url); // this module's file path, derived from import.meta.url
+const __dirname = path.dirname(__filename);
+
+// Project root: two directory levels above this module's directory
+const projectRoot = path.resolve(__dirname, "../..");
+
+/**
+ * promptfooカスタムプロバイダー: textlintを実行してエラーメッセージを取得
+ */
+export default class TextlintProvider {
+    constructor(options = {}) {
+        this.id = () => "textlint-no-doubled-joshi";
+    }
+
+    /**
+     * promptfooから呼び出されるメイン関数
+     * @param {string} prompt - 使用しない（評価プロンプトはClaude側で処理）
+     * @param {object} context - promptfooのコンテキスト
+     * @param {object} context.vars - テストケースの変数
+     * @returns {Promise<object>} - textlintの実行結果
+     */
+    async callApi(prompt, context) {
+        const { vars } = context;
+        // promptに展開されたテキスト、またはvars.textを使用
+        const text = prompt || vars.text || "";
+        const options = vars.options || {};
+
+        // テキストが空の場合はエラーを返す
+        if (!text) {
+            return {
+                output: "エラー: テキストが指定されていません (prompt:" + prompt + ", vars:" + JSON.stringify(vars) + ")",
+                error: "No text provided"
+            };
+        }
+
+        try {
+            // ルールをインポート（CommonJS）
+            const { createRequire } = await import("module");
+            const require = createRequire(import.meta.url);
+            const ruleModule = require(path.join(projectRoot, "lib/no-doubled-joshi.js"));
+            const rule = ruleModule.default || ruleModule;
+
+            // kernelを作成
+            const kernel = new TextlintKernel();
+
+            // テキストプラグインをインポート
+            const textPluginModule = require("@textlint/textlint-plugin-text");
+            const textPlugin = textPluginModule.default || textPluginModule;
+
+            // ルールを設定して実行
+            const results = await kernel.lintText(text, {
+                filePath: "test.txt",
+                ext: ".txt",
+                plugins: [
+                    {
+                        pluginId: "text",
+                        plugin: textPlugin
+                    }
+                ],
+                rules: [
+                    {
+                        ruleId: "no-doubled-joshi",
+                        rule: rule,
+                        options: options
+                    }
+                ]
+            });
+
+            // エラーメッセージを抽出
+            const messages = results.messages || [];
+            const errorMessage = messages.length > 0 ? messages[0].message : "エラーなし";
+
+            // エラーがある場合は、LLMで修正文を生成
+            let fixedText = "";
+            if (messages.length > 0) {
+                try {
+                    const fixResponse = await fetch("http://localhost:11434/api/generate", {
+                        method: "POST",
+                        headers: {
+                            "Content-Type": "application/json",
+                        },
+                        body: JSON.stringify({
+                            model: "qwen2.5",
+                            prompt: `以下のテキストに問題があります。エラーメッセージを参考に、自然で読みやすい日本語に修正してください。修正後のテキストのみを出力してください。
+
+元のテキスト：
+${text}
+
+エラーメッセージ：
+${errorMessage}
+
+修正後のテキスト：`,
+                            stream: false,
+                        }),
+                    });
+
+                    if (fixResponse.ok) {
+                        const fixData = await fixResponse.json();
+                        fixedText = fixData.response.trim();
+                    }
+                } catch (err) {
+                    console.error("修正文生成エラー:", err);
+                }
+            }
+
+            // エラーメッセージと修正文を組み合わせて出力
+            const output = fixedText
+                ? `${errorMessage}\n\n---\n\n【修正案】\n${fixedText}`
+                : errorMessage;
+
+            return {
+                output: output,
+                context: {
+                    fixedText: fixedText,
+                    originalText: text
+                },
+                tokenUsage: {
+                    total: 0,
+                    prompt: 0,
+                    completion: 0
+                }
+            };
+        } catch (error) {
+            return {
+                output: `実行エラー: ${error.message}`,
+                error: error.message
+            };
+        }
+    }
+}
@@ -0,0 +1,70 @@
+import { getRuleTest } from "create-textlint-rule-example";
+import fs from "fs/promises";
+import path from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// プロジェクトルートを取得
+const projectRoot = path.resolve(__dirname, "../..");
+
+// テストファイルからケースを抽出
+const filePath = path.join(projectRoot, "test/no-doubled-joshi-test.ts");
+const content = await fs.readFile(filePath, "utf-8");
+
+const results = getRuleTest({
+    content: content,
+    filePath: filePath
+});
+
+console.log(`抽出されたテストケース: valid=${results.valid.length}, invalid=${results.invalid.length}`);
+
+// invalidケースのみを対象にする（エラーメッセージの評価）
+const dataset = results.invalid.map((testCase, index) => {
+    // エラーメッセージを取得
+    const errorMessage = testCase.errors[0]?.message || "";
+
+    // 重複している助詞を抽出（メッセージから）
+    const particleMatch = errorMessage.match(/助詞 "([^"]+)" が/);
+    const particle = particleMatch ? particleMatch[1] : "";
+
+    return {
+        text: testCase.text,
+        particle: particle,
+        errorMessage: errorMessage,
+        options: testCase.options || {},
+        caseId: `invalid-${index + 1}`
+    };
+});
+
+// promptfoo用にvars形式に変換
+const datasetWithVars = dataset.map(testCase => ({
+    vars: testCase
+}));
+
+// 小規模テスト用に最初の5ケースを抽出
+const smallDataset = dataset.slice(0, 5);
+const smallDatasetWithVars = smallDataset.map(testCase => ({
+    vars: testCase
+}));
+
+// データセットを保存
+const outputDir = path.join(__dirname, "../tests");
+await fs.writeFile(
+    path.join(outputDir, "doubled_joshi_cases.json"),
+    JSON.stringify(datasetWithVars, null, 2)
+);
+
+await fs.writeFile(
+    path.join(outputDir, "doubled_joshi_cases_small.json"),
+    JSON.stringify(smallDatasetWithVars, null, 2)
+);
+
+console.log(`\nデータセット生成完了:`);
+console.log(`- 全ケース: ${dataset.length}件 → eval/tests/doubled_joshi_cases.json`);
+console.log(`- 小規模テスト: ${smallDataset.length}件 → eval/tests/doubled_joshi_cases_small.json`);
+
+// サンプルを表示
+console.log("\n最初のケース:");
+console.log(JSON.stringify(smallDataset[0], null, 2));