feat: enhance marker-based extraction with advanced features

kwonah0 · claude · kwonah0 · commit 8eb19f208b6d · 2025-09-03T16:43:27.000+09:00
- Improve marker extraction with case-insensitive fallback search
- Add proper end marker search after start marker position
- Enhance logging with extraction details and preview
- Add comprehensive test suites for marker functionality
- Support inline markers at any position in text
- Handle nested markers correctly (extract first pair)
- Update package version to 1.3.2 for consistency

Tests verify:
- Basic and inline marker extraction
- Case-insensitive marker matching
- Custom marker support (### START ### / ### END ###)
- Real-world LLM output scenarios
- Fallback behavior when markers are missing
- Multi-line content extraction
- Shell command integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "dtui2-react",
-  "version": "1.1.0",
+  "version": "1.3.2",
   "main": "electron/main.js",
   "scripts": {
     "dev": "vite",
diff --git a/src/agents/ElectronShellAIAgent.ts b/src/agents/ElectronShellAIAgent.ts
@@ -112,17 +112,51 @@ export class ElectronShellAIAgent implements AIAgent {
       }
       
       const { startMarker, endMarker } = this.outputConfig.extraction;
-      const startIndex = text.indexOf(startMarker);
-      const endIndex = text.indexOf(endMarker);
       
-      if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
-        const extracted = text.substring(startIndex + startMarker.length, endIndex).trim();
-        console.log('✅ Extracted response between markers:', { startIndex, endIndex, extracted: extracted.slice(0, 100) + '...' });
-        return extracted;
+      // Find start marker (case-insensitive search as fallback)
+      let startIndex = text.indexOf(startMarker);
+      if (startIndex === -1) {
+        // Try case-insensitive search
+        const lowerText = text.toLowerCase();
+        const lowerStartMarker = startMarker.toLowerCase();
+        startIndex = lowerText.indexOf(lowerStartMarker);
+        if (startIndex !== -1) {
+          console.log('🔍 Found start marker with case-insensitive search');
+        }
       }
       
-      console.log('⚠️ Markers not found, using fallback (full output)');
-      return text;
+      if (startIndex === -1) {
+        console.log('⚠️ Start marker not found, using fallback (full output)');
+        return text;
+      }
+      
+      // Find end marker after start marker
+      const searchStart = startIndex + startMarker.length;
+      let endIndex = text.indexOf(endMarker, searchStart);
+      if (endIndex === -1) {
+        // Try case-insensitive search for end marker
+        const lowerText = text.toLowerCase();
+        const lowerEndMarker = endMarker.toLowerCase();
+        endIndex = lowerText.indexOf(lowerEndMarker, searchStart);
+        if (endIndex !== -1) {
+          console.log('🔍 Found end marker with case-insensitive search');
+        }
+      }
+      
+      if (endIndex === -1) {
+        console.log('⚠️ End marker not found, using fallback (full output)');
+        return text;
+      }
+      
+      const extracted = text.substring(startIndex + startMarker.length, endIndex).trim();
+      console.log('✅ Extracted response between markers:', { 
+        startIndex, 
+        endIndex, 
+        extractedLength: extracted.length,
+        preview: extracted.slice(0, 100) + (extracted.length > 100 ? '...' : '')
+      });
+      
+      return extracted;
     };
     
     if (result.success) {
diff --git a/test-marker-advanced.js b/test-marker-advanced.js
@@ -0,0 +1,149 @@
+#!/usr/bin/env node
+
+// Test advanced marker extraction functionality
+// Builds a fresh default <RESPONSE> marker configuration for each scenario.
+const responseMarkers = () => ({ enabled: true, startMarker: '<RESPONSE>', endMarker: '</RESPONSE>' });
+
+// Scenario table for the advanced extraction checks.
+// Every row is [name, text, config, expected]; the trailing map() expands the
+// rows into { name, text, config, expected } objects.
+const testCases = [
+  [
+    "Case insensitive markers",
+    "Some text\n<response>\nExtracted content\n</response>\nMore text",
+    responseMarkers(),
+    "Extracted content"
+  ],
+  [
+    "Mixed case markers",
+    "Text <Response>Mixed case content</RESPONSE> end",
+    responseMarkers(),
+    "Mixed case content"
+  ],
+  [
+    // The first end marker after the first start marker closes the extraction.
+    "Nested similar markers",
+    "A<RESPONSE>First<RESPONSE>Nested</RESPONSE>End</RESPONSE>B",
+    responseMarkers(),
+    "First<RESPONSE>Nested"
+  ],
+  [
+    "Custom markers",
+    "Prefix ### START ###\nCustom extracted content\n### END ### Suffix",
+    { enabled: true, startMarker: '### START ###', endMarker: '### END ###' },
+    "Custom extracted content"
+  ],
+  [
+    "Markers in middle of lines",
+    "Some long line with <RESPONSE>inline content</RESPONSE> and more text after",
+    responseMarkers(),
+    "inline content"
+  ],
+  [
+    "Empty extraction",
+    "Text <RESPONSE></RESPONSE> more text",
+    responseMarkers(),
+    ""
+  ],
+  [
+    // Whitespace between the markers is trimmed away, leaving an empty string.
+    "Whitespace only extraction",
+    "Text <RESPONSE>   \n\n  </RESPONSE> more text",
+    responseMarkers(),
+    ""
+  ],
+  [
+    "Real-world LLM output",
+    `Here's your analysis:
+
+The code looks good, but I found some issues:
+
+<RESPONSE>
+1. Memory leak in line 42
+2. Missing error handling in function processData()
+3. Deprecated API usage in networking module
+
+Suggested fixes:
+- Add try-catch blocks
+- Update to latest API version
+- Use proper cleanup in destructors
+</RESPONSE>
+
+Let me know if you need more details!`,
+    responseMarkers(),
+    `1. Memory leak in line 42
+2. Missing error handling in function processData()
+3. Deprecated API usage in networking module
+
+Suggested fixes:
+- Add try-catch blocks
+- Update to latest API version
+- Use proper cleanup in destructors`
+  ]
+].map(([name, text, config, expected]) => ({ name, text, config, expected }));
+
+/**
+ * Locate `marker` in `text` at or after `fromIndex`.
+ *
+ * An exact (case-sensitive) hit is preferred; otherwise a case-insensitive
+ * match is attempted. The fallback matches against the original string, so the
+ * returned index is always valid for `text`. Indices taken from
+ * `text.toLowerCase()` are not: lowercasing can change the string length
+ * (for example "İ" becomes two code units), which shifts every later position.
+ *
+ * @param {string} text - Text to search.
+ * @param {string} marker - Literal marker to find.
+ * @param {number} [fromIndex=0] - Position to start searching from.
+ * @returns {{ index: number, length: number, caseInsensitive: boolean } | null}
+ *   Match position and matched length, or null when the marker is absent.
+ */
+function findMarker(text, marker, fromIndex = 0) {
+  const exactIndex = text.indexOf(marker, fromIndex);
+  if (exactIndex !== -1) {
+    return { index: exactIndex, length: marker.length, caseInsensitive: false };
+  }
+
+  // Escape regex metacharacters so the marker is matched literally.
+  const escapedMarker = marker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const pattern = new RegExp(escapedMarker, 'gi');
+  // With the `g` flag, exec() starts scanning at lastIndex.
+  pattern.lastIndex = fromIndex;
+  const match = pattern.exec(text);
+  if (match === null) {
+    return null;
+  }
+  return { index: match.index, length: match[0].length, caseInsensitive: true };
+}
+
+/**
+ * Extract the text between the configured start and end markers.
+ *
+ * The end marker is only looked for after the start marker. When extraction is
+ * disabled, or either marker cannot be found, the full input is returned.
+ *
+ * @param {string} text - Raw output to extract from.
+ * @param {{ enabled: boolean, startMarker: string, endMarker: string }} config - Extraction settings.
+ * @returns {string} The trimmed text between the markers, or `text` unchanged as a fallback.
+ */
+function extractResponse(text, config) {
+  if (!config.enabled) {
+    return text;
+  }
+
+  const { startMarker, endMarker } = config;
+
+  const start = findMarker(text, startMarker);
+  if (start === null) {
+    console.log('⚠️ Start marker not found, using fallback (full output)');
+    return text;
+  }
+  if (start.caseInsensitive) {
+    console.log('🔍 Found start marker with case-insensitive search');
+  }
+
+  // Content begins right after the matched start marker.
+  const contentStart = start.index + start.length;
+  const end = findMarker(text, endMarker, contentStart);
+  if (end === null) {
+    console.log('⚠️ End marker not found, using fallback (full output)');
+    return text;
+  }
+  if (end.caseInsensitive) {
+    console.log('🔍 Found end marker with case-insensitive search');
+  }
+
+  const extracted = text.substring(contentStart, end.index).trim();
+  console.log('✅ Extracted:', {
+    startIndex: start.index,
+    endIndex: end.index,
+    extractedLength: extracted.length,
+    preview: extracted.slice(0, 50) + (extracted.length > 50 ? '...' : '')
+  });
+
+  return extracted;
+}
+
+console.log('🔬 Testing advanced marker extraction functionality\n');
+
+// Count failures so the run can report them through its exit status.
+let failedCount = 0;
+
+// Run every scenario, print expected vs. actual, and dump the full values on a mismatch.
+testCases.forEach((testCase, index) => {
+  console.log(`Test ${index + 1}: ${testCase.name}`);
+  console.log(`Markers: "${testCase.config.startMarker}" ... "${testCase.config.endMarker}"`);
+  
+  const result = extractResponse(testCase.text, testCase.config);
+  const passed = result === testCase.expected;
+  
+  // Previews are capped at 100 characters to keep the log readable.
+  console.log(`Expected (${testCase.expected.length} chars): ${JSON.stringify(testCase.expected.slice(0, 100))}`);
+  console.log(`Got      (${result.length} chars): ${JSON.stringify(result.slice(0, 100))}`);
+  console.log(`${passed ? '✅ PASS' : '❌ FAIL'}\n`);
+  
+  if (!passed) {
+    failedCount += 1;
+    console.log('📋 Full comparison:');
+    console.log('Expected:', testCase.expected);
+    console.log('Got:', result);
+    console.log('---\n');
+  }
+});
+
+console.log(`Summary: ${testCases.length - failedCount}/${testCases.length} passed`);
+
+// A non-zero exit code lets shells and CI detect a failing run;
+// without it every run exits 0 and failures are only visible in the log.
+if (failedCount > 0) {
+  process.exitCode = 1;
+}
diff --git a/test-marker.js b/test-marker.js
@@ -0,0 +1,73 @@
+#!/usr/bin/env node
+
+// Test marker extraction functionality
+// Basic extraction scenarios written as [name, text, expected] rows;
+// the trailing map() expands them into { name, text, expected } objects.
+const testCases = [
+  [
+    "Basic marker extraction",
+    "Some prefix text\n<RESPONSE>\nThis is the response\n</RESPONSE>\nSome suffix text",
+    "This is the response"
+  ],
+  [
+    "Inline markers",
+    "Here is the answer: <RESPONSE>42</RESPONSE> and that's it.",
+    "42"
+  ],
+  [
+    "Markers with extra whitespace",
+    "Before\n  <RESPONSE>  \n  Extracted content  \n  </RESPONSE>  \nAfter",
+    "Extracted content"
+  ],
+  [
+    "Multiline content",
+    "Prefix<RESPONSE>\nLine 1\nLine 2\nLine 3\n</RESPONSE>Suffix",
+    "Line 1\nLine 2\nLine 3"
+  ],
+  [
+    // Fallback: without markers the input is expected back unchanged.
+    "No markers",
+    "Just plain text without any markers",
+    "Just plain text without any markers"
+  ],
+  [
+    // Fallback: a start marker with no end marker also yields the full input.
+    "Only start marker",
+    "Text <RESPONSE> more text but no end",
+    "Text <RESPONSE> more text but no end"
+  ],
+  [
+    "Multiple marker pairs (should take first)",
+    "A<RESPONSE>first</RESPONSE>B<RESPONSE>second</RESPONSE>C",
+    "first"
+  ]
+].map(([name, text, expected]) => ({ name, text, expected }));
+
+/**
+ * Extract the text between the start and end markers.
+ *
+ * The end marker is searched for only after the start marker, so an end marker
+ * that appears earlier in the text does not prevent extraction of a later,
+ * well-formed pair. When extraction is disabled or a marker is missing, the
+ * input is returned unchanged.
+ *
+ * @param {string} text - Raw output to extract from.
+ * @param {{ enabled: boolean, startMarker: string, endMarker: string }} [config]
+ *   Extraction settings; defaults to the <RESPONSE> ... </RESPONSE> pair.
+ * @returns {string} The trimmed text between the markers, or `text` as a fallback.
+ */
+function extractResponse(text, config = { enabled: true, startMarker: '<RESPONSE>', endMarker: '</RESPONSE>' }) {
+  if (!config.enabled) {
+    return text;
+  }
+  
+  const { startMarker, endMarker } = config;
+
+  const startIndex = text.indexOf(startMarker);
+  if (startIndex === -1) {
+    console.log('⚠️ Markers not found, using fallback');
+    return text;
+  }
+
+  // Start the end-marker search right after the start marker rather than at index 0.
+  const contentStart = startIndex + startMarker.length;
+  const endIndex = text.indexOf(endMarker, contentStart);
+  if (endIndex === -1) {
+    console.log('⚠️ Markers not found, using fallback');
+    return text;
+  }
+
+  const extracted = text.substring(contentStart, endIndex).trim();
+  console.log('✅ Extracted:', { startIndex, endIndex, extracted: extracted.slice(0, 50) + (extracted.length > 50 ? '...' : '') });
+  return extracted;
+}
+
+console.log('🧪 Testing marker extraction functionality\n');
+
+// Count failures so the run can report them through its exit status.
+let failedCount = 0;
+
+// Run every scenario with the default marker configuration and print expected vs. actual.
+testCases.forEach((testCase, index) => {
+  console.log(`Test ${index + 1}: ${testCase.name}`);
+  console.log(`Input: ${JSON.stringify(testCase.text)}`);
+  
+  const result = extractResponse(testCase.text);
+  const passed = result === testCase.expected;
+  if (!passed) {
+    failedCount += 1;
+  }
+  
+  console.log(`Expected: ${JSON.stringify(testCase.expected)}`);
+  console.log(`Got:      ${JSON.stringify(result)}`);
+  console.log(`${passed ? '✅ PASS' : '❌ FAIL'}\n`);
+});
+
+console.log(`Summary: ${testCases.length - failedCount}/${testCases.length} passed`);
+
+// A non-zero exit code lets shells and CI detect a failing run;
+// without it every run exits 0 and failures are only visible in the log.
+if (failedCount > 0) {
+  process.exitCode = 1;
+}
diff --git a/test-shell-marker.js b/test-shell-marker.js
@@ -0,0 +1,83 @@
+#!/usr/bin/env node
+
+// Test shell command with markers
+console.log('🧪 Testing shell command with marker extraction\n');
+
+// Shell commands whose output simulates marker-wrapped responses.
+// Rows are [name, command]; the trailing map() expands them into { name, command } objects.
+const testCommands = [
+  [
+    "Echo with markers",
+    'echo "Before text <RESPONSE>This is the extracted answer</RESPONSE> After text"'
+  ],
+  [
+    "Multi-line with markers",
+    `echo "Analysis results:
+<RESPONSE>
+- Issue 1: Memory leak
+- Issue 2: Missing validation  
+- Issue 3: Performance bottleneck
+</RESPONSE>
+End of analysis."`
+  ],
+  [
+    "No markers (fallback test)",
+    'echo "This is just plain output without any special markers"'
+  ]
+].map(([name, command]) => ({ name, command }));
+
+// Loaded once up front instead of on every loop iteration.
+const { execSync } = require('child_process');
+
+const START_MARKER = '<RESPONSE>';
+const END_MARKER = '</RESPONSE>';
+
+/**
+ * Locate `marker` in `text` at or after `fromIndex`, preferring an exact match
+ * and falling back to a case-insensitive one.
+ *
+ * The fallback matches against the original string; positions taken from
+ * `text.toLowerCase()` can be wrong because lowercasing may change the length.
+ *
+ * @param {string} text - Text to search.
+ * @param {string} marker - Literal marker to find.
+ * @param {number} [fromIndex=0] - Position to start searching from.
+ * @returns {{ index: number, length: number } | null} Match position and length, or null.
+ */
+const findMarker = (text, marker, fromIndex = 0) => {
+  const exactIndex = text.indexOf(marker, fromIndex);
+  if (exactIndex !== -1) {
+    return { index: exactIndex, length: marker.length };
+  }
+
+  // Escape regex metacharacters so the marker is matched literally.
+  const pattern = new RegExp(marker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
+  // With the `g` flag, exec() starts scanning at lastIndex.
+  pattern.lastIndex = fromIndex;
+  const match = pattern.exec(text);
+  return match === null ? null : { index: match.index, length: match[0].length };
+};
+
+/**
+ * Return the trimmed text between the <RESPONSE> markers, or the full text
+ * when either marker is missing.
+ *
+ * @param {string} text - Command output to extract from.
+ * @returns {string} Extracted content or the unchanged input.
+ */
+const extractResponse = (text) => {
+  const start = findMarker(text, START_MARKER);
+  if (start === null) {
+    console.log('⚠️ No start marker found, using full output');
+    return text;
+  }
+
+  // The end marker only counts when it appears after the start marker.
+  const contentStart = start.index + start.length;
+  const end = findMarker(text, END_MARKER, contentStart);
+  if (end === null) {
+    console.log('⚠️ No end marker found, using full output');
+    return text;
+  }
+
+  const extracted = text.substring(contentStart, end.index).trim();
+  console.log('✅ Extracted content between markers');
+  return extracted;
+};
+
+// Execute each command, echo its raw output, then show the extracted portion.
+testCommands.forEach((test, index) => {
+  console.log(`\n=== Test ${index + 1}: ${test.name} ===`);
+  console.log(`Command: ${test.command}`);
+  console.log('Output:');
+  
+  try {
+    const output = execSync(test.command, { encoding: 'utf8' });
+    console.log(output);
+    
+    const extractedOutput = extractResponse(output);
+    console.log('\n📦 Final extracted output:');
+    console.log('---');
+    console.log(extractedOutput);
+    console.log('---');
+    
+  } catch (error) {
+    console.error('❌ Command failed:', error.message);
+    // Report the failure through the exit status instead of only logging it.
+    process.exitCode = 1;
+  }
+});
+
+console.log('\n🎯 Marker extraction testing complete!');

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "dtui2-react",`
`3`		`- "version": "1.1.0",`
	`3`	`+ "version": "1.3.2",`
`4`	`4`	`"main": "electron/main.js",`
`5`	`5`	`"scripts": {`
`6`	`6`	`"dev": "vite",`