Return the full before/after strings (original and resolved) for resolved entities from the backend to the frontend

tomsmoker · tomsmoker · commit 29b14b045c4e · 2024-11-03T16:42:39.000-08:00
diff --git a/backend/src/app/schemas/query_api.py b/backend/src/app/schemas/query_api.py
@@ -6,6 +6,13 @@
 
 from app.models.query_core import Chunk, FormatType, Rule
 
+class ResolvedEntity(BaseModel):
+    """Schema for resolved entity transformations."""
+    original: Union[str, List[str]]
+    resolved: Union[str, List[str]]
+    source: dict[str, str]
+    entityType: str
+
 
 class QueryPromptSchema(BaseModel):
     """Schema for the prompt part of the query request."""
@@ -39,7 +46,7 @@ class QueryResult(BaseModel):
 
     answer: Any
     chunks: List[Chunk]
-    resolved_entities: Optional[dict[str, str]] = None
+    resolved_entities: Optional[List[ResolvedEntity]] = None
 
 
 class QueryResponseSchema(BaseModel):
@@ -51,7 +58,7 @@ class QueryResponseSchema(BaseModel):
     answer: Optional[Any] = None
     chunks: List[Chunk]
     type: str
-    resolved_entities: Optional[dict[str, str]] = None
+    resolved_entities: Optional[List[ResolvedEntity]] = None
 
 
 class QueryAnswer(BaseModel):
@@ -69,7 +76,7 @@ class QueryAnswerResponse(BaseModel):
 
     answer: QueryAnswer
     chunks: List[Chunk]
-    resolved_entities: Optional[dict[str, str]] = None
+    resolved_entities: Optional[List[ResolvedEntity]] = None
 
 
 # Type for search responses (used in service layer)
diff --git a/backend/src/app/services/query_service.py b/backend/src/app/services/query_service.py
@@ -1,6 +1,7 @@
 """Query service."""
 
 import logging
+import re
 from typing import Any, Awaitable, Callable, Dict, List, Union
 
 from app.models.query_core import Chunk, FormatType, QueryType, Rule
@@ -45,61 +46,52 @@ def replace_keywords(
     if not text or not keyword_replacements:
         return text, {}
 
-    transformations: Dict[str, str] = {}
-
     # Handle list of strings
     if isinstance(text, list):
+        original_text = text.copy()
         result = []
-        # Track which strings were modified
+        modified = False
+        
+        # Create a single regex pattern for all keywords
+        pattern = '|'.join(map(re.escape, keyword_replacements.keys()))
+        regex = re.compile(f'\\b({pattern})\\b')
+        
         for item in text:
-            if any(keyword in item.split() for keyword in keyword_replacements):
-                # Only process strings that contain keywords
-                transformed_item, item_transformations = replace_keywords_in_string(item, keyword_replacements)
-                result.append(transformed_item)
-                # Store the full before/after for the list item
-                transformations[item] = transformed_item
-            else:
-                result.append(item)
-        return result, transformations
+            # Single pass replacement for all keywords
+            new_item = regex.sub(lambda m: keyword_replacements[m.group()], item)
+            result.append(new_item)
+            if new_item != item:
+                modified = True
+        
+        # Only return transformation if something actually changed
+        if modified:
+            return result, {
+                "original": original_text,
+                "resolved": result
+            }
+        return result, {}
 
     # Handle single string
     return replace_keywords_in_string(text, keyword_replacements)
 
-
 def replace_keywords_in_string(
     text: str, keyword_replacements: dict[str, str]
 ) -> tuple[str, dict[str, str]]:
     """Keywords for single string."""
     if not text:
         return text, {}
 
-    result = text
-    transformations: Dict[str, str] = {}
-
-    for original, new_word in keyword_replacements.items():
-        if original in text:
-            current_pos = 0
-            while True:
-                start_idx = text.find(original, current_pos)
-                if start_idx == -1:  # No more occurrences
-                    break
-
-                end_idx = start_idx + len(original)
-                current_pos = end_idx
-
-                while end_idx < len(text) and (
-                    text[end_idx].isalnum() or text[end_idx] in "()"
-                ):
-                    end_idx += 1
-
-                full_original = text[start_idx:end_idx]
-                suffix = text[start_idx + len(original) : end_idx]
-                full_new = new_word + suffix
-
-                result = result.replace(full_original, full_new)
-                transformations[full_original] = full_new
-
-    return result, transformations
+    # Create a single regex pattern for all keywords
+    pattern = '|'.join(map(re.escape, keyword_replacements.keys()))
+    regex = re.compile(f'\\b({pattern})\\b')
+    
+    # Single pass replacement
+    result = regex.sub(lambda m: keyword_replacements[m.group()], text)
+    
+    # Only return transformation if something changed
+    if result != text:
+        return result, {"original": text, "resolved": result}
+    return text, {}
 
 
 async def process_query(
@@ -141,31 +133,48 @@ async def process_query(
             else chunks
         )
 
+        # First populate the replacements dictionary
         replacements: Dict[str, str] = {}
-
         if resolve_entity_rules and answer_value:
-            # Combine all replacements from all resolve_entity rules
             for rule in resolve_entity_rules:
                 if rule.options:
                     rule_replacements = dict(
                         option.split(":") for option in rule.options
                     )
                     replacements.update(rule_replacements)
 
+            # Then apply the replacements if we have any
             if replacements:
                 print(f"Resolving entities in answer: {answer_value}")
-                # Handle both string and list cases
-                answer_value, transformations = replace_keywords(
-                    answer_value, replacements
-                )
+                if isinstance(answer_value, list):
+                    # Transform the list but keep track of both original and transformed
+                    transformed_list, _ = replace_keywords(answer_value, replacements)
+                    transformations = {
+                        "original": answer_value,  # Keep as list
+                        "resolved": transformed_list  # Keep as list
+                    }
+                    answer_value = transformed_list
+                else:
+                    # Handle single string case
+                    transformed_value, _ = replace_keywords(answer_value, replacements)
+                    transformations = {
+                        "original": answer_value,
+                        "resolved": transformed_value
+                    }
+                    answer_value = transformed_value
+
 
     return QueryResult(
         answer=answer_value,
         chunks=result_chunks[:10],
-        resolved_entities=transformations if transformations else None,
+        resolved_entities=[{
+            "original": transformations["original"],
+            "resolved": transformations["resolved"],
+            "source": {"type": "column", "id": "some-id"},
+            "entityType": "some-type"
+        }] if transformations else None
     )
 
-
 # Convenience functions for specific query types
 async def decomposition_query(
     query: str,
diff --git a/frontend/src/components/kt/kt-controls/kt-resolved-entities.tsx b/frontend/src/components/kt/kt-controls/kt-resolved-entities.tsx
@@ -29,44 +29,6 @@ export function KtResolvedEntities(props: BoxProps) {
     return entities;
   }, [table.globalRules, table.columns]);
 
-  // Helper to format display value based on type
-  const getDisplayValue = (value: string, entity: ResolvedEntity, isOriginal: boolean) => {
-    try {
-      const parsed = JSON.parse(value);
-      if (Array.isArray(parsed)) {
-        if (isOriginal) {
-          
-          return (
-            <Code block style={{ whiteSpace: 'pre-wrap' }}>
-              {parsed
-                .map(item => {
-                  // Check if the item contains the resolved value
-                  const shouldReplace = item.includes(entity.resolved);
-                  // If it contains the resolved value, replace that part with the original
-                  return shouldReplace ? item.replace(entity.resolved, entity.original) : item;
-                })
-                .join('\n')}
-            </Code>
-          );
-        } else {
-          // For "To": Show fullAnswer as is
-          return (
-            <Code block style={{ whiteSpace: 'pre-wrap' }}>
-              {parsed.join('\n')}
-            </Code>
-          );
-        }
-      }
-    } catch {
-      // If not an array, show the original value for "From" or the value as is for "To"
-      return (
-        <Code block style={{ whiteSpace: 'pre-wrap' }}>
-          {isOriginal ? entity.original : value}
-        </Code>
-      );
-    }
-  };
-
   const handleUndoTransformation = (entity: ResolvedEntity) => {
     const rows = table.rows.map(row => ({
       ...row,
@@ -137,10 +99,18 @@ export function KtResolvedEntities(props: BoxProps) {
                     <Group justify="space-between" align="flex-start" wrap="nowrap">
                       <Stack gap="xs" style={{ flex: 1 }}>
                         <Text size="sm" fw={500}>From:</Text>
-                        {getDisplayValue(entity.fullAnswer, entity, true)}
+                        <Code block style={{ whiteSpace: 'pre-wrap' }}>
+                          {Array.isArray(entity.original) 
+                            ? entity.original.join('\n')  // One item per line
+                            : entity.original}
+                        </Code>
                         <Text size="sm" fw={500}>To:</Text>
-                        {getDisplayValue(entity.fullAnswer, entity, false)}
-                        </Stack>
+                        <Code block style={{ whiteSpace: 'pre-wrap' }}>
+                          {Array.isArray(entity.resolved)
+                            ? entity.resolved.join('\n')  // One item per line
+                            : entity.resolved}
+                        </Code>
+                      </Stack>
                       <Tooltip label="Undo transformation">
                         <ActionIcon
                           variant="subtle"
diff --git a/frontend/src/config/api.ts b/frontend/src/config/api.ts
@@ -61,9 +61,19 @@ export const answerSchema = z.union([
   z.array(z.string())
 ]);
 
+export const resolvedEntitySchema = z.object({
+  original: z.union([z.string(), z.array(z.string())]),
+  resolved: z.union([z.string(), z.array(z.string())]),
+  source: z.object({
+    type: z.string(),
+    id: z.string()
+  }),
+  entityType: z.string()
+});
+
 // Update the resolved entities schema to match backend format
 export const resolvedEntitiesSchema = z.union([
-  z.record(z.string(), z.string()),
+  z.array(resolvedEntitySchema),
   z.null(),
   z.undefined()
 ]);
@@ -118,14 +128,15 @@ export async function runQuery(
   const parsed = queryResponseSchema.parse(response);
   console.log('Parsed Response:', parsed);
   
-  // Update resolved entities transformation to handle null/undefined
-  const resolvedEntities = parsed.resolved_entities && typeof parsed.resolved_entities === 'object'
-    ? Object.entries(parsed.resolved_entities).map(([original, resolved]) => ({
-        original,
-        resolved,
-        fullAnswer: parsed.answer.answer as string
-      }))
-    : null;  // Change to null instead of undefined to match expected type
+  // Update resolved entities transformation to handle the new format
+  const resolvedEntities = parsed.resolved_entities?.map(entity => ({
+    original: entity.original,
+    resolved: entity.resolved,
+    source: entity.source,
+    entityType: entity.entityType,
+    fullAnswer: parsed.answer.answer as string
+  })) ?? null;
+  
   console.log('Transformed Resolved Entities:', resolvedEntities);
 
   return {
diff --git a/frontend/src/config/store/store.ts b/frontend/src/config/store/store.ts
@@ -400,11 +400,20 @@ export const useStore = create<Store>()(
               const currentTable = getTable(activeTableId);
               
               // Helper to check if an entity matches any global rule patterns
-              const isGlobalEntity = (entity: { original: string; resolved: string }) => {
+              const isGlobalEntity = (entity: { 
+                original: string | string[]; 
+                resolved: string | string[]; 
+                source?: { type: string; id: string }; 
+                entityType?: string 
+              }) => {
+                const originalText = Array.isArray(entity.original) 
+                  ? entity.original.join(' ') 
+                  : entity.original;
+                  
                 return globalRules.some(rule => 
                   rule.type === 'resolve_entity' && 
                   rule.options?.some(pattern => 
-                    entity.original.toLowerCase().includes(pattern.toLowerCase())
+                    originalText.toLowerCase().includes(pattern.toLowerCase())
                   )
                 );
               };