Add conditional replacement rules (`resolve_conditional`) to the backend and frontend

tomsmoker · tomsmoker · commit d37b05f7f6c4 · 2024-11-03T19:55:12.000-08:00
diff --git a/backend/src/app/models/query_core.py b/backend/src/app/models/query_core.py
@@ -31,7 +31,13 @@ class TransformationDict(BaseModel):
 class Rule(BaseModel):
     """Rule model."""
 
-    type: Literal["must_return", "may_return", "max_length", "resolve_entity"]
+    type: Literal[
+        "must_return",
+        "may_return",
+        "max_length",
+        "resolve_entity",
+        "resolve_conditional",
+    ]
     options: Optional[List[str]] = None
     length: Optional[int] = None
 
diff --git a/backend/src/app/services/query_service.py b/backend/src/app/services/query_service.py
@@ -44,16 +44,13 @@ def extract_chunks(search_response: SearchResponse) -> List[Chunk]:
 
 
 def replace_keywords(
-    text: Union[str, List[str]], keyword_replacements: Dict[str, str]
-) -> tuple[
-    Union[str, List[str]], Dict[str, Union[str, List[str]]]
-]:  # Changed return type
+    text: Union[str, List[str]],
+    keyword_replacements: Dict[str, str],
+    conditional_replacements: List[tuple[List[str], str]] = [],
+) -> tuple[Union[str, List[str]], Dict[str, Union[str, List[str]]]]:
     """Replace keywords in text and return both the modified text and transformation details."""
-    if not text or not keyword_replacements:
-        return text, {
-            "original": text,
-            "resolved": text,
-        }  # Return dict instead of TransformationDict
+    if not text or (not keyword_replacements and not conditional_replacements):
+        return text, {"original": text, "resolved": text}
 
     # Handle list of strings
     if isinstance(text, list):
@@ -62,13 +59,12 @@ def replace_keywords(
         modified = False
 
         # Create a single regex pattern for all keywords
-        pattern = "|".join(map(re.escape, keyword_replacements.keys()))
-        regex = re.compile(f"\\b({pattern})\\b")
+        # pattern = "|".join(map(re.escape, keyword_replacements.keys()))
+        # regex = re.compile(f"\\b({pattern})\\b")
 
         for item in text:
-            # Single pass replacement for all keywords
-            new_item = regex.sub(
-                lambda m: keyword_replacements[m.group()], item
+            new_item, _ = replace_keywords_in_string(
+                item, keyword_replacements, conditional_replacements
             )
             result.append(new_item)
             if new_item != item:
@@ -79,24 +75,46 @@ def replace_keywords(
         return result, {"original": original_text, "resolved": result}
 
     # Handle single string
-    return replace_keywords_in_string(text, keyword_replacements)
+    return replace_keywords_in_string(
+        text, keyword_replacements, conditional_replacements
+    )
+
+
+def parse_conditional_replacement(option: str) -> tuple[List[str], str]:
+    """Parse a conditional replacement rule like 'word a + word b : word c'."""
+    conditions, replacement = option.split(":")
+    required_words = [word.strip() for word in conditions.split("+")]
+    return required_words, replacement.strip()
 
 
 def replace_keywords_in_string(
-    text: str, keyword_replacements: Dict[str, str]
-) -> tuple[str, Dict[str, Union[str, List[str]]]]:  # Changed return type
+    text: str,
+    keyword_replacements: Dict[str, str],
+    conditional_replacements: List[tuple[List[str], str]] = [],
+) -> tuple[str, Dict[str, Union[str, List[str]]]]:
     """Keywords for single string."""
-    if not text:
+    if not text or (not keyword_replacements and not conditional_replacements):
         return text, {"original": text, "resolved": text}
 
-    # Create a single regex pattern for all keywords
-    pattern = "|".join(map(re.escape, keyword_replacements.keys()))
-    regex = re.compile(f"\\b({pattern})\\b")
+    result = text
+
+    # First check conditional replacements
+    for required_words, replacement in conditional_replacements:
+        # Check if all required words are present
+        if all(word.lower() in text.lower() for word in required_words):
+            # Create a pattern that matches any of the required words
+            pattern = "|".join(map(re.escape, required_words))
+            # Replace all occurrences of the required words with the replacement
+            result = re.sub(
+                f"\\b({pattern})\\b", replacement, result, flags=re.IGNORECASE
+            )
 
-    # Single pass replacement
-    result = regex.sub(lambda m: keyword_replacements[m.group()], text)
+    # Then do normal replacements
+    if keyword_replacements:
+        pattern = "|".join(map(re.escape, keyword_replacements.keys()))
+        regex = re.compile(f"\\b({pattern})\\b")
+        result = regex.sub(lambda m: keyword_replacements[m.group()], result)
 
-    # Only return transformation if something changed
     if result != text:
         return result, {"original": text, "resolved": result}
     return text, {"original": text, "resolved": text}
@@ -131,11 +149,13 @@ async def process_query(
     result_chunks = []
 
     if format in ["str", "str_array"]:
-
-        # Extract and apply keyword replacements from all resolve_entity rules
+        # Extract rules by type
         resolve_entity_rules = [
             rule for rule in rules if rule.type == "resolve_entity"
         ]
+        conditional_rules = [
+            rule for rule in rules if rule.type == "resolve_conditional"
+        ]
 
         result_chunks = (
             []
@@ -144,28 +164,43 @@ async def process_query(
             else chunks
         )
 
-        # First populate the replacements dictionary
-        replacements: Dict[str, str] = {}
-        if resolve_entity_rules and answer_value:
-            for rule in resolve_entity_rules:
-                if rule.options:
-                    rule_replacements = dict(
-                        option.split(":") for option in rule.options
-                    )
-                    replacements.update(rule_replacements)
-
-            # Then apply the replacements if we have any
-            if replacements:
+        # Process both types of replacements if we have an answer
+        if answer_value and (resolve_entity_rules or conditional_rules):
+            # Build regular replacements dictionary
+            replacements: Dict[str, str] = {}
+            if resolve_entity_rules:
+                for rule in resolve_entity_rules:
+                    if rule.options:
+                        rule_replacements = dict(
+                            option.split(":") for option in rule.options
+                        )
+                        replacements.update(rule_replacements)
+
+            # Build conditional replacements list
+            conditional_replacements: List[tuple[List[str], str]] = []
+            if conditional_rules:
+                for rule in conditional_rules:
+                    if rule.options:
+                        for option in rule.options:
+                            required_words, replacement = (
+                                parse_conditional_replacement(option)
+                            )
+                            conditional_replacements.append(
+                                (required_words, replacement)
+                            )
+
+            # Apply replacements if we have any
+            if replacements or conditional_replacements:
                 print(f"Resolving entities in answer: {answer_value}")
                 if isinstance(answer_value, list):
                     transformed_list, transform_dict = replace_keywords(
-                        answer_value, replacements
+                        answer_value, replacements, conditional_replacements
                     )
                     transformations = transform_dict
                     answer_value = transformed_list
                 else:
                     transformed_value, transform_dict = replace_keywords(
-                        answer_value, replacements
+                        answer_value, replacements, conditional_replacements
                     )
                     transformations = transform_dict
                     answer_value = transformed_value
@@ -256,31 +291,47 @@ async def inference_query(
     llm_service: CompletionService,
 ) -> QueryResult:
     """Generate a response, no need for vector retrieval."""
-    # Since we are just answering this query based on data provided in the query,
-    # ther is no need to retrieve any chunks from the vector database.
-
     answer = await generate_inferred_response(
         llm_service, query, rules, format
     )
     answer_value = answer["answer"]
 
-    # Extract and apply keyword replacements from all resolve_entity rules
+    # Extract rules by type
     resolve_entity_rules = [
         rule for rule in rules if rule.type == "resolve_entity"
     ]
+    conditional_rules = [
+        rule for rule in rules if rule.type == "resolve_conditional"
+    ]
 
-    if resolve_entity_rules and answer_value:
-        # Combine all replacements from all resolve_entity rules
+    if answer_value and (resolve_entity_rules or conditional_rules):
+        # Build regular replacements
         replacements = {}
-        for rule in resolve_entity_rules:
-            if rule.options:
-                rule_replacements = dict(
-                    option.split(":") for option in rule.options
-                )
-                replacements.update(rule_replacements)
+        if resolve_entity_rules:
+            for rule in resolve_entity_rules:
+                if rule.options:
+                    rule_replacements = dict(
+                        option.split(":") for option in rule.options
+                    )
+                    replacements.update(rule_replacements)
 
-        if replacements:
+        # Build conditional replacements
+        conditional_replacements = []
+        if conditional_rules:
+            for rule in conditional_rules:
+                if rule.options:
+                    for option in rule.options:
+                        required_words, replacement = (
+                            parse_conditional_replacement(option)
+                        )
+                        conditional_replacements.append(
+                            (required_words, replacement)
+                        )
+
+        if replacements or conditional_replacements:
             print(f"Resolving entities in answer: {answer_value}")
-            answer_value = replace_keywords(answer_value, replacements)
+            answer_value, _ = replace_keywords(
+                answer_value, replacements, conditional_replacements
+            )
 
     return QueryResult(answer=answer_value, chunks=[])
diff --git a/frontend/src/components/kt/kt-controls/kt-global-rules.tsx b/frontend/src/components/kt/kt-controls/kt-global-rules.tsx
@@ -127,6 +127,8 @@ export function KTGlobalRules(props: BoxProps) {
                 max_length,3,
                 <br />
                 resolve_entity,"blue:ultramarine,red:crimson",Color
+                <br />
+                resolve_conditional,"word a + word b:word c",Words
               </Code>
             </Box>
           </Group>
@@ -247,7 +249,7 @@ export function KTGlobalRules(props: BoxProps) {
 
 const csvJsonSchema = z.array(
   z.object({
-    rule_type: z.enum(["must_return", "may_return", "max_length", "resolve_entity"]),
+    rule_type: z.enum(["must_return", "may_return", "max_length", "resolve_entity", "resolve_conditional"]),
     value: z.string(),
     entity_type: z.string().optional()
   })
diff --git a/frontend/src/components/kt/kt-table/kt-cells/kt-column-settings/kt-column-settings.tsx b/frontend/src/components/kt/kt-table/kt-cells/kt-column-settings/kt-column-settings.tsx
@@ -157,6 +157,31 @@ const rulesMenu = (
                   }}
                 />
               </Group>
+            ) : rule.type === "resolve_conditional" ? (
+              <Group gap="xs" wrap="nowrap">
+                <TextInput
+                  w={150}
+                  placeholder="word a + word b"
+                  value={rule.options?.[0]?.split(":")[0] ?? ""}
+                  onChange={e => {
+                    const after = rule.options?.[0]?.split(":")[1] ?? "";
+                    handleRuleChange(rule, {
+                      options: [`${e.target.value}:${after}`]
+                    });
+                  }}
+                />
+                <TextInput
+                  w={100}
+                  placeholder="word c"
+                  value={rule.options?.[0]?.split(":")[1] ?? ""}
+                  onChange={e => {
+                    const before = rule.options?.[0]?.split(":")[0] ?? "";
+                    handleRuleChange(rule, {
+                      options: [`${before}:${e.target.value}`]
+                    });
+                  }}
+                />
+              </Group>
             ) : (
               <TagsInput
                 w={210}
diff --git a/frontend/src/config/store/store.ts b/frontend/src/config/store/store.ts
@@ -420,12 +420,27 @@ export const useStore = create<Store>()(
                   ? entity.original.join(' ') 
                   : entity.original;
                   
-                return globalRules.some(rule => 
-                  rule.type === 'resolve_entity' && 
-                  rule.options?.some(pattern => 
-                    originalText.toLowerCase().includes(pattern.toLowerCase())
-                  )
-                );
+                return globalRules.some(rule => {
+                  // Handle regular resolve_entity rules
+                  if (rule.type === 'resolve_entity') {
+                    return rule.options?.some(pattern => 
+                      originalText.toLowerCase().includes(pattern.split(':')[0].toLowerCase())
+                    );
+                  }
+                  
+                  // Handle conditional resolve rules
+                  if (rule.type === 'resolve_conditional') {
+                    return rule.options?.some(pattern => {
+                      const [conditions] = pattern.split(':');
+                      const requiredWords = conditions.split('+').map(word => word.trim());
+                      return requiredWords.every(word => 
+                        originalText.toLowerCase().includes(word.toLowerCase())
+                      );
+                    });
+                  }
+                  
+                  return false;
+                });
               };
               
               editTable(activeTableId, {
@@ -451,7 +466,7 @@ export const useStore = create<Store>()(
                 })),
                 globalRules: currentTable.globalRules.map(rule => ({
                   ...rule,
-                  resolvedEntities: rule.type === 'resolve_entity'
+                  resolvedEntities: (rule.type === 'resolve_entity' || rule.type === 'resolve_conditional')
                     ? [
                         ...(rule.resolvedEntities || []),
                         ...(resolvedEntities || [])
diff --git a/frontend/src/config/store/store.types.ts b/frontend/src/config/store/store.types.ts
@@ -111,7 +111,7 @@ export interface AnswerTableGlobalRule extends AnswerTableRule {
 }
 
 export interface AnswerTableRule {
-  type: "must_return" | "may_return" | "max_length" | "resolve_entity";
+  type: "must_return" | "may_return" | "max_length" | "resolve_entity" | "resolve_conditional";
   options?: string[];
   length?: number;
 }
diff --git a/frontend/src/config/store/store.utils.ts b/frontend/src/config/store/store.utils.ts
@@ -80,7 +80,8 @@ export const defaultRules: Record<AnswerTableRule["type"], AnswerTableRule> = {
   must_return: { type: "must_return", options: [] },
   may_return: { type: "may_return", options: [] },
   max_length: { type: "max_length", length: 1 },
-  resolve_entity: { type: "resolve_entity", options: [] }
+  resolve_entity: { type: "resolve_entity", options: [] },
+  resolve_conditional: { type: "resolve_conditional", options: [] }
 };
 
 export const ruleOptions: {
@@ -90,15 +91,18 @@ export const ruleOptions: {
   { value: "must_return", label: "Must return" },
   { value: "may_return", label: "May return" },
   { value: "max_length", label: "Allowed # of responses" },
-  { value: "resolve_entity", label: "Resolve entity" }
+  { value: "resolve_entity", label: "Resolve entity" },
+  { value: "resolve_conditional", label: "Resolve conditional" }
 ];
 
 export const ruleInfo: Record<AnswerTableRule["type"], string> = {
   must_return: "The column must return the specified values",
   may_return: "The column may return the specified values",
   max_length: "The column must return at most N values",
   resolve_entity:
-    "Replace all specified values with the first one from the list (i.e. 'turquioise:blue')"
+    "Replace all specified values with the first one from the list (i.e. 'turquioise:blue')",
+  resolve_conditional:
+    "Replace all specified values with the first one from the list (i.e. 'word a + word b:word c')"
 };
 
 // Casting

Original file line number	Diff line number	Diff line change
`@@ -111,7 +111,7 @@ export interface AnswerTableGlobalRule extends AnswerTableRule {`
`111`	`111`	`}`
`112`	`112`
`113`	`113`	`export interface AnswerTableRule {`
`114`		`- type: "must_return" \| "may_return" \| "max_length" \| "resolve_entity";`
	`114`	`+ type: "must_return" \| "may_return" \| "max_length" \| "resolve_entity" \| "resolve_conditional";`
`115`	`115`	`options?: string[];`
`116`	`116`	`length?: number;`
`117`	`117`	`}`