|
| 1 | +from typing import Any, Union |
| 2 | + |
| 3 | +import dsp |
| 4 | +from dsp.primitives.demonstrate import Example |
| 5 | + |
| 6 | +from .base_template import BaseTemplate |
| 7 | + |
| 8 | + |
class ExperimentalAdapter(BaseTemplate):
    """Template that renders examples into an LM prompt and parses completions.

    On top of the rendered demos/guidelines/query, ``__call__`` appends an
    explicit sentence naming the fields the LM is expected to produce.

    The attributes ``self.fields``, ``self.format_handlers`` and
    ``self.instructions`` are read here but defined elsewhere (presumably by
    ``BaseTemplate`` -- not visible in this file).
    """

    def query(self, example: Example, is_demo: bool = False) -> str:
        """Format the populated fields of ``example`` into a query string.

        Args:
            example: Mapping from input-variable names to values. When
                ``is_demo`` is false it is mutated in place: the first field
                after the last populated one is set to ``""`` so that its name
                is rendered as a trailing prefix for the LM to complete.
            is_demo: When true, ``example`` is rendered as-is, without the
                validation and trailing-prefix step.

        Returns:
            One ``<name><separator><value>`` entry per field whose value is
            present and not ``None``, joined by blank lines.

        Raises:
            AssertionError: If ``is_demo`` is false and no field has a
                non-empty value.
        """
        if not is_demo:
            has_value = [
                field.input_variable in example
                and example[field.input_variable] is not None
                and example[field.input_variable] != ""
                for field in self.fields
            ]

            # Raised explicitly (instead of `assert False`) so the check is not
            # stripped when Python runs with -O; the exception type is unchanged.
            if not any(has_value):
                raise AssertionError("No input variables found in the example")

            # Find the first field that follows a populated field and has no
            # populated field after it, and give it an empty value. This
            # creates the "Output:" prefix at the end of the prompt.
            for i in range(1, len(has_value)):
                if has_value[i - 1] and not any(has_value[i:]):
                    example[self.fields[i].input_variable] = ""
                    break

        def default_handler(value):
            # Fallback used for fields without a registered format handler.
            return str(value).strip()

        result: list[str] = []
        for field in self.fields:
            variable = field.input_variable
            if variable not in example or example[variable] is None:
                continue

            if variable in self.format_handlers:
                format_handler = self.format_handlers[variable]
            else:
                format_handler = default_handler

            formatted_value = format_handler(example[variable])
            # A single-space separator is replaced by a newline when the value
            # itself spans several lines.
            separator = "\n" if field.separator == " " and "\n" in formatted_value else field.separator

            result.append(f"{field.name}{separator}{formatted_value}")

        return "\n\n".join([r for r in result if r])

    def guidelines(self, show_guidelines=True) -> str:
        """Return the task guidelines as described in the LM prompt.

        Args:
            show_guidelines: When false, no guidelines are produced.

        Returns:
            ``""`` when guidelines are disabled (via the argument, or via a
            falsy ``dsp.settings.show_guidelines`` if that setting exists);
            otherwise the "Follow the following format." header followed by a
            query rendered from each field's description.
        """
        if not show_guidelines or not getattr(dsp.settings, "show_guidelines", True):
            return ""

        # Build a placeholder example whose values are the field descriptions.
        example = dsp.Example()
        for field in self.fields:
            example[field.input_variable] = field.description
        example.augmented = True

        return "Follow the following format.\n\n" + self.query(example)

    def extract(
        self,
        example: Union[Example, dict[str, Any]],
        raw_pred: str,
    ) -> Example:
        """Extract the answer from the LM raw prediction using the template structure.

        Args:
            example (Union[Example, dict[str, Any]]): Contains the input variables that raw_pred was completed on.
            raw_pred (str): LM generated string

        Returns:
            Example: A copy of the example with the output variables filled in
        """

        def strip_separator(text: str) -> str:
            # NOTE: str.rstrip treats its argument as a set of characters, so
            # this removes *every* trailing "-", not just one "---" delimiter.
            return text.strip().rstrip("---").strip()

        def skip_field_name(text: str, field_name: str) -> int:
            # Legacy parsing: return the offset just past the words of
            # `field_name` found in `text`, stopping at the first missing word.
            start_pos = 0
            for part in field_name.split():
                pos = text.find(part.strip())
                if pos == -1:
                    break
                start_pos = pos + len(part)
            return start_pos

        example = dsp.Example(example)

        # Strip every line, drop the empty ones, and separate the remaining
        # lines with a blank line.
        stripped_lines = (line.strip() for line in raw_pred.strip().split("\n"))
        raw_pred = "\n\n".join(line for line in stripped_lines if line)

        # Locate the first field that has no value yet in the example.
        idx = 0
        while idx < len(self.fields):
            variable = self.fields[idx].input_variable
            if variable not in example or example[variable] is None:
                break
            idx += 1

        # Imported inside the method, as in the original code (presumably to
        # avoid an import cycle -- TODO confirm).
        import dspy

        idx = min(idx, len(self.fields) - 1)
        while raw_pred != "" and idx < len(self.fields):
            field = self.fields[idx]

            if idx < len(self.fields) - 1:
                # The value of the current field ends where the next field's
                # name begins on a new line.
                next_field_name = "\n" + self.fields[idx + 1].name
                offset = raw_pred.find(next_field_name)

                if offset >= 0:
                    remainder = raw_pred[offset + len(next_field_name):]
                    if dspy.settings.release >= 20231003:
                        example[field.output_variable] = strip_separator(raw_pred[:offset])
                        raw_pred = strip_separator(remainder)
                    else:
                        start_pos = skip_field_name(raw_pred, field.name)
                        example[field.output_variable] = strip_separator(raw_pred[start_pos:offset])
                        raw_pred = remainder.strip()
                    idx += 1
                else:
                    # No further field name found: everything left belongs to
                    # the current field.
                    example[field.output_variable] = strip_separator(raw_pred)

                    raw_pred = ""
                    idx += 1
                    break

            else:
                assert idx == len(self.fields) - 1, (idx, len(self.fields))

                if dspy.settings.release >= 20231003:
                    example[field.output_variable] = strip_separator(raw_pred)
                else:
                    start_pos = skip_field_name(raw_pred, field.name)
                    example[field.output_variable] = raw_pred[start_pos:].strip()

                break

        return example

    def __call__(self, example, show_guidelines=True) -> str:
        """Render the full prompt for ``example``.

        Args:
            example: The example to query the LM with; it is copied into a
                ``dsp.Example`` and its ``demos`` are rendered as demonstrations.
            show_guidelines: Forwarded to ``guidelines``.

        Returns:
            When ``dsp.settings.query_only`` is set and truthy, just the query
            string. Otherwise the instructions, demos, guidelines and query
            joined by ``---`` separators, with the last line removed and a
            sentence appended that names the output fields to provide.

        Raises:
            AssertionError: If the last field's input variable is already
                present in ``example``.
        """
        example = dsp.Example(example)

        # Collect the display names (text before the first ':') of the fields
        # the example does not provide, without repeating a name.
        output_fields = []
        for field in self.fields:
            if field.input_variable not in example:
                display_name = field.name.split(':')[0]
                if display_name not in output_fields:
                    output_fields.append(display_name)

        if getattr(dsp.settings, "query_only", False):
            return self.query(example)

        # The training data should not contain the output variable.
        # Raised explicitly so the check survives `python -O`.
        if self.fields[-1].input_variable in example:
            raise AssertionError(
                f"Output variable {self.fields[-1].input_variable} should not be supplied for querying the LM."
            )

        # Non-augmented demos are kept only if they provide a value for the
        # template's last field.
        rdemos = [
            self.query(demo, is_demo=True)
            for demo in example.demos
            if (
                (not demo.get("augmented", False))
                and (
                    self.fields[-1].input_variable in demo and demo[self.fields[-1].input_variable] is not None
                )
            )
        ]

        ademos = [self.query(demo, is_demo=True) for demo in example.demos if demo.get("augmented", False)]

        # Move the rdemos to ademos if rdemo has all the fields filled in
        # (checked by looking for each provided field's name in the rendered text).
        rdemos_ = []
        new_ademos = []
        for rdemo in rdemos:
            if all((field.name in rdemo) for field in self.fields if field.input_variable in example):
                new_ademos.append(rdemo)
            else:
                rdemos_.append(rdemo)

        ademos = new_ademos + ademos
        rdemos = rdemos_

        example["augmented"] = True

        query = self.query(example)
        parts = [self.instructions, *rdemos, self.guidelines(show_guidelines), *ademos, query]

        prompt = "\n\n---\n\n".join([p.strip() for p in parts if p])
        # Drop the last line of the prompt (the trailing field prefix); the
        # sentence appended below tells the LM which field to start with instead.
        prompt_ = prompt[: prompt.rfind("\n")].strip()

        s_or_not = "s" if len(output_fields) > 1 else ""
        only_or_not = "only " if len(output_fields) == 1 else ""

        if len(output_fields) > 1:
            joiner = ", then " if len(output_fields) > 2 else " then "
            listing = ", ".join(output_fields[:-1]) + joiner + output_fields[-1]
        else:
            listing = output_fields[0]

        prompt_ += (
            f"\n\nPlease provide the output field{s_or_not} {listing}. "
            "Do so immediately, without additional content before or after, "
            "and precisely as the format above shows. "
            f"Begin with {only_or_not}the field {output_fields[0]}."
        )
        return prompt_.strip()
| 202 | + |
0 commit comments