|
| 1 | +import re |
| 2 | +from typing import Any |
| 3 | + |
1 | 4 | from base_test_case import BaseTestCase
|
| 5 | +from geophires_x.GeoPHIRESUtils import sig_figs |
| 6 | +from geophires_x.Parameter import HasQuantity |
2 | 7 | from geophires_x_client import GeophiresInputParameters
|
3 | 8 | from geophires_x_client import GeophiresXClient
|
| 9 | +from geophires_x_client import GeophiresXResult |
4 | 10 |
|
5 | 11 |
|
6 | 12 | class FervoProjectCape4TestCase(BaseTestCase):
|
@@ -41,3 +47,182 @@ def test_fervo_project_cape_4_results_against_reference_values(self):
|
41 | 47 | ]['value']
|
42 | 48 | self.assertGreater(pumping_power_pct, 13)
|
43 | 49 | self.assertLess(pumping_power_pct, 17)
|
| 50 | + |
| 51 | + def test_case_study_documentation(self): |
| 52 | + """ |
| 53 | + Parses result values from case study documentation markdown and checks that they match the actual result. |
| 54 | + Useful for catching when minor updates are made to the case study which need to be manually synced to the |
| 55 | + documentation. |
| 56 | + """ |
| 57 | + |
def _parse_value_unit(raw_string: str) -> dict:
    """
    Parse one documentation table cell into a numeric value and a unit.

    Everything from the first ``(`` onwards is treated as a parenthetical note and discarded
    before matching. Formats are tried in a fixed order and the first match wins.

    :param raw_string: cell text, e.g. ``'$3.96M/well'``, ``'28.1%'`` or ``'503 MW (gross)'``
    :return: dict with keys ``'value'`` and ``'unit'``. Text matching no known format is returned
        as ``{'value': <stripped text>, 'unit': 'unknown'}``.
    """
    clean_str = raw_string.partition('(')[0].strip()

    # (pattern, resulting unit, optional conversion applied to the parsed float).
    # Order matters: e.g. '$X/MWh' must be tried before the open-ended '$XM...' pattern.
    fixed_unit_formats = (
        # $/MWh is reported as cents/kWh, which is a division by 10
        (r'^\$(\d+\.?\d*)/MWh$', 'cents/kWh', lambda usd_per_mwh: round(usd_per_mwh / 10, 2)),
        # Billions of dollars are reported as millions (MUSD)
        (r'^\$(\d+\.?\d*)B$', 'MUSD', lambda billions: billions * 1000),
        # Deliberately not anchored at the end so that suffixes such as '/well' are accepted
        (r'^\$(\d+\.?\d*)M', 'MUSD', None),
        (r'^\$(\d+\.?\d*)/kW$', 'USD/kW', None),
        (r'^(\d+\.?\d*)%$', '%', None),
        (r'^(\d+\.?\d*)℃$', 'degC', None),
    )
    for pattern, unit, convert in fixed_unit_formats:
        match = re.match(pattern, clean_str)
        if match:
            value_ = float(match.group(1))
            return {'value': convert(value_) if convert else value_, 'unit': unit}

    # Scientific notation: mantissa, a multiplication sign, then a power of ten.  # ruff: noqa: RUF003
    # The exponent may be written with superscript digits, as '^N', or as the legacy bare ASCII '6'.
    match = re.match(
        r'^(\d+\.?\d*)\s*[×xX]\s*10(?:(⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+)|\^(-?\d+)|(6))\s*(.*)$',
        clean_str,
    )
    if match:
        exponent_text = match.group(2) or match.group(3) or match.group(4)
        # int() rejects superscript digits, so map them to ASCII first
        exponent = int(exponent_text.translate(str.maketrans('⁰¹²³⁴⁵⁶⁷⁸⁹⁻', '0123456789-')))
        return {'value': float(match.group(1)) * 10.0**exponent, 'unit': match.group(5).strip()}

    # Plain number followed by a unit, e.g. "503 MW"
    match = re.match(r'^(\d+\.?\d*)\s*([a-zA-Z²³\/]+)$', clean_str)
    if match:
        return {'value': float(match.group(1)), 'unit': match.group(2)}

    # Dimensionless integer, e.g. "3"
    match = re.match(r'^(\d+)$', clean_str)
    if match:
        return {'value': int(match.group(1)), 'unit': 'count'}

    # Fallback for any unhandled format: hand back the cleaned text unchanged
    return {'value': clean_str, 'unit': 'unknown'}
| 125 | + |
def parse_markdown_results_structured(markdown_text: str) -> dict:
    """
    Extract selected result values from the ``## Results`` section of a markdown document.

    The first two cells of every table row found from the ``## Results`` heading onwards are read
    as a key/value pair. Header rows (``Metric``/``Parameter``) and separator rows are skipped.
    Only an allow-listed set of keys is kept; each kept value is parsed with ``_parse_value_unit``.

    :param markdown_text: full markdown document text
    :return: dict mapping result name to ``{'value': ..., 'unit': ...}``. An empty dict is returned
        (after printing a warning) if the document has no ``## Results`` heading.
    :raises ValueError: if an "LCOE = ... and CAPEX = ..." sentence anywhere in the document states
        an LCOE that differs from the LCOE table value
    """
    documented_keys = frozenset(
        {
            'After-tax IRR',
            'Average Production Temperature',
            'LCOE',
            'Maximum Total Electricity Generation',
            'Minimum Net Electricity Generation',
            'Number of times redrilling',
            'Project capital costs: Total CAPEX',
            'Project capital costs: $/kW',
            'WACC',
            'Well Drilling and Completion Cost',
        }
    )
    # The negative lookahead rejects separator rows such as "|---|---|"
    table_pattern = re.compile(r'^\s*\|\s*(?!-)([^|]+?)\s*\|\s*([^|]+?)\s*\|', re.MULTILINE)

    # Only the heading lookup can raise ValueError, so it is the only statement guarded
    try:
        results_start_index = markdown_text.index('## Results')
    except ValueError:
        print("Warning: '## Results' section not found.")
        return {}

    raw_results = {}
    for key_cell, value_cell in table_pattern.findall(markdown_text[results_start_index:]):
        key_ = key_cell.strip()
        if key_.lower() not in ('metric', 'parameter'):
            raw_results[key_] = value_cell.strip()

    # Consistency check between the prose summary and the table.
    # NOTE(review): the CAPEX value is captured by the pattern but only LCOE is compared - confirm
    # whether CAPEX should be cross-checked as well.
    special_case_match = re.search(r'LCOE\s*=\s*(\S+)\s*and\s*CAPEX\s*=\s*(\S+)', markdown_text)
    if special_case_match:
        lcoe_text = special_case_match.group(1).rstrip('.,;')
        lcoe_table_base = raw_results.get('LCOE', '').split('(')[0].strip()
        if lcoe_text != lcoe_table_base:
            raise ValueError(
                f'LCOE mismatch: Text value ({lcoe_text}) does not match table value ({lcoe_table_base}).'
            )

    # Convert the allow-listed raw strings into the structured value/unit form
    return {key_: _parse_value_unit(value_) for key_, value_ in raw_results.items() if key_ in documented_keys}
| 179 | + |
# Load the case study documentation and parse its results table.
# NOTE(review): joining with '\n' assumes _get_test_file_content yields the file's lines - confirm.
documentation_lines = self._get_test_file_content('../../docs/Fervo_Project_Cape-4.md')
results_in_markdown = parse_markdown_results_structured('\n'.join(documentation_lines))

# Spot-check one parsed entry to confirm that markdown parsing itself works
drilling_cost_per_well = results_in_markdown['Well Drilling and Completion Cost']
self.assertEqual(3.96, drilling_cost_per_well['value'])
self.assertEqual('MUSD', drilling_cost_per_well['unit'])
| 186 | + |
class Q(HasQuantity):
    """
    Minimal ``HasQuantity`` adapter around a parsed ``{'value': ..., 'unit': ...}`` dict.

    Exposes the dict as ``value`` and ``CurrentUnits.value`` attributes.
    NOTE(review): presumably these are the attributes ``HasQuantity.quantity()`` reads - confirm
    against geophires_x.Parameter.
    """

    def __init__(self, vu: dict[str, Any]):
        # Local import keeps this helper self-contained within the test
        from types import SimpleNamespace

        self.value = vu['value']

        # Lightweight stand-in object that only needs to carry a ``value`` attribute
        self.CurrentUnits = SimpleNamespace(value=vu['unit'])
| 195 | + |
# Internal consistency of the documentation: the documented $/kW figure should equal the
# documented total CAPEX divided by the documented maximum total generation (3 significant figures).
capex_usd = Q(results_in_markdown['Project capital costs: Total CAPEX']).quantity().to('USD').magnitude
max_generation_kw = (
    Q(results_in_markdown['Maximum Total Electricity Generation']).quantity().to('kW').magnitude
)
self.assertAlmostEqual(
    sig_figs(capex_usd / max_generation_kw, 3), results_in_markdown['Project capital costs: $/kW']['value']
)

# Documentation names that differ from the corresponding result field names
field_mapping = {
    'LCOE': 'Electricity breakeven price',
    'Project capital costs: Total CAPEX': 'Total CAPEX',
    'Well Drilling and Completion Cost': 'Drilling and completion costs per well',
}

# Documented entries that are not compared against the example result
ignore_keys = ['Project capital costs: $/kW', 'Total fracture surface area per production well']

example_result = GeophiresXResult(self._get_test_file_path('../examples/Fervo_Project_Cape-4.out'))
example_result_values_in_documentation = {}
for key in results_in_markdown:
    if key in ignore_keys:
        continue

    entry = example_result._get_result_field(field_mapping.get(key, key))
    if entry is not None and 'value' in entry:
        # Round the result value to 3 significant figures before comparing with the documentation
        entry['value'] = sig_figs(entry['value'], 3)

    example_result_values_in_documentation[key] = entry

# Drop ignored entries from the documentation side so both dicts cover the same keys
for ignore_key in ignore_keys:
    results_in_markdown.pop(ignore_key, None)

self.assertDictAlmostEqual(results_in_markdown, example_result_values_in_documentation, places=3)
0 commit comments