|
1 | 1 | import os |
| 2 | +import ast |
| 3 | +import re |
2 | 4 | from pathlib import Path |
3 | 5 |
|
4 | 6 | # This script is used to create RST files for all Python examples |
|
7 | 9 |
|
8 | 10 | path_examples = Path("../rips/PythonExamples") |
9 | 11 |
|
def _collapse_whitespace(text):
    """Return *text* with every run of whitespace replaced by a single space."""
    return ' '.join(text.split())


def _is_bare_string_statement(node):
    """Return True if *node* is an expression statement holding only a string (e.g. a docstring)."""
    return (
        isinstance(node, ast.Expr)
        and isinstance(node.value, ast.Constant)
        and isinstance(node.value.value, str)
    )


def extract_searchable_content(python_file_path):
    """Extract searchable content from a Python file including comments, docstrings, and function names.

    The result is assembled, in order, from:

    1. ``#`` comments found in the first 20 lines (the shebang line is skipped),
    2. the module docstring,
    3. names (underscores replaced by spaces) and docstrings of all functions,
       async functions and classes,
    4. the fixed phrase ``ResInsight Python API scripting`` if ``rips`` is imported,
    5. up to five string literals of at least 10 characters that do not look
       like paths, file names or e-mail addresses.

    When the file cannot be parsed as Python, steps 2-3 are skipped and steps
    4-5 fall back to a best-effort regular-expression scan of the raw text.

    Args:
        python_file_path: Path (``str`` or ``os.PathLike``) of the file to read.

    Returns:
        A single-line string with all whitespace runs collapsed to one space,
        or ``''`` if nothing was found or the file could not be read (a
        warning is printed in the latter case).
    """
    try:
        with open(python_file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: one unreadable example must not abort the whole run.
        print(f"Warning: Could not extract content from {python_file_path}: {e}")
        return ''

    searchable_parts = []

    # Header comments: only the first 20 lines are inspected.
    header_comments = []
    for line in content.split('\n')[:20]:
        line = line.strip()
        if line.startswith('#'):
            comment = line.lstrip('#').strip()
            if comment and not comment.startswith('!'):  # Skip shebang
                header_comments.append(comment)
    if header_comments:
        searchable_parts.append(' '.join(header_comments))

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # Not parseable (invalid syntax, NUL bytes, pathological nesting):
        # fall back to the regex scan below.
        tree = None

    if tree is not None:
        module_doc = ast.get_docstring(tree)
        if module_doc:
            searchable_parts.append(module_doc)

        imported_roots = set()
        bare_string_ids = set()
        string_nodes = []
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                searchable_parts.append(node.name.replace('_', ' '))
                doc = ast.get_docstring(node)
                if doc:
                    searchable_parts.append(doc)
            elif isinstance(node, ast.Import):
                imported_roots.update(alias.name.split('.')[0] for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                # Only absolute imports name a top-level package.
                if node.level == 0 and node.module:
                    imported_roots.add(node.module.split('.')[0])
            elif _is_bare_string_statement(node):
                # Docstrings are already collected above; remember the node so
                # it is not reported a second time as a plain literal.
                bare_string_ids.add(id(node.value))
            elif isinstance(node, ast.Constant) and isinstance(node.value, str):
                string_nodes.append(node)

        uses_rips = 'rips' in imported_roots
        # ast.walk is breadth-first; restore source order before limiting.
        string_nodes.sort(key=lambda n: (n.lineno, n.col_offset))
        long_strings = [
            n.value for n in string_nodes
            if id(n) not in bare_string_ids and len(n.value) >= 10
        ]
    else:
        import_matches = re.findall(r'import\s+(\w+)', content)
        from_matches = re.findall(r'from\s+(\w+)', content)
        uses_rips = 'rips' in import_matches + from_matches
        long_strings = re.findall(r'["\']([^"\']{10,})["\']', content)

    if uses_rips:
        searchable_parts.append('ResInsight Python API scripting')

    for literal in long_strings[:5]:  # Limit to first 5 long strings
        if not any(char in literal for char in ('/', '\\', '.', '@')):  # Skip paths, emails, etc.
            searchable_parts.append(literal)

    # Callers embed the result in a single inline RST paragraph, so newlines
    # and indentation from multi-line docstrings must not leak through.
    normalized = (_collapse_whitespace(part) for part in searchable_parts)
    return ' '.join(part for part in normalized if part)
| 69 | + |
10 | 70 | def create_rst_for_folder(folder_path, relative_to_examples): |
11 | 71 | """Create an RST file for a specific folder containing Python examples.""" |
12 | 72 |
|
@@ -37,6 +97,11 @@ def create_rst_for_folder(folder_path, relative_to_examples): |
37 | 97 | txt += example_heading + "\n" |
38 | 98 | txt += "-" * len(example_heading) + "\n\n" |
39 | 99 |
|
| 100 | + # Extract and add searchable content |
| 101 | + searchable_content = extract_searchable_content(folder_path / py_file) |
| 102 | + if searchable_content: |
| 103 | + txt += f"**Description:** {searchable_content[:500]}...\n\n" if len(searchable_content) > 500 else f"**Description:** {searchable_content}\n\n" |
| 104 | + |
40 | 105 | # Build path relative to the source directory |
41 | 106 | relative_path = path_examples / relative_to_examples / py_file.name |
42 | 107 | txt += f".. literalinclude:: {relative_path}\n" |
@@ -73,6 +138,11 @@ def create_general_examples_page(): |
73 | 138 | txt += f".. _general_{reference}:\n\n" |
74 | 139 | txt += example_heading + "\n" |
75 | 140 | txt += "-" * len(example_heading) + "\n\n" |
| 141 | + |
| 142 | + # Extract and add searchable content |
| 143 | + searchable_content = extract_searchable_content(path_examples / py_file) |
| 144 | + if searchable_content: |
| 145 | + txt += f"**Description:** {searchable_content[:500]}...\n\n" if len(searchable_content) > 500 else f"**Description:** {searchable_content}\n\n" |
76 | 146 |
|
77 | 147 | relative_path = path_examples / py_file.name |
78 | 148 | txt += f".. literalinclude:: {relative_path}\n" |
|
0 commit comments