|
10 | 10 | """
|
11 | 11 |
|
12 | 12 | import os
|
13 |
| -import glob |
14 | 13 | import shutil
|
15 | 14 | import logging
|
16 |
| -from typing import Dict, Any |
17 | 15 |
|
18 | 16 | from components.example import Example
|
19 | 17 | from components.util import mkdir_p
|
@@ -89,70 +87,80 @@ def process_local_examples(local_examples_dir: str = 'local_examples',
|
89 | 87 | if os.path.exists(examples_json):
|
90 | 88 | examples_data = load_dict(examples_json)
|
91 | 89 |
|
92 |
| - # Process each file in local_examples directory |
93 |
| - for filename in os.listdir(local_examples_dir): |
94 |
| - source_file = os.path.join(local_examples_dir, filename) |
95 |
| - |
96 |
| - if not os.path.isfile(source_file): |
97 |
| - continue |
98 |
| - |
99 |
| - # Get language from file extension |
100 |
| - language = get_language_from_extension(filename) |
101 |
| - if not language: |
102 |
| - logging.warning(f"Unknown file extension for: {filename}") |
103 |
| - continue |
104 |
| - |
105 |
| - # Get example ID from file content |
106 |
| - example_id = get_example_id_from_file(source_file) |
107 |
| - if not example_id: |
108 |
| - logging.warning(f"No EXAMPLE: header found in {filename}") |
109 |
| - continue |
110 |
| - |
111 |
| - logging.info(f"Processing local example: {example_id} ({language})") |
112 |
| - |
113 |
| - # Create target directory |
114 |
| - target_dir = os.path.join(examples_dir, example_id) |
115 |
| - mkdir_p(target_dir) |
116 |
| - |
117 |
| - # Initialize example data |
118 |
| - if example_id not in examples_data: |
119 |
| - examples_data[example_id] = {} |
120 |
| - |
121 |
| - # Copy file to target directory with local_ prefix |
122 |
| - base_name = os.path.splitext(filename)[0] |
123 |
| - ext = os.path.splitext(filename)[1] |
124 |
| - target_filename = f"local_{base_name}{ext}" |
125 |
| - target_file = os.path.join(target_dir, target_filename) |
126 |
| - shutil.copy2(source_file, target_file) |
127 |
| - |
128 |
| - # Process with Example class |
129 |
| - example = Example(language, target_file) |
130 |
| - |
131 |
| - # Get client name |
132 |
| - client_name = get_client_name_from_language(language) |
133 |
| - |
134 |
| - # Create metadata |
135 |
| - example_metadata = { |
136 |
| - 'source': source_file, |
137 |
| - 'language': language, |
138 |
| - 'target': target_file, |
139 |
| - 'highlight': example.highlight, |
140 |
| - 'hidden': example.hidden, |
141 |
| - 'named_steps': example.named_steps, |
142 |
| - 'sourceUrl': None # Local examples don't have source URLs |
143 |
| - } |
144 |
| - |
145 |
| - examples_data[example_id][client_name] = example_metadata |
146 |
| - logging.info(f"Processed {client_name} example for {example_id}") |
| 90 | + # Process each file in local_examples directory and subdirectories |
| 91 | + for root, _, files in os.walk(local_examples_dir): |
| 92 | + for filename in files: |
| 93 | + source_file = os.path.join(root, filename) |
| 94 | + |
| 95 | + # Get language from file extension |
| 96 | + language = get_language_from_extension(filename) |
| 97 | + if not language: |
| 98 | + logging.warning(f"Unknown file extension for: {filename}") |
| 99 | + continue |
| 100 | + |
| 101 | + # Get example ID from file content |
| 102 | + example_id = get_example_id_from_file(source_file) |
| 103 | + if not example_id: |
| 104 | + logging.warning(f"No EXAMPLE: header found in {filename}") |
| 105 | + continue |
| 106 | + |
| 107 | + logging.info(f"Processing local example: {example_id} ({language}) " |
| 108 | + f"from {source_file}") |
| 109 | + |
| 110 | + # Create target directory |
| 111 | + target_dir = os.path.join(examples_dir, example_id) |
| 112 | + mkdir_p(target_dir) |
| 113 | + |
| 114 | + # Initialize example data |
| 115 | + if example_id not in examples_data: |
| 116 | + examples_data[example_id] = {} |
| 117 | + |
| 118 | + # Copy file to target directory with local_ prefix |
| 119 | + # Include subdirectory structure in the filename to avoid conflicts |
| 120 | + relative_path = os.path.relpath(source_file, local_examples_dir) |
| 121 | + relative_dir = os.path.dirname(relative_path) |
| 122 | + base_name = os.path.splitext(filename)[0] |
| 123 | + ext = os.path.splitext(filename)[1] |
| 124 | + |
| 125 | + # Create a unique filename that includes subdirectory info |
| 126 | + if relative_dir and relative_dir != '.': |
| 127 | + # Replace path separators with underscores for flat filename |
| 128 | + subdir_prefix = relative_dir.replace(os.sep, '_') |
| 129 | + target_filename = f"local_{subdir_prefix}_{base_name}{ext}" |
| 130 | + else: |
| 131 | + target_filename = f"local_{base_name}{ext}" |
| 132 | + |
| 133 | + target_file = os.path.join(target_dir, target_filename) |
| 134 | + shutil.copy2(source_file, target_file) |
| 135 | + |
| 136 | + # Process with Example class |
| 137 | + example = Example(language, target_file) |
| 138 | + |
| 139 | + # Get client name |
| 140 | + client_name = get_client_name_from_language(language) |
| 141 | + |
| 142 | + # Create metadata |
| 143 | + example_metadata = { |
| 144 | + 'source': source_file, |
| 145 | + 'language': language, |
| 146 | + 'target': target_file, |
| 147 | + 'highlight': example.highlight, |
| 148 | + 'hidden': example.hidden, |
| 149 | + 'named_steps': example.named_steps, |
| 150 | + 'sourceUrl': None # Local examples don't have source URLs |
| 151 | + } |
| 152 | + |
| 153 | + examples_data[example_id][client_name] = example_metadata |
| 154 | + logging.info(f"Processed {client_name} example for {example_id}") |
147 | 155 |
|
148 | 156 | # Save updated examples data
|
149 | 157 | dump_dict(examples_json, examples_data)
|
150 | 158 | logging.info(f"Updated examples data saved to {examples_json}")
|
151 | 159 |
|
152 | 160 |
|
153 | 161 | if __name__ == '__main__':
|
154 |
| - logging.basicConfig(level=logging.INFO, |
155 |
| - format='%(levelname)s: %(message)s') |
156 |
| - |
| 162 | + logging.basicConfig(level=logging.INFO, |
| 163 | + format='%(levelname)s: %(message)s') |
| 164 | + |
157 | 165 | process_local_examples()
|
158 | 166 | print("Local examples processing complete")
|
0 commit comments