-
Notifications
You must be signed in to change notification settings - Fork 324
feat: microgen - adds code generation logic #2294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b9d4a04
5b4d538
132c571
90b224e
e071eab
dc72a98
7318f0b
07910c5
dc54c99
28de5f8
c457754
595e59f
44a0777
3e9ade6
485b9d4
a4276fe
889870b
f113bde
bb6ba4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,9 +26,9 @@ | |
| import os | ||
| import glob | ||
| import logging | ||
| import re | ||
| from collections import defaultdict | ||
| from typing import List, Dict, Any, Iterator | ||
| from pathlib import Path | ||
| from typing import List, Dict, Any | ||
|
|
||
| from . import name_utils | ||
| from . import utils | ||
|
|
@@ -492,3 +492,129 @@ def analyze_source_files( | |
|
|
||
| return parsed_data, all_imports, all_types, request_arg_schema | ||
|
|
||
|
|
||
| # ============================================================================= | ||
| # Section 3: Code Generation | ||
| # ============================================================================= | ||
|
|
||
|
|
||
| def _generate_import_statement( | ||
| context: List[Dict[str, Any]], key: str, package: str | ||
| ) -> str: | ||
| """Generates a formatted import statement from a list of context dictionaries. | ||
|
|
||
| Args: | ||
| context: A list of dictionaries containing the data. | ||
| key: The key to extract from each dictionary in the context. | ||
| package: The base import package (e.g., "google.cloud.bigquery_v2.services"). | ||
|
|
||
| Returns: | ||
| A formatted, multi-line import statement string. | ||
| """ | ||
|
|
||
| names = sorted(list(set([item[key] for item in context]))) | ||
| names_str = ",\n ".join(names) | ||
| return f"from {package} import (\n {names_str}\n)" | ||
|
|
||
|
|
||
def _get_request_class_name(method_name: str, config: Dict[str, Any]) -> str:
    """Returns the request class name to use for ``method_name``.

    The name is first inferred via
    ``name_utils.method_to_request_class_name``. If the configuration has an
    entry for the method under ``filter -> methods -> overrides``, that
    entry's ``request_class_name`` (when present) replaces the inferred name.

    Args:
        method_name: The method whose request class name is needed.
        config: The generator configuration dictionary.

    Returns:
        The overridden request class name if one is configured, otherwise the
        inferred name.
    """
    default_name = name_utils.method_to_request_class_name(method_name)

    filter_section = config.get("filter", {})
    methods_section = filter_section.get("methods", {})
    overrides = methods_section.get("overrides", {})

    # No override entry for this method: the inferred name stands.
    if method_name not in overrides:
        return default_name

    return overrides[method_name].get("request_class_name", default_name)
|
|
||
|
|
||
| def _find_fq_request_name( | ||
| request_name: str, request_arg_schema: Dict[str, List[str]] | ||
| ) -> str: | ||
| """Finds the fully qualified request name in the schema.""" | ||
| for key in request_arg_schema.keys(): | ||
| if key.endswith(f".{request_name}"): | ||
| return key | ||
| return "" | ||
|
|
||
|
|
||
def _build_methods_context(
    data: Dict[str, Any],
    config: Dict[str, Any],
    request_arg_schema: Dict[str, List[str]],
) -> List[Dict[str, Any]]:
    """Builds one template context dictionary per analyzed method."""
    # TODO: consider a dataclass instead of a plain dict for the per-method
    # context (raised in code review; tracked on the internal TODO list).
    methods_context = []
    for class_name, methods in data.items():
        for method_name, method_info in methods.items():
            context = {
                "name": method_name,
                "class_name": class_name,
                "return_type": method_info["return_type"],
            }

            request_name = _get_request_class_name(method_name, config)
            fq_request_name = _find_fq_request_name(
                request_name, request_arg_schema
            )

            # Request details are attached only when the schema knows the class.
            if fq_request_name:
                context["request_class_full_name"] = fq_request_name
                context["request_id_args"] = request_arg_schema[fq_request_name]

            methods_context.append(context)
    return methods_context


def _build_services_context(
    methods_context: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Builds service naming info per client class and tags each method with it."""
    client_class_names = sorted({m["class_name"] for m in methods_context})
    services_context = [
        name_utils.generate_service_names(class_name)
        for class_name in client_class_names
    ]

    # Also update methods_context to include the service_name and module_name
    # so the template knows which client to use for each method.
    class_to_service_map = {s["service_client_class"]: s for s in services_context}
    for method in methods_context:
        service_info = class_to_service_map.get(method["class_name"])
        if service_info:
            method["service_name"] = service_info["service_name"]
            method["service_module_name"] = service_info["service_module_name"]

    return services_context


def generate_code(config: Dict[str, Any], analysis_results: tuple) -> None:
    """
    Generates source code files using Jinja2 templates.

    Each entry of ``config["templates"]`` names a template (resolved against
    ``config["config_dir"]``) and an output file (resolved against
    ``config["project_root"]``). Every template is rendered with the same
    method, service, and import context and written to its output path.

    Args:
        config: The generator configuration. ``project_root`` and
            ``config_dir`` are required; ``templates``, ``service_name`` and
            ``filter`` are optional.
        analysis_results: A 4-tuple of
            ``(data, all_imports, all_types, request_arg_schema)``. Only
            ``data`` and ``request_arg_schema`` are used here.

    Raises:
        KeyError: If ``project_root`` or ``config_dir`` is missing from
            ``config``, or a template entry lacks ``template`` / ``output``.
        ValueError: If ``analysis_results`` does not hold exactly four items.
    """
    # TODO(b/445158219): replace the positional tuple with a frozen dataclass
    # or NamedTuple so field order cannot be mixed up (raised in code review).
    data, _all_imports, _all_types, request_arg_schema = analysis_results
    project_root = config["project_root"]
    config_dir = config["config_dir"]

    templates_config = config.get("templates", [])
    if not templates_config:
        # Nothing to render, so skip building the context entirely.
        return

    # The rendering context does not depend on the individual template, so it
    # is built once and shared instead of being rebuilt per template.
    methods_context = _build_methods_context(data, config, request_arg_schema)
    services_context = _build_services_context(methods_context)

    # Prepare service imports
    service_imports = [
        _generate_import_statement(
            services_context,
            "service_module_name",
            "google.cloud.bigquery_v2.services",
        )
    ]

    # Prepare type imports
    type_imports = [
        _generate_import_statement(
            services_context, "service_name", "google.cloud.bigquery_v2.types"
        )
    ]

    for item in templates_config:
        template_path = str(Path(config_dir) / item["template"])
        output_path = str(Path(project_root) / item["output"])

        template = utils.load_template(template_path)
        final_code = template.render(
            service_name=config.get("service_name"),
            methods=methods_context,
            services=services_context,
            service_imports=service_imports,
            type_imports=type_imports,
            request_arg_schema=request_arg_schema,
        )

        utils.write_code_to_file(output_path, final_code)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[optional] This looks like it'd be a good fit for a trie data structure. https://en.wikipedia.org/wiki/Trie That said, the current dictionary is probably small enough and this is part of code generation, not the user-visible path, so maybe not worth it.
Alternatively, it may be worth it to create a separate dictionary from request_name to fully-qualified name, since this method will be called more than once. That would take us from O(n^2) to O(n) (or possibly O(n log n) since I think Python dictionaries are actually trees not hashmaps).