Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 128 additions & 2 deletions scripts/microgenerator/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
import os
import glob
import logging
import re
from collections import defaultdict
from typing import List, Dict, Any, Iterator
from pathlib import Path
from typing import List, Dict, Any

from . import name_utils
from . import utils
Expand Down Expand Up @@ -492,3 +492,129 @@ def analyze_source_files(

return parsed_data, all_imports, all_types, request_arg_schema


# =============================================================================
# Section 3: Code Generation
# =============================================================================


def _generate_import_statement(
context: List[Dict[str, Any]], key: str, package: str
) -> str:
"""Generates a formatted import statement from a list of context dictionaries.

Args:
context: A list of dictionaries containing the data.
key: The key to extract from each dictionary in the context.
package: The base import package (e.g., "google.cloud.bigquery_v2.services").

Returns:
A formatted, multi-line import statement string.
"""

names = sorted(list(set([item[key] for item in context])))
names_str = ",\n ".join(names)
return f"from {package} import (\n {names_str}\n)"


def _get_request_class_name(method_name: str, config: Dict[str, Any]) -> str:
    """Gets the inferred request class name, applying overrides from config.

    The name is first inferred from the method name; if the config declares a
    per-method override with a "request_class_name" entry, that wins.
    """
    inferred = name_utils.method_to_request_class_name(method_name)
    overrides = config.get("filter", {}).get("methods", {}).get("overrides", {})
    try:
        override_entry = overrides[method_name]
    except KeyError:
        return inferred
    return override_entry.get("request_class_name", inferred)


def _find_fq_request_name(
request_name: str, request_arg_schema: Dict[str, List[str]]
) -> str:
"""Finds the fully qualified request name in the schema."""
for key in request_arg_schema.keys():
if key.endswith(f".{request_name}"):
Comment on lines +535 to +536
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[optional] This looks like it'd be a good fit for a trie data structure. https://en.wikipedia.org/wiki/Trie That said, the current dictionary is probably small enough and this is part of code generation, not the user-visible path, so maybe not worth it.

Alternatively, it may be worth it to create a separate dictionary from request_name to fully-qualified name, since this method will be called more than once. That would take us from O(n^2) to O(n) (or possibly O(n log n) since I think Python dictionaries are actually trees not hashmaps.

return key
return ""


def generate_code(config: Dict[str, Any], analysis_results: tuple) -> None:
    """
    Generates source code files using Jinja2 templates.

    Args:
        config: Generator configuration. Must contain "project_root" and
            "config_dir"; may contain "templates" (list of template/output
            pairs) and "service_name".
        analysis_results: The 4-tuple produced by analysis:
            (parsed data, all imports, all types, request-argument schema).
    """

    parsed_data, _all_imports, _all_types, request_arg_schema = analysis_results
    project_root = config["project_root"]
    config_dir = config["config_dir"]

    for entry in config.get("templates", []):
        template_path = str(Path(config_dir) / entry["template"])
        output_path = str(Path(project_root) / entry["output"])

        template = utils.load_template(template_path)

        # Build one context dict per discovered method. When the method's
        # request class can be resolved against the schema, enrich the
        # context with the fully-qualified name and its identifying args.
        methods_context = []
        for client_class, methods in parsed_data.items():
            for method_name, method_info in methods.items():
                method_ctx = {
                    "name": method_name,
                    "class_name": client_class,
                    "return_type": method_info["return_type"],
                }

                request_name = _get_request_class_name(method_name, config)
                fq_request_name = _find_fq_request_name(
                    request_name, request_arg_schema
                )

                if fq_request_name:
                    method_ctx["request_class_full_name"] = fq_request_name
                    method_ctx["request_id_args"] = request_arg_schema[fq_request_name]

                methods_context.append(method_ctx)

        # One service-name cluster per distinct client class, in sorted order
        # so the generated imports are deterministic.
        unique_client_classes = sorted({m["class_name"] for m in methods_context})
        services_context = [
            name_utils.generate_service_names(cls) for cls in unique_client_classes
        ]

        # Annotate each method with its owning service's name and module so
        # the template knows which client to use for each method.
        service_by_client_class = {
            s["service_client_class"]: s for s in services_context
        }
        for method_ctx in methods_context:
            owner = service_by_client_class.get(method_ctx["class_name"])
            if owner:
                method_ctx["service_name"] = owner["service_name"]
                method_ctx["service_module_name"] = owner["service_module_name"]

        # Import statements injected verbatim into the rendered output.
        service_imports = [
            _generate_import_statement(
                services_context,
                "service_module_name",
                "google.cloud.bigquery_v2.services",
            )
        ]
        type_imports = [
            _generate_import_statement(
                services_context, "service_name", "google.cloud.bigquery_v2.types"
            )
        ]

        rendered = template.render(
            service_name=config.get("service_name"),
            methods=methods_context,
            services=services_context,
            service_imports=service_imports,
            type_imports=type_imports,
            request_arg_schema=request_arg_schema,
        )

        utils.write_code_to_file(output_path, rendered)
Loading