Skip to content

feat: Proof of Concept (PoC) microgenerator #2257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: autogen
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions scripts/microgenerator/bigqueryclient.py.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# TODO: Add a header if needed.

# ======== 🦕 HERE THERE BE DINOSAURS 🦖 =========
# This content is subject to significant change. Not for review yet.
# Included as a proof of concept for context or testing ONLY.
# ================================================

{# Template for the generated BigQueryClient class.
   Rendered with a `methods` sequence; each item supplies `name`,
   `class_name`, `request_class_name`, `args_for_def` and `args_for_call`. #}
class BigQueryClient:
def __init__(self):
self._clients = {}

{# One wrapper method is emitted per entry in `methods`. Each wrapper creates
   the named service client on first use and stores it in self._clients,
   builds the request object from the wrapper's arguments, and forwards the
   call to the client method of the same name. #}
{% for method in methods %}
def {{ method.name }}({{ method.args_for_def }}):
"""TODO: extract docstring for use here.
A generated method to call the BigQuery API."""

if "{{ method.class_name }}" not in self._clients:
from google.cloud.bigquery_v2 import {{ method.class_name }}
self._clients["{{ method.class_name }}"] = {{ method.class_name }}()

client = self._clients["{{ method.class_name }}"]
from google.cloud.bigquery_v2 import types
request = types.{{ method.request_class_name }}({{ method.args_for_call }})
return client.{{ method.name }}(request=request)

{% endfor %}

{# NOTE(review): the example below is placed after the loop's closing tag, so
   it is rendered verbatim into the output rather than serving only as
   documentation - confirm this is intended. #}
# ======== An example of the code generated ======

def get_dataset(self, request, retry, timeout, metadata):
"""TODO: extract docstring for use here.
A generated method to call the BigQuery API."""

if "DatasetServiceClient" not in self._clients:
from google.cloud.bigquery_v2 import DatasetServiceClient
self._clients["DatasetServiceClient"] = DatasetServiceClient()

client = self._clients["DatasetServiceClient"]
from google.cloud.bigquery_v2 import types
request = types.GetDatasetRequest(request=request, retry=retry, timeout=timeout, metadata=metadata)
return client.get_dataset(request=request)
21 changes: 21 additions & 0 deletions scripts/microgenerator/bigqueryclient_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# TODO: Add a header if needed.

# Class-name suffixes: the generator keeps a class only when its name ends
# with the configured value (it is matched with str.endswith).
include_class_name_patterns:
- Client

# Loaded by config_helper as CLASSES_TO_EXCLUDE; the generator does not
# currently import or apply it.
exclude_class_name_patterns: []

# Method-name prefixes: a method is kept when its name starts with any of
# these values (matched with str.startswith).
include_method_name_patterns:
- batch_delete_
- cancel_
- create_
- delete_
- get_
- insert_
- list_
- patch_
- undelete_
- update_

# Method-name prefixes that drop a method even when it matches an include
# prefix (also matched with str.startswith).
exclude_method_name_patterns:
- get_mtls_endpoint_and_cert_source
149 changes: 149 additions & 0 deletions scripts/microgenerator/bigqueryclient_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# TODO: Add a header if needed.

import ast
import os
from collections import defaultdict

from config_helper import (
CLASSES_TO_INCLUDE,
# CLASSES_TO_EXCLUDE, # Not currently being used.
METHODS_TO_INCLUDE,
METHODS_TO_EXCLUDE,
)
from template_utils import load_template

# Constants
# Directory that is scanned for service modules. The path is relative, so it
# is resolved against the current working directory when this module loads.
BASE_DIR = "google/cloud/bigquery_v2/services"

# Every ``.py`` file found anywhere below BASE_DIR, in ``os.walk`` order.
FILES_TO_PARSE = [
    candidate
    for candidate in (
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(BASE_DIR)
        for filename in filenames
    )
    if candidate.endswith(".py")
]


def create_tree(file_path):
    """Parse a Python source file into an abstract syntax tree.

    Args:
        file_path: Path of the Python source file to parse.

    Returns:
        The ``ast.Module`` node produced by ``ast.parse`` for the file.

    Raises:
        OSError: If the file cannot be opened or read.
        SyntaxError: If the file is not valid Python source.
    """
    # Read raw bytes and let the parser decode them: it honours a BOM or an
    # encoding declaration and otherwise assumes UTF-8, instead of relying on
    # the platform's locale encoding.
    with open(file_path, "rb") as source:
        # Passing the path makes SyntaxError messages name the offending file.
        return ast.parse(source.read(), filename=file_path)


def _extract_classes(tree):
"""Extracts class nodes from an AST."""
classes = []

for node in ast.walk(tree):
if isinstance(node, ast.ClassDef) and node.name.endswith(
*CLASSES_TO_INCLUDE
): # TODO: currently this is variable includes only one class. Refactor if necessary
classes.append(node)
return classes


def _extract_methods(class_node):
"""Extracts method nodes from a class node."""
return (m for m in class_node.body if isinstance(m, ast.FunctionDef))


def _process_method(method, class_name, parsed_data):
    """Record a method's parameter names if its name passes the filters.

    A method is recorded when its name starts with one of the
    ``METHODS_TO_INCLUDE`` prefixes and with none of the
    ``METHODS_TO_EXCLUDE`` prefixes.

    Args:
        method: Function node whose name and arguments are inspected.
        class_name: Key of the owning class inside ``parsed_data``.
        parsed_data: Mapping updated in place; the method name is mapped to
            the names of its positional-or-keyword parameters followed by
            its keyword-only parameters.
    """
    name = method.name
    if not name.startswith(tuple(METHODS_TO_INCLUDE)):
        return
    if name.startswith(tuple(METHODS_TO_EXCLUDE)):
        return

    signature = method.args
    parsed_data[class_name][name] = [
        param.arg for param in signature.args + signature.kwonlyargs
    ]


def parse_files(file_paths):
    """Gather the selected classes and methods from a set of Python files.

    Args:
        file_paths (list): Paths of the Python files to parse.

    Returns:
        A ``defaultdict(dict)`` keyed by class name. Each value maps a
        method name to its list of parameter names. A selected class is
        present even when none of its methods were recorded.
    """
    collected = defaultdict(dict)

    for path in file_paths:
        for class_node in _extract_classes(create_tree(path)):
            owner = class_node.name
            # Register the class up front so it shows up in the result even
            # if no method of it gets recorded.
            collected.setdefault(owner, {})

            for method_node in _extract_methods(class_node):
                _process_method(method_node, owner, collected)

    return collected


def _format_args(method_args):
"""Formats method arguments for use in creating a method definition
and a method call.
"""
args_for_def = ", ".join(method_args)
args_for_call = ", ".join([f"{arg}={arg}" for arg in method_args if arg != "self"])
return args_for_def, args_for_call


def _format_class_name(method_name, suffix="Request"):
"""Formats a class name from a method name.

Example:
list_datasets -> ListDatasetsRequest
"""
return "".join(word.capitalize() for word in method_name.split("_")) + suffix


def generate_client_class_source(data):
    """Render the BigQueryClient source from the parsed class data.

    Args:
        data: A dictionary where keys are *ServiceClient class names and
            values are dictionaries mapping method names to their list of
            parameter names.

    Returns:
        A string with the output of the ``bigqueryclient.py.j2`` template.
    """
    template = load_template("bigqueryclient.py.j2")

    # Flatten the nested {class: {method: args}} mapping into one record per
    # method, formatting the arguments as each record is produced.
    flattened = (
        (class_name, method_name, _format_args(method_args))
        for class_name, methods in data.items()
        for method_name, method_args in methods.items()
    )

    # Shape each record into the fields the template reads.
    methods_context = [
        {
            "name": method_name,
            "class_name": class_name,
            "args_for_def": signature_args,
            "args_for_call": call_args,
            "request_class_name": _format_class_name(method_name),
        }
        for class_name, method_name, (signature_args, call_args) in flattened
    ]

    return template.render(methods=methods_context)


if __name__ == "__main__":
    # Parse the service modules and render the combined client from them.
    # TODO: write final code to file instead of print to screen.
    print(generate_client_class_source(parse_files(FILES_TO_PARSE)))

    # TODO: Ensure blacken gets called on the generated source files as a final step.
39 changes: 39 additions & 0 deletions scripts/microgenerator/config_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# TODO: Add a header if needed.

import yaml
import os


def load_config_yaml(filepath):
    """Load configuration from a YAML file.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load``, or ``None`` when the file
        does not exist or cannot be parsed as YAML. An error message is
        printed in both failure cases.
    """
    try:
        # Hand PyYAML the raw bytes so it detects the encoding itself rather
        # than depending on the platform's locale encoding; undecodable
        # input is then reported as a YAMLError and handled below.
        with open(filepath, "rb") as f:
            return yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: Configuration file '{filepath}' not found.")
        return None
    except yaml.YAMLError as e:
        print(f"Error: Could not load YAML from '{filepath}': {e}")
        return None


# Determine the absolute path to the config file relative to this file, so
# the stored path does not depend on where the script is run from.
_CONFIG_FILE_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "bigqueryclient_config.yaml"
)

config_data = load_config_yaml(_CONFIG_FILE_PATH)


def _config_list(key):
    """Return the list configured under ``key``, or ``[]`` if unavailable.

    An empty list is returned when the config failed to load, when the key
    is missing, and when the key is present but has an empty/null value, so
    the constants below can always be iterated.
    """
    if not config_data:
        return []
    return config_data.get(key) or []


CLASSES_TO_INCLUDE = _config_list("include_class_name_patterns")
CLASSES_TO_EXCLUDE = _config_list("exclude_class_name_patterns")
METHODS_TO_INCLUDE = _config_list("include_method_name_patterns")
METHODS_TO_EXCLUDE = _config_list("exclude_method_name_patterns")
17 changes: 17 additions & 0 deletions scripts/microgenerator/template_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# TODO: Add a header if needed.

import os
import jinja2


def load_template(template_name):
    """Return the named Jinja2 template from this module's directory.

    Args:
        template_name: File name of the template to load.

    Returns:
        The template object obtained from the environment's ``get_template``.
    """
    module_path = os.path.abspath(__file__)
    loader = jinja2.FileSystemLoader(os.path.dirname(module_path))
    environment = jinja2.Environment(
        loader=loader,
        # Drop the newline that follows a block tag so tags leave no blank lines.
        trim_blocks=True,
        # Strip the whitespace in front of a block tag on its line.
        lstrip_blocks=True,
    )
    return environment.get_template(template_name)
Loading