diff --git a/google/cloud/bigquery_v2/services/centralized_service/_helpers.py b/google/cloud/bigquery_v2/services/centralized_service/_helpers.py index 53b585b18..9901bc764 100644 --- a/google/cloud/bigquery_v2/services/centralized_service/_helpers.py +++ b/google/cloud/bigquery_v2/services/centralized_service/_helpers.py @@ -14,11 +14,87 @@ # limitations under the License. # +from typing import Any, Callable, Optional, Type, TypeVar + + +T = TypeVar("T") + def _drop_self_key(kwargs): - "Drops 'self' key from a given kwargs dict." + """Drops 'self' key from a given kwargs dict.""" if not isinstance(kwargs, dict): raise TypeError("kwargs must be a dict.") kwargs.pop("self", None) # Essentially a no-op if 'self' key does not exist return kwargs + + +def _make_request( + request_class: Type[T], + user_request: Optional[T], + identifier_value: Optional[Any], + identifier_name: str, + parser: Callable[..., Any], + identifier_required: bool = True, +) -> T: + """A factory for creating *Request objects. + + This function simplifies the creation of request objects by extracting identifier + values from strings (e.g., 'project_id.dataset_id') and using them to instantiate + the appropriate request object. It allows users to continue using identifier + strings with BigQueryClient methods, even though the underlying *ServiceClient + methods do not directly support this convenience. + + For example, this helper is used in methods like: + - BigQueryClient.get_dataset() + - BigQueryClient.delete_dataset() + + Args: + request_class: The class of the request object to create (e.g., + GetDatasetRequest, ListModelsRequest). + user_request: A user-provided *Request object. If not None, this + object is returned directly. + identifier_value: The value to be parsed to create the request object + (e.g., a dataset_id for GetDatasetRequest). + identifier_name: The name of the identifier field (e.g., 'dataset_id', + 'job_id'). + parser: A callable that takes the identifier_value and returns a dict + of fields for the request object. For example, a parser could + separate a 'project_id.dataset_id' string into its components. + identifier_required: Whether the identifier is required. Defaults to True. + + Returns: + A *Request object. + + Raises: + ValueError: If both user_request and identifier_value are provided, or + if identifier_required is True and both are None. + + Example: + request = _make_request( + request_class=dataset.GetDatasetRequest, + user_request=request, + identifier_value=dataset_id, + identifier_name="dataset_id", + parser=self._parse_dataset_id_to_dict, + identifier_required=True, + ) + """ + if user_request is not None and identifier_value is not None: + raise ValueError( + f"Provide either a request object or '{identifier_name}', not both." + ) + + if user_request is not None: + return user_request + + if identifier_required and identifier_value is None: + raise ValueError( + f"Either a request object or '{identifier_name}' must be provided." + ) + + if identifier_value is None: + request_fields = parser() + else: + request_fields = parser(identifier_value) + return request_class(**request_fields) diff --git a/google/cloud/bigquery_v2/services/centralized_service/client.py b/google/cloud/bigquery_v2/services/centralized_service/client.py index 3b53fb53b..78a7cef6b 100644 --- a/google/cloud/bigquery_v2/services/centralized_service/client.py +++ b/google/cloud/bigquery_v2/services/centralized_service/client.py @@ -16,6 +16,7 @@ import os from typing import ( + Dict, Optional, Sequence, Tuple, @@ -34,6 +35,7 @@ # Import types modules (to access *Requests classes) from google.cloud.bigquery_v2.types import ( dataset, + dataset_reference, job, model, ) @@ -43,37 +45,96 @@ from google.api_core import retry as retries from google.auth import credentials as auth_credentials -# Create a type alias +# Create type aliases try: OptionalRetry = Union[retries.Retry, gapic_v1.method._MethodDefault, None] except AttributeError: # pragma: NO COVER OptionalRetry = Union[retries.Retry, object, None] # type: ignore -# TODO: This line is here to simplify prototyping, etc. -PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") +DatasetIdentifier = Union[str, dataset_reference.DatasetReference] +# TODO: This variable is here to simplify prototyping, etc. +PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") DEFAULT_RETRY: OptionalRetry = gapic_v1.method.DEFAULT DEFAULT_TIMEOUT: Union[float, object] = gapic_v1.method.DEFAULT DEFAULT_METADATA: Sequence[Tuple[str, Union[str, bytes]]] = () - # Create Centralized Client class BigQueryClient: + """A centralized client for BigQuery API.""" + def __init__( self, *, credentials: Optional[auth_credentials.Credentials] = None, client_options: Optional[Union[client_options_lib.ClientOptions, dict]] = None, ): - self._clients = {} + """ + Initializes the BigQueryClient. + + Args: + credentials: + The credentials to use for authentication. If not provided, the + client will attempt to use the default credentials. + client_options: + A dictionary of client options to pass to the underlying + service clients. + """ + + self._clients: Dict[str, object] = {} self._credentials = credentials self._client_options = client_options + self.project = PROJECT_ID + + # --- HELPER METHODS --- + def _parse_dataset_path(self, dataset_path: str) -> Tuple[Optional[str], str]: + """ + Helper to parse project_id and/or dataset_id from a string identifier. + + Args: + dataset_path: A string in the format 'project_id.dataset_id' or + 'dataset_id'. + + Returns: + A tuple of (project_id, dataset_id). + """ + if "." in dataset_path: + # Use rsplit to handle legacy paths like `google.com:my-project.my_dataset`. + project_id, dataset_id = dataset_path.rsplit(".", 1) + return project_id, dataset_id + return self.project, dataset_path + + def _parse_dataset_id_to_dict(self, dataset_id: DatasetIdentifier) -> dict: + """ + Helper to create a dictionary from a project_id and dataset_id to pass + internally between helper functions. + Args: + dataset_id: A string or DatasetReference. + + Returns: + A dict of {"project_id": project_id, "dataset_id": dataset_id_str }. + """ + if isinstance(dataset_id, str): + project_id, dataset_id_str = self._parse_dataset_path(dataset_id) + return {"project_id": project_id, "dataset_id": dataset_id_str} + elif isinstance(dataset_id, dataset_reference.DatasetReference): + return { + "project_id": dataset_id.project_id, + "dataset_id": dataset_id.dataset_id, + } + else: + raise TypeError(f"Invalid type for dataset_id: {type(dataset_id)}") + + def _parse_project_id_to_dict(self, project_id: Optional[str] = None) -> dict: + """Helper to create a request dictionary from a project_id.""" + final_project_id = project_id or self.project + return {"project_id": final_project_id} + + # --- *SERVICECLIENT ATTRIBUTES --- @property def dataset_service_client(self): if "dataset" not in self._clients: - from google.cloud.bigquery_v2.services import dataset_service - self._clients["dataset"] = dataset_service.DatasetServiceClient( credentials=self._credentials, client_options=self._client_options ) @@ -81,28 +142,15 @@ def dataset_service_client(self): @dataset_service_client.setter def dataset_service_client(self, value): - # Check for the methods the centralized client exposes (to allow duck-typing) - required_methods = [ - "get_dataset", - "insert_dataset", - "patch_dataset", - "update_dataset", - "delete_dataset", - "list_datasets", - "undelete_dataset", - ] - for method in required_methods: - if not hasattr(value, method) or not callable(getattr(value, method)): - raise AttributeError( - f"Object assigned to dataset_service_client is missing a callable '{method}' method." - ) + if not isinstance(value, dataset_service.DatasetServiceClient): + raise TypeError( + "Expected an instance of dataset_service.DatasetServiceClient." + ) self._clients["dataset"] = value @property def job_service_client(self): if "job" not in self._clients: - from google.cloud.bigquery_v2.services import job_service - self._clients["job"] = job_service.JobServiceClient( credentials=self._credentials, client_options=self._client_options ) @@ -110,25 +158,13 @@ def job_service_client(self): @job_service_client.setter def job_service_client(self, value): - required_methods = [ - "get_job", - "insert_job", - "cancel_job", - "delete_job", - "list_jobs", - ] - for method in required_methods: - if not hasattr(value, method) or not callable(getattr(value, method)): - raise AttributeError( - f"Object assigned to job_service_client is missing a callable '{method}' method." - ) + if not isinstance(value, job_service.JobServiceClient): + raise TypeError("Expected an instance of job_service.JobServiceClient.") self._clients["job"] = value @property def model_service_client(self): if "model" not in self._clients: - from google.cloud.bigquery_v2.services import model_service - self._clients["model"] = model_service.ModelServiceClient( credentials=self._credentials, client_options=self._client_options ) @@ -136,37 +172,44 @@ def model_service_client(self): @model_service_client.setter def model_service_client(self, value): - required_methods = [ - "get_model", - "delete_model", - "patch_model", - "list_models", - ] - for method in required_methods: - if not hasattr(value, method) or not callable(getattr(value, method)): - raise AttributeError( - f"Object assigned to model_service_client is missing a callable '{method}' method." - ) + if not isinstance(value, model_service.ModelServiceClient): + raise TypeError("Expected an instance of model_service.ModelServiceClient.") self._clients["model"] = value + # --- *SERVICECLIENT METHODS --- def get_dataset( self, - request: Optional[Union[dataset.GetDatasetRequest, dict]] = None, + dataset_id: Optional[DatasetIdentifier] = None, *, + request: Optional["dataset.GetDatasetRequest"] = None, retry: OptionalRetry = DEFAULT_RETRY, timeout: Union[float, object] = DEFAULT_TIMEOUT, metadata: Sequence[Tuple[str, Union[str, bytes]]] = DEFAULT_METADATA, - ): + ) -> "dataset.Dataset": """ TODO: Docstring is purposefully blank. microgenerator will add automatically. """ - kwargs = _helpers._drop_self_key(locals()) - return self.dataset_service_client.get_dataset(**kwargs) + final_request = _helpers._make_request( + request_class=dataset.GetDatasetRequest, + user_request=request, + identifier_value=dataset_id, + identifier_name="dataset_id", + parser=self._parse_dataset_id_to_dict, + identifier_required=True, + ) + + return self.dataset_service_client.get_dataset( + request=final_request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) def list_datasets( self, - request: Optional[Union[dataset.ListDatasetsRequest, dict]] = None, + project_id: Optional[str] = None, *, + request: Optional["dataset.ListDatasetsRequest"] = None, retry: OptionalRetry = DEFAULT_RETRY, timeout: Union[float, object] = DEFAULT_TIMEOUT, metadata: Sequence[Tuple[str, Union[str, bytes]]] = DEFAULT_METADATA, @@ -174,8 +217,28 @@ def list_datasets( """ TODO: Docstring is purposefully blank. microgenerator will add automatically. """ - kwargs = _helpers._drop_self_key(locals()) - return self.dataset_service_client.list_datasets(**kwargs) + final_request = _helpers._make_request( + request_class=dataset.ListDatasetsRequest, + user_request=request, + identifier_value=project_id, + identifier_name="project_id", + parser=self._parse_project_id_to_dict, + identifier_required=False, + ) + + return self.dataset_service_client.list_datasets( + request=final_request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # ============================================================================ + # TODO: HERE THERE BE DRAGONS. Once the above changes have been approved the + # methods below this comment will be updated to look and function similar + # to the above. + # NOT YET READY FOR REVIEW. + # ============================================================================ def list_jobs( self, diff --git a/tests/unit/gapic/bigquery_v2/test_centralized_service.py b/tests/unit/gapic/bigquery_v2/test_centralized_service.py index 9160dc7b1..93640fc47 100644 --- a/tests/unit/gapic/bigquery_v2/test_centralized_service.py +++ b/tests/unit/gapic/bigquery_v2/test_centralized_service.py @@ -88,60 +88,36 @@ def assert_client_called_once_with( # --- FIXTURES --- @pytest.fixture -def mock_dataset_service_client(): - """Mocks the DatasetServiceClient.""" +def mock_dataset_service_client_class(): with mock.patch( "google.cloud.bigquery_v2.services.dataset_service.DatasetServiceClient", autospec=True, - ) as mock_client: - yield mock_client + ) as mock_class: + yield mock_class @pytest.fixture -def mock_job_service_client(): - """Mocks the JobServiceClient.""" +def mock_job_service_client_class(): with mock.patch( "google.cloud.bigquery_v2.services.job_service.JobServiceClient", autospec=True, - ) as mock_client: - yield mock_client + ) as mock_class: + yield mock_class @pytest.fixture -def mock_model_service_client(): - """Mocks the ModelServiceClient.""" +def mock_model_service_client_class(): with mock.patch( "google.cloud.bigquery_v2.services.model_service.ModelServiceClient", autospec=True, - ) as mock_client: - yield mock_client - - -# TODO: figure out a solution for this... is there an easier way to feed in clients? -# TODO: is there an easier way to make mock_x_service_clients? -@pytest.fixture -def bq_client( - mock_dataset_service_client, mock_job_service_client, mock_model_service_client -): - """Provides a BigQueryClient with mocked underlying services.""" - client = centralized_service.BigQueryClient() - client.dataset_service_client = mock_dataset_service_client - client.job_service_client = mock_job_service_client - client.model_service_client = mock_model_service_client - ... - return client - + ) as mock_class: + yield mock_class # --- TEST CLASSES --- from google.api_core import client_options as client_options_lib - -# from google.api_core.client_options import ClientOptions from google.auth import credentials as auth_credentials -# from google.auth.credentials import Credentials - - class TestCentralizedClientInitialization: @pytest.mark.parametrize( "credentials, client_options", @@ -162,9 +138,9 @@ def test_client_initialization_arguments( self, credentials, client_options, - mock_dataset_service_client, - mock_job_service_client, - mock_model_service_client, + mock_dataset_service_client_class, + mock_job_service_client_class, + mock_model_service_client_class, ): # Act client = centralized_service.BigQueryClient( @@ -172,66 +148,104 @@ def test_client_initialization_arguments( ) # Assert - # The BigQueryClient should have been initialized. Accessing the - # service client properties should instantiate them with the correct arguments. - - # Access the property to trigger instantiation _ = client.dataset_service_client - mock_dataset_service_client.assert_called_once_with( + mock_dataset_service_client_class.assert_called_once_with( credentials=credentials, client_options=client_options ) _ = client.job_service_client - mock_job_service_client.assert_called_once_with( + mock_job_service_client_class.assert_called_once_with( credentials=credentials, client_options=client_options ) _ = client.model_service_client - mock_model_service_client.assert_called_once_with( + mock_model_service_client_class.assert_called_once_with( credentials=credentials, client_options=client_options ) class TestCentralizedClientDatasetService: - def test_get_dataset(self, bq_client, mock_dataset_service_client): + def test_get_dataset(self, mock_dataset_service_client_class): # Arrange + mock_instance = mock_dataset_service_client_class.return_value expected_dataset = dataset.Dataset( kind="bigquery#dataset", id=f"{PROJECT_ID}:{DATASET_ID}" ) - mock_dataset_service_client.get_dataset.return_value = expected_dataset + mock_instance.get_dataset.return_value = expected_dataset get_dataset_request = dataset.GetDatasetRequest( project_id=PROJECT_ID, dataset_id=DATASET_ID ) + client = centralized_service.BigQueryClient() # Act - dataset_response = bq_client.get_dataset(request=get_dataset_request) + dataset_response = client.get_dataset(request=get_dataset_request) # Assert assert dataset_response == expected_dataset - assert_client_called_once_with( - mock_dataset_service_client.get_dataset, get_dataset_request - ) + assert_client_called_once_with(mock_instance.get_dataset, get_dataset_request) + + @pytest.mark.parametrize( + "call_args, expected_request", + [ + ( + {"project_id": PROJECT_ID}, + dataset.ListDatasetsRequest(project_id=PROJECT_ID), + ), + ( + {"request": dataset.ListDatasetsRequest(project_id=PROJECT_ID)}, + dataset.ListDatasetsRequest(project_id=PROJECT_ID), + ), + ({}, dataset.ListDatasetsRequest(project_id=PROJECT_ID)), + ], + ) + def test_list_datasets( + self, mock_dataset_service_client_class, call_args, expected_request + ): + # Arrange + mock_instance = mock_dataset_service_client_class.return_value + expected_datasets = [ + dataset.Dataset(kind="bigquery#dataset", id=f"{PROJECT_ID}:{DATASET_ID}") + ] + mock_instance.list_datasets.return_value = expected_datasets + client = centralized_service.BigQueryClient() + client.project = PROJECT_ID + + # Act + datasets_response = client.list_datasets(**call_args) + + # Assert + assert datasets_response == expected_datasets + assert_client_called_once_with(mock_instance.list_datasets, expected_request) + + def test_list_datasets_conflicting_args(self, mock_dataset_service_client_class): + # Arrange + client = centralized_service.BigQueryClient() + list_datasets_request = dataset.ListDatasetsRequest(project_id=PROJECT_ID) + + # Act & Assert + with pytest.raises(ValueError): + client.list_datasets(project_id=PROJECT_ID, request=list_datasets_request) class TestCentralizedClientJobService: - def test_list_jobs(self, bq_client, mock_job_service_client): + def test_list_jobs(self, mock_job_service_client_class): # Arrange + mock_instance = mock_job_service_client_class.return_value expected_jobs = [job.Job(kind="bigquery#job", id=f"{PROJECT_ID}:{JOB_ID}")] - mock_job_service_client.list_jobs.return_value = expected_jobs + mock_instance.list_jobs.return_value = expected_jobs list_jobs_request = job.ListJobsRequest(project_id=PROJECT_ID) + client = centralized_service.BigQueryClient() # Act - jobs_response = bq_client.list_jobs(request=list_jobs_request) + jobs_response = client.list_jobs(request=list_jobs_request) # Assert assert jobs_response == expected_jobs - assert_client_called_once_with( - mock_job_service_client.list_jobs, list_jobs_request - ) + assert_client_called_once_with(mock_instance.list_jobs, list_jobs_request) class TestCentralizedClientModelService: - def test_get_model(self, bq_client, mock_model_service_client): + def test_get_model(self, mock_model_service_client_class): # Arrange expected_model = model.Model( etag=DEFAULT_ETAG, @@ -241,43 +255,39 @@ def test_get_model(self, bq_client, mock_model_service_client): "model_id": MODEL_ID, }, ) - mock_model_service_client.get_model.return_value = expected_model + mock_instance = mock_model_service_client_class.return_value + mock_instance.get_model.return_value = expected_model get_model_request = model.GetModelRequest( project_id=PROJECT_ID, dataset_id=DATASET_ID, model_id=MODEL_ID ) + client = centralized_service.BigQueryClient() # Act - model_response = bq_client.get_model(request=get_model_request) + model_response = client.get_model(request=get_model_request) # Assert assert model_response == expected_model - assert_client_called_once_with( - mock_model_service_client.get_model, get_model_request - ) + assert_client_called_once_with(mock_instance.get_model, get_model_request) - def test_delete_model(self, bq_client, mock_model_service_client): + def test_delete_model(self, mock_model_service_client_class): # Arrange - # The underlying service call returns nothing on success. - mock_model_service_client.delete_model.return_value = None + mock_instance = mock_model_service_client_class.return_value + mock_instance.delete_model.return_value = None delete_model_request = model.DeleteModelRequest( project_id=PROJECT_ID, dataset_id=DATASET_ID, model_id=MODEL_ID ) + client = centralized_service.BigQueryClient() # Act - # The wrapper method should also return nothing. - result = bq_client.delete_model(request=delete_model_request) + result = client.delete_model(request=delete_model_request) # Assert - # 1. Assert the return value is None. This fails if the method doesn't exist. assert result is None - # 2. Assert the underlying service was called correctly. - assert_client_called_once_with( - mock_model_service_client.delete_model, - delete_model_request, - ) + assert_client_called_once_with(mock_instance.delete_model, delete_model_request) - def test_patch_model(self, bq_client, mock_model_service_client): + def test_patch_model(self, mock_model_service_client_class): # Arrange + mock_instance = mock_model_service_client_class.return_value expected_model = model.Model( etag="new_etag", model_reference={ @@ -287,8 +297,7 @@ def test_patch_model(self, bq_client, mock_model_service_client): }, description="A newly patched description.", ) - mock_model_service_client.patch_model.return_value = expected_model - + mock_instance.patch_model.return_value = expected_model model_patch = model.Model(description="A newly patched description.") patch_model_request = model.PatchModelRequest( project_id=PROJECT_ID, @@ -296,18 +305,18 @@ def test_patch_model(self, bq_client, mock_model_service_client): model_id=MODEL_ID, model=model_patch, ) + client = centralized_service.BigQueryClient() # Act - patched_model = bq_client.patch_model(request=patch_model_request) + patched_model = client.patch_model(request=patch_model_request) # Assert assert patched_model == expected_model - assert_client_called_once_with( - mock_model_service_client.patch_model, patch_model_request - ) + assert_client_called_once_with(mock_instance.patch_model, patch_model_request) - def test_list_models(self, bq_client, mock_model_service_client): + def test_list_models(self, mock_model_service_client_class): # Arrange + mock_instance = mock_model_service_client_class.return_value expected_models = [ model.Model( etag=DEFAULT_ETAG, @@ -318,15 +327,15 @@ def test_list_models(self, bq_client, mock_model_service_client): }, ) ] - mock_model_service_client.list_models.return_value = expected_models + mock_instance.list_models.return_value = expected_models list_models_request = model.ListModelsRequest( project_id=PROJECT_ID, dataset_id=DATASET_ID ) + client = centralized_service.BigQueryClient() + # Act - models_response = bq_client.list_models(request=list_models_request) + models_response = client.list_models(request=list_models_request) # Assert assert models_response == expected_models - assert_client_called_once_with( - mock_model_service_client.list_models, list_models_request - ) + assert_client_called_once_with(mock_instance.list_models, list_models_request)