
Commit e2d8c4c

1 parent: c391f79

9 files changed: +132 additions, −113 deletions
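
Every hunk in this commit is a formatting-only re-wrap: long calls and decorators are split so the opening parenthesis gets its own line and arguments indent one level deeper, with no behavioral change. A minimal sketch of reproducing such a sweep, assuming a yapf-style formatter is in use (the commit itself does not name its tool):

    # Sketch only: assumes yapf is installed and is the formatter behind these
    # changes; the tool choice is an assumption, not stated in the commit.
    import subprocess

    # Rewrites files in place across the SDK package and its tests.
    subprocess.run(["yapf", "--in-place", "--recursive", "databricks", "tests"], check=True)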

databricks/sdk/config.py

Lines changed: 10 additions & 9 deletions
@@ -92,15 +92,16 @@ class Config:
     max_connections_per_pool: int = ConfigAttribute()
     databricks_environment: Optional[DatabricksEnvironment] = None
 
-    def __init__(self,
-                 *,
-                 # Deprecated. Use credentials_strategy instead.
-                 credentials_provider: Optional[CredentialsStrategy] = None,
-                 credentials_strategy: Optional[CredentialsStrategy] = None,
-                 product=None,
-                 product_version=None,
-                 clock: Optional[Clock] = None,
-                 **kwargs):
+    def __init__(
+            self,
+            *,
+            # Deprecated. Use credentials_strategy instead.
+            credentials_provider: Optional[CredentialsStrategy] = None,
+            credentials_strategy: Optional[CredentialsStrategy] = None,
+            product=None,
+            product_version=None,
+            clock: Optional[Clock] = None,
+            **kwargs):
         self._header_factory = None
         self._inner = {}
         self._user_agent_other_info = []
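
The re-wrapped signature keeps every constructor argument keyword-only (note the bare * after self), so callers must pass each one by name. A small usage sketch of that call shape; the host and product values below are placeholders, not taken from this commit, and Config still resolves credentials from the environment at construction time:

    from databricks.sdk.config import Config

    # Placeholder values for illustration; real credentials (env vars, a CLI
    # profile, etc.) must be resolvable when Config is constructed.
    cfg = Config(host="https://example.cloud.databricks.com",
                 product="my-app",
                 product_version="0.1.0")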

databricks/sdk/credentials_provider.py

Lines changed: 6 additions & 5 deletions
@@ -304,11 +304,12 @@ def github_oidc_azure(cfg: 'Config') -> Optional[CredentialsProvider]:
     # detect Azure AD Tenant ID if it's not specified directly
     token_endpoint = cfg.oidc_endpoints.token_endpoint
     cfg.azure_tenant_id = token_endpoint.replace(aad_endpoint, '').split('/')[0]
-    inner = ClientCredentials(client_id=cfg.azure_client_id,
-                              client_secret="",  # we have no (rotatable) secrets in OIDC flow
-                              token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
-                              endpoint_params=params,
-                              use_params=True)
+    inner = ClientCredentials(
+        client_id=cfg.azure_client_id,
+        client_secret="",  # we have no (rotatable) secrets in OIDC flow
+        token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token",
+        endpoint_params=params,
+        use_params=True)
 
     def refreshed_headers() -> Dict[str, str]:
         token = inner.token()

tests/integration/test_auth.py

Lines changed: 10 additions & 9 deletions
@@ -133,15 +133,16 @@ def _test_runtime_auth_from_jobs_inner(w, env_or_skip, random, dbr_versions, lib
 
     tasks = []
     for v in dbr_versions:
-        t = Task(task_key=f'test_{v.key.replace(".", "_")}',
-                 notebook_task=NotebookTask(notebook_path=notebook_path),
-                 new_cluster=ClusterSpec(
-                     spark_version=v.key,
-                     num_workers=1,
-                     instance_pool_id=instance_pool_id,
-                     # GCP uses "custom" data security mode by default, which does not support UC.
-                     data_security_mode=DataSecurityMode.SINGLE_USER),
-                 libraries=[library])
+        t = Task(
+            task_key=f'test_{v.key.replace(".", "_")}',
+            notebook_task=NotebookTask(notebook_path=notebook_path),
+            new_cluster=ClusterSpec(
+                spark_version=v.key,
+                num_workers=1,
+                instance_pool_id=instance_pool_id,
+                # GCP uses "custom" data security mode by default, which does not support UC.
+                data_security_mode=DataSecurityMode.SINGLE_USER),
+            libraries=[library])
         tasks.append(t)
 
     waiter = w.jobs.submit(run_name=f'Runtime Native Auth {random(10)}', tasks=tasks)

tests/integration/test_jobs.py

Lines changed: 13 additions & 12 deletions
@@ -17,18 +17,19 @@ def test_submitting_jobs(w, random, env_or_skip):
     with w.dbfs.open(py_on_dbfs, write=True, overwrite=True) as f:
         f.write(b'import time; time.sleep(10); print("Hello, World!")')
 
-    waiter = w.jobs.submit(run_name=f'py-sdk-{random(8)}',
-                           tasks=[
-                               jobs.SubmitTask(
-                                   task_key='pi',
-                                   new_cluster=compute.ClusterSpec(
-                                       spark_version=w.clusters.select_spark_version(long_term_support=True),
-                                       # node_type_id=w.clusters.select_node_type(local_disk=True),
-                                       instance_pool_id=env_or_skip('TEST_INSTANCE_POOL_ID'),
-                                       num_workers=1),
-                                   spark_python_task=jobs.SparkPythonTask(python_file=f'dbfs:{py_on_dbfs}'),
-                               )
-                           ])
+    waiter = w.jobs.submit(
+        run_name=f'py-sdk-{random(8)}',
+        tasks=[
+            jobs.SubmitTask(
+                task_key='pi',
+                new_cluster=compute.ClusterSpec(
+                    spark_version=w.clusters.select_spark_version(long_term_support=True),
+                    # node_type_id=w.clusters.select_node_type(local_disk=True),
+                    instance_pool_id=env_or_skip('TEST_INSTANCE_POOL_ID'),
+                    num_workers=1),
+                spark_python_task=jobs.SparkPythonTask(python_file=f'dbfs:{py_on_dbfs}'),
+            )
+        ])
 
     logging.info(f'starting to poll: {waiter.run_id}')

tests/test_base_client.py

Lines changed: 15 additions & 11 deletions
@@ -281,11 +281,13 @@ def inner(h: BaseHTTPRequestHandler):
     assert len(requests) == 2
 
 
-@pytest.mark.parametrize('chunk_size,expected_chunks,data_size',
-                         [(5, 20, 100),  # 100 / 5 bytes per chunk = 20 chunks
-                          (10, 10, 100),  # 100 / 10 bytes per chunk = 10 chunks
-                          (200, 1, 100),  # 100 / 200 bytes per chunk = 1 chunk
-                          ])
+@pytest.mark.parametrize(
+    'chunk_size,expected_chunks,data_size',
+    [
+        (5, 20, 100),  # 100 / 5 bytes per chunk = 20 chunks
+        (10, 10, 100),  # 100 / 10 bytes per chunk = 10 chunks
+        (200, 1, 100),  # 100 / 200 bytes per chunk = 1 chunk
+    ])
 def test_streaming_response_chunk_size(chunk_size, expected_chunks, data_size):
     rng = random.Random(42)
     test_data = bytes(rng.getrandbits(8) for _ in range(data_size))
@@ -355,12 +357,14 @@ def tell(self):
     assert client._is_seekable_stream(CustomSeekableStream())
 
 
-@pytest.mark.parametrize('input_data', [
-    b"0123456789",  # bytes -> BytesIO
-    "0123456789",  # str -> BytesIO
-    io.BytesIO(b"0123456789"),  # BytesIO directly
-    io.StringIO("0123456789"),  # StringIO
-])
+@pytest.mark.parametrize(
+    'input_data',
+    [
+        b"0123456789",  # bytes -> BytesIO
+        "0123456789",  # str -> BytesIO
+        io.BytesIO(b"0123456789"),  # BytesIO directly
+        io.StringIO("0123456789"),  # StringIO
+    ])
 def test_reset_seekable_stream_on_retry(input_data):
     received_data = []

tests/test_core.py

Lines changed: 14 additions & 8 deletions
@@ -370,14 +370,20 @@ def inner(h: BaseHTTPRequestHandler):
     assert {'Authorization': 'Taker this-is-it'} == headers
 
 
-@pytest.mark.parametrize(['azure_environment', 'expected'],
-                         [('PUBLIC', ENVIRONMENTS['PUBLIC']), ('USGOVERNMENT', ENVIRONMENTS['USGOVERNMENT']),
-                          ('CHINA', ENVIRONMENTS['CHINA']), ('public', ENVIRONMENTS['PUBLIC']),
-                          ('usgovernment', ENVIRONMENTS['USGOVERNMENT']), ('china', ENVIRONMENTS['CHINA']),
-                          # Kept for historical compatibility
-                          ('AzurePublicCloud', ENVIRONMENTS['PUBLIC']),
-                          ('AzureUSGovernment', ENVIRONMENTS['USGOVERNMENT']),
-                          ('AzureChinaCloud', ENVIRONMENTS['CHINA']), ])
+@pytest.mark.parametrize(
+    ['azure_environment', 'expected'],
+    [
+        ('PUBLIC', ENVIRONMENTS['PUBLIC']),
+        ('USGOVERNMENT', ENVIRONMENTS['USGOVERNMENT']),
+        ('CHINA', ENVIRONMENTS['CHINA']),
+        ('public', ENVIRONMENTS['PUBLIC']),
+        ('usgovernment', ENVIRONMENTS['USGOVERNMENT']),
+        ('china', ENVIRONMENTS['CHINA']),
+        # Kept for historical compatibility
+        ('AzurePublicCloud', ENVIRONMENTS['PUBLIC']),
+        ('AzureUSGovernment', ENVIRONMENTS['USGOVERNMENT']),
+        ('AzureChinaCloud', ENVIRONMENTS['CHINA']),
+    ])
 def test_azure_environment(azure_environment, expected):
     c = Config(credentials_strategy=noop_credentials,
                azure_workspace_resource_id='...',
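
The parametrize hunks in these test files change layout only: the decorator call opens on its own line and each case sits on its own line next to its comment. A standalone, hedged illustration of the same layout; the test below is invented for this example and is not part of the SDK test suite:

    import pytest


    @pytest.mark.parametrize(
        'chunk_size, expected_chunks, data_size',
        [
            (5, 20, 100),   # 100 bytes / 5 bytes per chunk = 20 chunks
            (10, 10, 100),  # 100 bytes / 10 bytes per chunk = 10 chunks
            (200, 1, 100),  # chunk size larger than the payload -> one chunk
        ])
    def test_chunk_count(chunk_size, expected_chunks, data_size):
        full_chunks, remainder = divmod(data_size, chunk_size)
        assert full_chunks + (1 if remainder else 0) == expected_chunks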

tests/test_errors.py

Lines changed: 47 additions & 46 deletions
@@ -83,52 +83,53 @@ def make_private_link_response() -> requests.Response:
     for x in base_subclass_test_cases]
 
 
-@pytest.mark.parametrize('response, expected_error, expected_message', subclass_test_cases + [
-    (fake_response('GET', 400, ''), errors.BadRequest, 'Bad Request'),
-    (fake_valid_response('GET', 417, 'WHOOPS', 'nope'), errors.DatabricksError, 'nope'),
-    (fake_valid_response('GET', 522, '', 'nope'), errors.DatabricksError, 'nope'),
-    (make_private_link_response(), errors.PrivateLinkValidationError,
-     ('The requested workspace has AWS PrivateLink enabled and is not accessible from the current network. '
-      'Ensure that AWS PrivateLink is properly configured and that your device has access to the AWS VPC '
-      'endpoint. For more information, see '
-      'https://docs.databricks.com/en/security/network/classic/privatelink.html.'),
-     ),
-    (fake_valid_response(
-        'GET', 400, 'INVALID_PARAMETER_VALUE', 'Cluster abcde does not exist',
-        '/api/2.0/clusters/get'), errors.ResourceDoesNotExist, 'Cluster abcde does not exist'),
-    (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Job abcde does not exist',
-                         '/api/2.0/jobs/get'), errors.ResourceDoesNotExist, 'Job abcde does not exist'),
-    (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Job abcde does not exist',
-                         '/api/2.1/jobs/get'), errors.ResourceDoesNotExist, 'Job abcde does not exist'),
-    (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Invalid spark version',
-                         '/api/2.1/jobs/get'), errors.InvalidParameterValue, 'Invalid spark version'),
-    (fake_response(
-        'GET', 400,
-        'MALFORMED_REQUEST: vpc_endpoints malformed parameters: VPC Endpoint ... with use_case ... cannot be attached in ... list'
-    ), errors.BadRequest,
-     'vpc_endpoints malformed parameters: VPC Endpoint ... with use_case ... cannot be attached in ... list'),
-    (fake_response('GET', 400, '<pre>Worker environment not ready</pre>'), errors.BadRequest,
-     'Worker environment not ready'),
-    (fake_response('GET', 400, 'this is not a real response'), errors.BadRequest,
-     ('unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. '
-      'Please report this issue with the following debugging information to the SDK issue tracker at '
-      'https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n'
-      '< 400 Bad Request\n'
-      '< this is not a real response```')),
-    (fake_response(
-        'GET', 404,
-        json.dumps({
-            'detail': 'Group with id 1234 is not found',
-            'status': '404',
-            'schemas': ['urn:ietf:params:scim:api:messages:2.0:Error']
-        })), errors.NotFound, 'None Group with id 1234 is not found'),
-    (fake_response('GET', 404, json.dumps("This is JSON but not a dictionary")), errors.NotFound,
-     'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< "This is JSON but not a dictionary"```'
-     ),
-    (fake_raw_response('GET', 404, b'\x80'), errors.NotFound,
-     'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< �```'
-     )
-])
+@pytest.mark.parametrize(
+    'response, expected_error, expected_message', subclass_test_cases +
+    [(fake_response('GET', 400, ''), errors.BadRequest, 'Bad Request'),
+     (fake_valid_response('GET', 417, 'WHOOPS', 'nope'), errors.DatabricksError, 'nope'),
+     (fake_valid_response('GET', 522, '', 'nope'), errors.DatabricksError, 'nope'),
+     (make_private_link_response(), errors.PrivateLinkValidationError,
+      ('The requested workspace has AWS PrivateLink enabled and is not accessible from the current network. '
+       'Ensure that AWS PrivateLink is properly configured and that your device has access to the AWS VPC '
+       'endpoint. For more information, see '
+       'https://docs.databricks.com/en/security/network/classic/privatelink.html.'),
+      ),
+     (fake_valid_response(
+         'GET', 400, 'INVALID_PARAMETER_VALUE', 'Cluster abcde does not exist',
+         '/api/2.0/clusters/get'), errors.ResourceDoesNotExist, 'Cluster abcde does not exist'),
+     (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Job abcde does not exist',
+                          '/api/2.0/jobs/get'), errors.ResourceDoesNotExist, 'Job abcde does not exist'),
+     (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Job abcde does not exist',
+                          '/api/2.1/jobs/get'), errors.ResourceDoesNotExist, 'Job abcde does not exist'),
+     (fake_valid_response('GET', 400, 'INVALID_PARAMETER_VALUE', 'Invalid spark version',
+                          '/api/2.1/jobs/get'), errors.InvalidParameterValue, 'Invalid spark version'),
+     (fake_response(
+         'GET', 400,
+         'MALFORMED_REQUEST: vpc_endpoints malformed parameters: VPC Endpoint ... with use_case ... cannot be attached in ... list'
+     ), errors.BadRequest,
+      'vpc_endpoints malformed parameters: VPC Endpoint ... with use_case ... cannot be attached in ... list'
+      ),
+     (fake_response('GET', 400, '<pre>Worker environment not ready</pre>'), errors.BadRequest,
+      'Worker environment not ready'),
+     (fake_response('GET', 400, 'this is not a real response'), errors.BadRequest,
+      ('unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. '
+       'Please report this issue with the following debugging information to the SDK issue tracker at '
+       'https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n'
+       '< 400 Bad Request\n'
+       '< this is not a real response```')),
+     (fake_response(
+         'GET', 404,
+         json.dumps({
+             'detail': 'Group with id 1234 is not found',
+             'status': '404',
+             'schemas': ['urn:ietf:params:scim:api:messages:2.0:Error']
+         })), errors.NotFound, 'None Group with id 1234 is not found'),
+     (fake_response('GET', 404, json.dumps("This is JSON but not a dictionary")), errors.NotFound,
+      'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< "This is JSON but not a dictionary"```'
+      ),
+     (fake_raw_response('GET', 404, b'\x80'), errors.NotFound,
+      'unable to parse response. This is likely a bug in the Databricks SDK for Python or the underlying API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log:```GET /api/2.0/service\n< 404 Not Found\n< �```'
+     )])
 def test_get_api_error(response, expected_error, expected_message):
     parser = errors._Parser()
     with pytest.raises(errors.DatabricksError) as e:

tests/test_model_serving_auth.py

Lines changed: 10 additions & 7 deletions
@@ -47,13 +47,16 @@ def test_model_serving_auth(env_values, del_env_values, oauth_file_name, monkeyp
     assert headers.get("Authorization") == 'Bearer databricks_sdk_unit_test_token'
 
 
-@pytest.mark.parametrize("env_values, oauth_file_name", [
-    ([], "invalid_file_name"),  # Not in Model Serving and Invalid File Name
-    ([('IS_IN_DB_MODEL_SERVING_ENV', 'true')], "invalid_file_name"),  # In Model Serving and Invalid File Name
-    ([('IS_IN_DATABRICKS_MODEL_SERVING_ENV', 'true')
-      ], "invalid_file_name"),  # In Model Serving and Invalid File Name
-    ([], "tests/testdata/model-serving-test-token")  # Not in Model Serving and Valid File Name
-])
+@pytest.mark.parametrize(
+    "env_values, oauth_file_name",
+    [
+        ([], "invalid_file_name"),  # Not in Model Serving and Invalid File Name
+        ([('IS_IN_DB_MODEL_SERVING_ENV', 'true')
+          ], "invalid_file_name"),  # In Model Serving and Invalid File Name
+        ([('IS_IN_DATABRICKS_MODEL_SERVING_ENV', 'true')
+          ], "invalid_file_name"),  # In Model Serving and Invalid File Name
+        ([], "tests/testdata/model-serving-test-token")  # Not in Model Serving and Valid File Name
+    ])
 @raises(default_auth_base_error_message)
 def test_model_serving_auth_errors(env_values, oauth_file_name, monkeypatch):
     # Guarantee that the tests defaults to env variables rather than config file.

tests/test_oauth.py

Lines changed: 7 additions & 6 deletions
@@ -10,25 +10,26 @@ def test_token_cache_unique_filename_by_host():
     common_args = dict(client_id="abc",
                        redirect_url="http://localhost:8020",
                        oidc_endpoints=OidcEndpoints("http://localhost:1234", "http://localhost:1234"))
-    assert TokenCache(host="http://localhost:", **common_args).filename != TokenCache(
-        "https://bar.cloud.databricks.com", **common_args).filename
+    assert TokenCache(host="http://localhost:",
+                      **common_args).filename != TokenCache("https://bar.cloud.databricks.com",
+                                                            **common_args).filename
 
 
 def test_token_cache_unique_filename_by_client_id():
     common_args = dict(host="http://localhost:",
                        redirect_url="http://localhost:8020",
                        oidc_endpoints=OidcEndpoints("http://localhost:1234", "http://localhost:1234"))
-    assert TokenCache(client_id="abc", **common_args).filename != TokenCache(client_id="def", **
-                                                                             common_args).filename
+    assert TokenCache(client_id="abc", **common_args).filename != TokenCache(client_id="def",
+                                                                             **common_args).filename
 
 
 def test_token_cache_unique_filename_by_scopes():
     common_args = dict(host="http://localhost:",
                        client_id="abc",
                        redirect_url="http://localhost:8020",
                        oidc_endpoints=OidcEndpoints("http://localhost:1234", "http://localhost:1234"))
-    assert TokenCache(scopes=["foo"], **common_args).filename != TokenCache(scopes=["bar"], **
-                                                                            common_args).filename
+    assert TokenCache(scopes=["foo"], **common_args).filename != TokenCache(scopes=["bar"],
+                                                                            **common_args).filename
 
 
 def test_account_oidc_endpoints(requests_mock):
