-
Notifications
You must be signed in to change notification settings - Fork 110
Refactor do_connect for better logging #504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -350,6 +350,13 @@ def __init__( | |
max_concurrency = batch_size | ||
self.max_concurrency = max_concurrency | ||
|
||
@property | ||
def account_url(self) -> str: | ||
if hasattr(self, "account_host"): | ||
return f"https://{self.account_host}" | ||
else: | ||
return f"https://{self.account_name}.blob.core.windows.net" | ||
|
||
@classmethod | ||
def _strip_protocol(cls, path: str): | ||
""" | ||
|
@@ -464,55 +471,15 @@ def _get_default_azure_credential(self, **kwargs): | |
|
||
def do_connect(self): | ||
"""Connect to the BlobServiceClient, using user-specified connection details. | ||
Tries credentials first, then connection string and finally account key | ||
Tries connection string first, then credential and finally account key | ||
|
||
Raises | ||
------ | ||
ValueError if none of the connection details are available | ||
""" | ||
|
||
try: | ||
if self.connection_string is not None: | ||
self.service_client = AIOBlobServiceClient.from_connection_string( | ||
conn_str=self.connection_string | ||
) | ||
elif self.account_name is not None: | ||
if hasattr(self, "account_host"): | ||
self.account_url: str = f"https://{self.account_host}" | ||
else: | ||
self.account_url: str = ( | ||
f"https://{self.account_name}.blob.core.windows.net" | ||
) | ||
|
||
creds = [self.credential, self.account_key] | ||
if any(creds): | ||
self.service_client = [ | ||
AIOBlobServiceClient( | ||
account_url=self.account_url, | ||
credential=cred, | ||
_location_mode=self.location_mode, | ||
) | ||
for cred in creds | ||
if cred is not None | ||
][0] | ||
elif self.sas_token is not None: | ||
if not self.sas_token.startswith("?"): | ||
self.sas_token = f"?{self.sas_token}" | ||
self.service_client = AIOBlobServiceClient( | ||
account_url=self.account_url + self.sas_token, | ||
credential=None, | ||
_location_mode=self.location_mode, | ||
) | ||
else: | ||
# Fall back to anonymous login, and assume public container | ||
self.service_client = AIOBlobServiceClient( | ||
account_url=self.account_url | ||
) | ||
else: | ||
raise ValueError( | ||
"Must provide either a connection_string or account_name with credentials!!" | ||
) | ||
|
||
self.service_client = self._get_service_client() | ||
except RuntimeError: | ||
loop = get_loop() | ||
asyncio.set_event_loop(loop) | ||
|
@@ -521,6 +488,46 @@ def do_connect(self): | |
except Exception as e: | ||
raise ValueError(f"unable to connect to account for {e}") from e | ||
|
||
def _get_service_client(self) -> AIOBlobServiceClient: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we hoist this helper internal method to the module level instead of making it a method on the class? Mainly that helps better avoid accessing internal methods of one class from another class. |
||
"""Connect to the Asynchronous BlobServiceClient. | ||
|
||
Tries connection string first, then credential and finally account key | ||
""" | ||
# Shortcut for connection string | ||
if self.connection_string is not None: | ||
logger.info("Connect using connection string") | ||
return AIOBlobServiceClient.from_connection_string( | ||
conn_str=self.connection_string | ||
) | ||
|
||
if self.account_name is not None: | ||
kwargs = { | ||
"account_url": self.account_url, | ||
"credential": None, | ||
"_location_mode": self.location_mode, | ||
} | ||
creds = ["credential", "sync_credential", "account_key"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any ideas on why the file system's client and the file-object's client were out of sync in terms of credential resolution here in the first place? I need to go through the git history on how these became out of sync, but I'd probably lean toward even removing
I'll look more into this and circle back on what we do here. |
||
for name in creds: | ||
if (cred := getattr(self, name, None)) is not None: | ||
logger.info("Connect using %s", name.replace("_", " ")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For these logging statements, let's set the level to |
||
kwargs["credential"] = cred | ||
break | ||
else: | ||
if self.sas_token is not None: | ||
logger.info("Connect using SAS token") | ||
if not self.sas_token.startswith("?"): | ||
self.sas_token = f"?{self.sas_token}" | ||
Comment on lines
+518
to
+519
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be interesting if we could hoist this logic to the |
||
kwargs["account_url"] = f'{kwargs["account_url"]}{self.sas_token}' | ||
else: | ||
logger.info("Connect using anonymous login") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the previous logic, the |
||
# Fall back to anonymous login, and assume public container | ||
|
||
return AIOBlobServiceClient(**kwargs) | ||
|
||
raise ValueError( | ||
"Must provide either a connection_string or account_name with credentials!!" | ||
) | ||
|
||
def split_path( | ||
self, path, delimiter="/", return_container: bool = False, **kwargs | ||
) -> Tuple[str, str, Optional[str]]: | ||
|
@@ -2030,43 +2037,16 @@ def close(self): | |
|
||
def connect_client(self): | ||
"""Connect to the Asynchronous BlobServiceClient, using user-specified connection details. | ||
Tries credentials first, then connection string and finally account key | ||
Tries connection string first, then credential and finally account key | ||
|
||
Raises | ||
------ | ||
ValueError if none of the connection details are available | ||
""" | ||
try: | ||
if hasattr(self.fs, "account_host"): | ||
self.fs.account_url: str = f"https://{self.fs.account_host}" | ||
else: | ||
self.fs.account_url: str = ( | ||
f"https://{self.fs.account_name}.blob.core.windows.net" | ||
) | ||
|
||
creds = [self.fs.sync_credential, self.fs.account_key, self.fs.credential] | ||
if any(creds): | ||
self.container_client = [ | ||
AIOBlobServiceClient( | ||
account_url=self.fs.account_url, | ||
credential=cred, | ||
_location_mode=self.fs.location_mode, | ||
).get_container_client(self.container_name) | ||
for cred in creds | ||
if cred is not None | ||
][0] | ||
elif self.fs.connection_string is not None: | ||
self.container_client = AIOBlobServiceClient.from_connection_string( | ||
conn_str=self.fs.connection_string | ||
).get_container_client(self.container_name) | ||
elif self.fs.sas_token is not None: | ||
self.container_client = AIOBlobServiceClient( | ||
account_url=self.fs.account_url + self.fs.sas_token, credential=None | ||
).get_container_client(self.container_name) | ||
else: | ||
self.container_client = AIOBlobServiceClient( | ||
account_url=self.fs.account_url | ||
).get_container_client(self.container_name) | ||
self.container_client = self.fs._get_service_client().get_container_client( | ||
self.container_name | ||
) | ||
|
||
except Exception as e: | ||
raise ValueError( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically, to keep the existing behavior, we will need to check whether the connection string is set and if so return
None
. We should add that since if aconnection_string
is provided, anaccount_name
would not be and we could end up encodingNone
into the account url.While it is possible to piece together an account url from a connection string, that would be quite a bit of logic for the purpose of this PR.