Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ humantime = "2.1"
itertools = "0.14.0"
parking_lot = { version = "0.12" }
percent-encoding = "2.1"
regex = "1.11.1"
thiserror = "2.0.2"
tracing = { version = "0.1" }
url = "2.2"
Expand Down
141 changes: 117 additions & 24 deletions src/azure/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use percent_encoding::percent_decode_str;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::OnceLock;
use regex::Regex;
use url::Url;

/// The well-known account used by Azurite and the legacy Azure Storage Emulator.
Expand Down Expand Up @@ -671,36 +673,87 @@ impl MicrosoftAzureBuilder {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix(".blob.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
} else if let Some(a) = host.strip_suffix(".blob.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix("-api.onelake.fabric.microsoft.com") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Is *-api.onelake.fabric.microsoft.com a publicly documented endpoint? If yes, can you point to the Microsoft doc so we can cite it in code/tests?

I don't see it in https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is still outstanding, I think. Is this a valid URL?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it is a valid URL — added in the documentation.

self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}
}
"https" => match host.split_once('.') {
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
let container = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);
if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
"https" => {
// Regex to match a WS-PL FQDN:
// "{workspaceid}.z{xy}.(onelake|dfs|blob).fabric.microsoft.com"
// where {workspaceid} is 32 lowercase hex digits and {xy} is 2 lowercase hex digits.
static WS_PL_REGEX: OnceLock<Regex> = OnceLock::new();
let ws_pl_regex = WS_PL_REGEX.get_or_init(|| {
Regex::new(
r"^(?P<workspaceid>[0-9a-f]{32})\.z(?P<xy>[0-9a-f]{2})\.(onelake|dfs|blob)\.fabric\.microsoft\.com$"
).unwrap()
});

// WS-PL Fabric endpoint
if let Some(captures) = ws_pl_regex.captures(host) {
let workspaceid = captures.name("workspaceid").unwrap().as_str();
let xy = captures.name("xy").unwrap().as_str();

self.account_name = Some(format!("{workspaceid}.z{xy}"));
self.container_name = Some(validate(workspaceid)?);
self.use_fabric_endpoint = true.into();
return Ok(());
}
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
// Attempt to infer the container name from the URL
// - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
//
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
let workspace = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);

// OneLake API Fabric endpoint: "{account}-api.onelake.fabric.microsoft.com"
if host.ends_with("-api.onelake.fabric.microsoft.com") {
let account = host.strip_suffix("-api.onelake.fabric.microsoft.com").unwrap();
self.account_name = Some(validate(account)?);
let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string())
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
return Ok(());
}

match host.split_once('.') {
// Azure Storage public
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);

let container = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
}

// Fabric endpoints
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);

// Attempt to infer the container name from the URL
let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
}

_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
}
_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
},
scheme => {
let scheme = scheme.into();
Expand Down Expand Up @@ -1119,6 +1172,14 @@ mod tests {
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("abfss://file_system@account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder.parse_url("abfs://container/path").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
Expand Down Expand Up @@ -1166,6 +1227,14 @@ mod tests {
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.dfs.fabric.microsoft.com/container")
Expand All @@ -1184,10 +1253,34 @@ mod tests {

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.blob.fabric.microsoft.com/container")
.parse_url("https://account.blob.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.blob.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let err_cases = [
Expand Down Expand Up @@ -1256,4 +1349,4 @@ mod tests {
panic!("{key} not propagated as ClientConfigKey");
}
}
}
}
Loading