Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ humantime = "2.1"
itertools = "0.14.0"
parking_lot = { version = "0.12" }
percent-encoding = "2.1"
regex = "1.11.1"
thiserror = "2.0.2"
tracing = { version = "0.1" }
url = "2.2"
Expand Down
148 changes: 127 additions & 21 deletions src/azure/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use percent_encoding::percent_decode_str;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
use std::sync::Arc;
use std::sync::OnceLock;
use regex::Regex;
use url::Url;

/// The well-known account used by Azurite and the legacy Azure Storage Emulator.
Expand Down Expand Up @@ -671,36 +673,92 @@ impl MicrosoftAzureBuilder {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix(".blob.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
} else if let Some(a) = host.strip_suffix(".blob.fabric.microsoft.com") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else if let Some(a) = host.strip_suffix("-api.onelake.fabric.microsoft.com") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Is *-api.onelake.fabric.microsoft.com a publicly documented endpoint? If yes, can you point to the Microsoft doc so we can cite it in code/tests?

I don't see it in https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api

self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
self.use_fabric_endpoint = true.into();
} else {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}
}
"https" => match host.split_once('.') {
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
let container = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);
if !container.is_empty() {
self.container_name = Some(validate(container)?);
"https" => {
// Regex to match WS-PL FQDN:
// "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add an example URL for each of the APIs you are adding support for?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added examples for the WS-PL DFS/Blob endpoints. We are waiting for the PM to confirm the ABFSS and WS-PL OneLake domains.

static WS_PL_REGEX: OnceLock<Regex> = OnceLock::new();

let ws_pl_regex = WS_PL_REGEX.get_or_init(|| {
Regex::new(
r"(?i)^(?P<workspaceid>[0-9a-f]{32})\.z(?P<xy>[0-9a-f]{2})\.(onelake|dfs|blob)\.fabric\.microsoft\.com$"
).unwrap()
});

// WS-PL Fabric endpoints, e.g. (1) c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com and (2) c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com
if let Some(captures) = ws_pl_regex.captures(host) {
let workspaceid = captures.name("workspaceid").unwrap().as_str();
let xy = captures.name("xy").unwrap().as_str();

if !workspaceid.get(0..2).is_some_and(|pfx| pfx.eq_ignore_ascii_case(xy)) {
return Err(Error::UrlNotRecognised { url: url.into() }.into());
}

self.account_name = Some(format!("{workspaceid}.z{xy}"));
self.container_name = Some(validate(workspaceid)?);
self.use_fabric_endpoint = true.into();
return Ok(());
}
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);
// Attempt to infer the container name from the URL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove this comment? It seems helpful.

// - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
// - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
//
// See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
let workspace = parsed.path_segments().unwrap().next().expect(
"iterator always contains at least one string (which may be empty)",
);

// Api Onelake Fabric endpoint
if host.ends_with("-api.onelake.fabric.microsoft.com") {
let account = host.strip_suffix("-api.onelake.fabric.microsoft.com").unwrap();
self.account_name = Some(validate(account)?);
let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string())
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
return Ok(());
}

match host.split_once('.') {
// Azure Storage public
Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);

// Attempt to infer the container name from the URL
let container = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !container.is_empty() {
self.container_name = Some(validate(container)?);
}
}

// Fabric endpoints
Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
self.account_name = Some(validate(a)?);

let workspace = parsed.path_segments().unwrap().next()
.expect("iterator always contains at least one string (which may be empty)");

if !workspace.is_empty() {
self.container_name = Some(workspace.to_string());
}

self.use_fabric_endpoint = true.into();
}

_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
}
_ => return Err(Error::UrlNotRecognised { url: url.into() }.into()),
},
scheme => {
let scheme = scheme.into();
Expand Down Expand Up @@ -1119,6 +1177,14 @@ mod tests {
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("abfss://file_system@account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, Some("file_system".to_string()));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder.parse_url("abfs://container/path").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
Expand Down Expand Up @@ -1166,6 +1232,14 @@ mod tests {
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account-api.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, None);
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.dfs.fabric.microsoft.com/container")
Expand All @@ -1190,6 +1264,38 @@ mod tests {
assert_eq!(builder.container_name.as_deref(), Some("container"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://Ab000000000000000000000000000000.zAb.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.blob.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://ab000000000000000000000000000000.zab.onelake.fabric.microsoft.com/")
.unwrap();
assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string()));
assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000"));
assert!(builder.use_fabric_endpoint.get().unwrap());

let err_cases = [
"mailto://account.blob.core.windows.net/",
"az://blob.mydomain/",
Expand Down Expand Up @@ -1256,4 +1362,4 @@ mod tests {
panic!("{key} not propagated as ClientConfigKey");
}
}
}
}