-
Notifications
You must be signed in to change notification settings - Fork 120
Whitelisting Onelake API & Workspace PL FQDNs #552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,8 @@ use percent_encoding::percent_decode_str; | |
| use serde::{Deserialize, Serialize}; | ||
| use std::str::FromStr; | ||
| use std::sync::Arc; | ||
| use std::sync::OnceLock; | ||
| use regex::Regex; | ||
| use url::Url; | ||
|
|
||
| /// The well-known account used by Azurite and the legacy Azure Storage Emulator. | ||
|
|
@@ -671,36 +673,92 @@ impl MicrosoftAzureBuilder { | |
| self.container_name = Some(validate(parsed.username())?); | ||
| self.account_name = Some(validate(a)?); | ||
| self.use_fabric_endpoint = true.into(); | ||
| } else if let Some(a) = host.strip_suffix(".blob.core.windows.net") { | ||
| self.container_name = Some(validate(parsed.username())?); | ||
| self.account_name = Some(validate(a)?); | ||
| } else if let Some(a) = host.strip_suffix(".blob.fabric.microsoft.com") { | ||
| self.container_name = Some(validate(parsed.username())?); | ||
| self.account_name = Some(validate(a)?); | ||
| self.use_fabric_endpoint = true.into(); | ||
| } else if let Some(a) = host.strip_suffix("-api.onelake.fabric.microsoft.com") { | ||
| self.container_name = Some(validate(parsed.username())?); | ||
| self.account_name = Some(validate(a)?); | ||
| self.use_fabric_endpoint = true.into(); | ||
| } else { | ||
| return Err(Error::UrlNotRecognised { url: url.into() }.into()); | ||
| } | ||
| } | ||
| "https" => match host.split_once('.') { | ||
| Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => { | ||
| self.account_name = Some(validate(a)?); | ||
| let container = parsed.path_segments().unwrap().next().expect( | ||
| "iterator always contains at least one string (which may be empty)", | ||
| ); | ||
| if !container.is_empty() { | ||
| self.container_name = Some(validate(container)?); | ||
| "https" => { | ||
| // Regex to match WS-PL FQDN: | ||
| // "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please also add an example URL for each of the APIs you are adding support for?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added examples for the WS-PL DFS/Blob endpoints. We are waiting for the PM to confirm the ABFSS and WS-PL OneLake domains. |
||
| static WS_PL_REGEX: OnceLock<Regex> = OnceLock::new(); | ||
|
|
||
| let ws_pl_regex = WS_PL_REGEX.get_or_init(|| { | ||
| Regex::new( | ||
| r"(?i)^(?P<workspaceid>[0-9a-f]{32})\.z(?P<xy>[0-9a-f]{2})\.(onelake|dfs|blob)\.fabric\.microsoft\.com$" | ||
| ).unwrap() | ||
| }); | ||
|
|
||
| // WS-PL Fabric endpoint, eg- 1) c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com, 2) c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com | ||
| if let Some(captures) = ws_pl_regex.captures(host) { | ||
| let workspaceid = captures.name("workspaceid").unwrap().as_str(); | ||
| let xy = captures.name("xy").unwrap().as_str(); | ||
|
|
||
| if !workspaceid.get(0..2).is_some_and(|pfx| pfx.eq_ignore_ascii_case(xy)) { | ||
| return Err(Error::UrlNotRecognised { url: url.into() }.into()); | ||
| } | ||
|
|
||
| self.account_name = Some(format!("{workspaceid}.z{xy}")); | ||
| self.container_name = Some(validate(workspaceid)?); | ||
| self.use_fabric_endpoint = true.into(); | ||
| return Ok(()); | ||
| } | ||
| Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => { | ||
| self.account_name = Some(validate(a)?); | ||
| // Attempt to infer the container name from the URL | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why remove this comment? It seems helpful. |
||
| // - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv | ||
| // - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName> | ||
| // | ||
| // See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api> | ||
| let workspace = parsed.path_segments().unwrap().next().expect( | ||
| "iterator always contains at least one string (which may be empty)", | ||
| ); | ||
|
|
||
| // Api Onelake Fabric endpoint | ||
| if host.ends_with("-api.onelake.fabric.microsoft.com") { | ||
| let account = host.strip_suffix("-api.onelake.fabric.microsoft.com").unwrap(); | ||
| self.account_name = Some(validate(account)?); | ||
| let workspace = parsed.path_segments().unwrap().next() | ||
| .expect("iterator always contains at least one string (which may be empty)"); | ||
|
|
||
| if !workspace.is_empty() { | ||
| self.container_name = Some(workspace.to_string()) | ||
| self.container_name = Some(workspace.to_string()); | ||
| } | ||
|
|
||
| self.use_fabric_endpoint = true.into(); | ||
| return Ok(()); | ||
| } | ||
|
|
||
| match host.split_once('.') { | ||
| // Azure Storage public | ||
| Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => { | ||
| self.account_name = Some(validate(a)?); | ||
|
|
||
| // Attempt to infer the container name from the URL | ||
| let container = parsed.path_segments().unwrap().next() | ||
| .expect("iterator always contains at least one string (which may be empty)"); | ||
|
|
||
| if !container.is_empty() { | ||
| self.container_name = Some(validate(container)?); | ||
| } | ||
| } | ||
|
|
||
| // Fabric endpoints | ||
| Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => { | ||
| self.account_name = Some(validate(a)?); | ||
|
|
||
| let workspace = parsed.path_segments().unwrap().next() | ||
| .expect("iterator always contains at least one string (which may be empty)"); | ||
|
|
||
| if !workspace.is_empty() { | ||
| self.container_name = Some(workspace.to_string()); | ||
| } | ||
|
|
||
| self.use_fabric_endpoint = true.into(); | ||
| } | ||
|
|
||
| _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), | ||
| } | ||
| _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), | ||
| }, | ||
| scheme => { | ||
| let scheme = scheme.into(); | ||
|
|
@@ -1119,6 +1177,14 @@ mod tests { | |
| assert_eq!(builder.container_name, Some("file_system".to_string())); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("abfss://file_system@account-api.onelake.fabric.microsoft.com/") | |
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("account".to_string())); | ||
| assert_eq!(builder.container_name, Some("file_system".to_string())); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder.parse_url("abfs://container/path").unwrap(); | ||
| assert_eq!(builder.container_name, Some("container".to_string())); | ||
|
|
@@ -1166,6 +1232,14 @@ mod tests { | |
| assert_eq!(builder.container_name, None); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://account-api.onelake.fabric.microsoft.com/") | ||
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("account".to_string())); | ||
| assert_eq!(builder.container_name, None); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://account.dfs.fabric.microsoft.com/container") | ||
|
|
@@ -1190,6 +1264,38 @@ mod tests { | |
| assert_eq!(builder.container_name.as_deref(), Some("container")); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://Ab000000000000000000000000000000.zAb.dfs.fabric.microsoft.com/") | ||
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string())); | ||
| assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000")); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/") | ||
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string())); | ||
| assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000")); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://ab000000000000000000000000000000.zab.blob.fabric.microsoft.com/") | ||
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string())); | ||
| assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000")); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let mut builder = MicrosoftAzureBuilder::new(); | ||
| builder | ||
| .parse_url("https://ab000000000000000000000000000000.zab.onelake.fabric.microsoft.com/") | ||
| .unwrap(); | ||
| assert_eq!(builder.account_name, Some("ab000000000000000000000000000000.zab".to_string())); | ||
| assert_eq!(builder.container_name.as_deref(), Some("ab000000000000000000000000000000")); | ||
| assert!(builder.use_fabric_endpoint.get().unwrap()); | ||
|
|
||
| let err_cases = [ | ||
| "mailto://account.blob.core.windows.net/", | ||
| "az://blob.mydomain/", | ||
|
|
@@ -1256,4 +1362,4 @@ mod tests { | |
| panic!("{key} not propagated as ClientConfigKey"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see it in https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api