diff --git a/changelog.d/azure_blob_token_auth_via_env_vars_readded.fix.md b/changelog.d/azure_blob_token_auth_via_env_vars_readded.fix.md new file mode 100644 index 0000000000000..095689da93ccc --- /dev/null +++ b/changelog.d/azure_blob_token_auth_via_env_vars_readded.fix.md @@ -0,0 +1,4 @@ +The `azure_blob` sink once again supports authenticating via tokens in environment variables. +This unblocks production use cases as connection_strings are not sufficiently secure for use in real environments. + +authors: oganel diff --git a/src/sinks/azure_blob/config.rs b/src/sinks/azure_blob/config.rs index 3c2437c554106..adbcc475400d3 100644 --- a/src/sinks/azure_blob/config.rs +++ b/src/sinks/azure_blob/config.rs @@ -55,13 +55,38 @@ pub struct AzureBlobSinkConfig { /// | Allowed services | Blob | /// | Allowed resource types | Container & Object | /// | Allowed permissions | Read & Create | + /// + /// Either `storage_account`, or this field, must be specified. #[configurable(metadata( docs::examples = "DefaultEndpointsProtocol=https;AccountName=mylogstorage;AccountKey=storageaccountkeybase64encoded;EndpointSuffix=core.windows.net" ))] #[configurable(metadata( docs::examples = "BlobEndpoint=https://mylogstorage.blob.core.windows.net/;SharedAccessSignature=generatedsastoken" ))] - pub connection_string: SensitiveString, + pub connection_string: Option, + + /// The Azure Blob Storage Account name. + /// + /// Attempts to load credentials for the account via environment variables ([more information][env_cred_docs]) + /// + /// Either `connection_string`, or this field, must be specified. + /// + /// [env_cred_docs]: https://docs.rs/azure_identity/0.17.0/azure_identity/struct.EnvironmentCredential.html + #[configurable(metadata(docs::examples = "mylogstorage"))] + pub storage_account: Option, + + /// The Azure Blob Storage Endpoint URL. + /// + /// This is used to override the default blob storage endpoint URL in cases where you are using + /// credentials read from the environment/managed identities or access tokens without using an + /// explicit connection_string (which already explicitly supports overriding the blob endpoint + /// URL). + /// + /// This may only be used with `storage_account` and is ignored when used with + /// `connection_string`. + #[configurable(metadata(docs::examples = "https://test.blob.core.usgovcloudapi.net/"))] + #[configurable(metadata(docs::examples = "https://test.blob.core.windows.net/"))] + pub endpoint: Option, /// The Azure Blob Storage Account container name. #[configurable(metadata(docs::examples = "my-logs"))] @@ -147,8 +172,10 @@ pub fn default_blob_prefix() -> Template { impl GenerateConfig for AzureBlobSinkConfig { fn generate_config() -> toml::Value { toml::Value::try_from(Self { - connection_string: String::from("DefaultEndpointsProtocol=https;AccountName=some-account-name;AccountKey=some-account-key;").into(), + connection_string: Some(String::from("DefaultEndpointsProtocol=https;AccountName=some-account-name;AccountKey=some-account-key;").into()), + storage_account: Some(String::from("some-account-name")), container_name: String::from("logs"), + endpoint: None, blob_prefix: default_blob_prefix(), blob_time_format: Some(String::from("%s")), blob_append_uuid: Some(true), @@ -167,8 +194,12 @@ impl GenerateConfig for AzureBlobSinkConfig { impl SinkConfig for AzureBlobSinkConfig { async fn build(&self, _cx: SinkContext) -> Result<(VectorSink, Healthcheck)> { let client = azure_common::config::build_client( - self.connection_string.clone().into(), + self.connection_string + .as_ref() + .map(|v| v.inner().to_string()), + self.storage_account.as_ref().map(|v| v.to_string()), self.container_name.clone(), + self.endpoint.clone(), )?; let healthcheck = azure_common::config::build_healthcheck( diff --git a/src/sinks/azure_blob/test.rs b/src/sinks/azure_blob/test.rs index b7f1a9689229f..0468632468534 100644 --- a/src/sinks/azure_blob/test.rs +++ b/src/sinks/azure_blob/test.rs @@ -23,7 +23,9 @@ use crate::{ fn default_config(encoding: EncodingConfigWithFraming) -> AzureBlobSinkConfig { AzureBlobSinkConfig { connection_string: Default::default(), + storage_account: Default::default(), container_name: Default::default(), + endpoint: Default::default(), blob_prefix: Default::default(), blob_time_format: Default::default(), blob_append_uuid: Default::default(), diff --git a/src/sinks/azure_common/azure_credential_interop.rs b/src/sinks/azure_common/azure_credential_interop.rs new file mode 100644 index 0000000000000..e6750638d72ba --- /dev/null +++ b/src/sinks/azure_common/azure_credential_interop.rs @@ -0,0 +1,48 @@ +// Interop module for Azure Credentials +// Copied from https://github.com/Metaswitch/apt-transport-blob/pull/77/files + +use azure_core::credentials::TokenCredential; +use azure_core_for_storage::error::{Error, ErrorKind}; +use std::sync::Arc; + +#[derive(Clone, Debug)] +pub(crate) struct TokenCredentialInterop { + // Credential + credential: Arc, +} + +impl TokenCredentialInterop { + /// Create a new `TokenCredentialInterop` from a `DefaultAzureCredential` + pub fn new(credential: Arc) -> Self { + Self { credential } + } +} + +#[cfg_attr(target_arch = "wasm32", async_trait::async_trait(?Send))] +#[cfg_attr(not(target_arch = "wasm32"), async_trait::async_trait)] +impl azure_core_for_storage::auth::TokenCredential for TokenCredentialInterop { + async fn get_token( + &self, + scopes: &[&str], + ) -> azure_core_for_storage::Result { + let access_token = self + .credential + .get_token(scopes, None) + .await + .map_err(|err| Error::new(ErrorKind::Credential, err))?; + + // Construct an old AccessToken from the information in the new AccessToken. + let secret = access_token.token.secret().to_string(); + let access_token = azure_core_for_storage::auth::AccessToken { + token: secret.into(), + expires_on: access_token.expires_on, + }; + + // Return the new AccessToken + Ok(access_token) + } + + async fn clear_cache(&self) -> azure_core_for_storage::Result<()> { + Ok(()) + } +} diff --git a/src/sinks/azure_common/config.rs b/src/sinks/azure_common/config.rs index fadd0e2c4707c..539592935ac6d 100644 --- a/src/sinks/azure_common/config.rs +++ b/src/sinks/azure_common/config.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use azure_core::error::HttpError; use azure_core_for_storage::RetryOptions; -use azure_storage::{CloudLocation, ConnectionString}; +use azure_storage::{CloudLocation, ConnectionString, StorageCredentials}; use azure_storage_blobs::{blob::operations::PutBlockBlobResponse, prelude::*}; use bytes::Bytes; use futures::FutureExt; @@ -16,9 +16,14 @@ use vector_lib::{ use crate::{ event::{EventFinalizers, EventStatus, Finalizable}, - sinks::{Healthcheck, util::retries::RetryLogic}, + sinks::{ + Healthcheck, azure_common::environment_credentials::EnvironmentCredential, + util::retries::RetryLogic, + }, }; +use super::azure_credential_interop::TokenCredentialInterop; + #[derive(Debug, Clone)] pub struct AzureBlobRequest { pub blob_data: Bytes, @@ -125,35 +130,74 @@ pub fn build_healthcheck( } pub fn build_client( - connection_string: String, + connection_string: Option, + storage_account: Option, container_name: String, + endpoint: Option, ) -> crate::Result> { - let client = { - let connection_string = ConnectionString::new(&connection_string)?; - let account_name = connection_string - .account_name - .ok_or("Account name missing in connection string")?; - - match connection_string.blob_endpoint { - // When the blob_endpoint is provided, we use the Custom CloudLocation since it is - // required to contain the full URI to the blob storage API endpoint, this means - // that account_name is not required to exist in the connection_string since - // account_name is only used with the default CloudLocation in the Azure SDK to - // generate the storage API endpoint - Some(uri) => ClientBuilder::with_location( - CloudLocation::Custom { - uri: uri.to_string(), - account: account_name.to_string(), - }, - connection_string.storage_credentials()?, - ), - // Without a valid blob_endpoint in the connection_string, assume we are in Azure - // Commercial (AzureCloud location) and create a default Blob Storage Client that - // builds the API endpoint location using the account_name as input - None => ClientBuilder::new(account_name, connection_string.storage_credentials()?), + let client = match (connection_string, storage_account) { + (Some(connection_string_p), None) => { + let connection_string = ConnectionString::new(&connection_string_p)?; + let account_name = connection_string + .account_name + .ok_or("Account name missing in connection string")?; + + match connection_string.blob_endpoint { + // When the blob_endpoint is provided, we use the Custom CloudLocation since it is + // required to contain the full URI to the blob storage API endpoint, this means + // that account_name is not required to exist in the connection_string since + // account_name is only used with the default CloudLocation in the Azure SDK to + // generate the storage API endpoint + Some(uri) => ClientBuilder::with_location( + CloudLocation::Custom { + uri: uri.to_string(), + account: account_name.to_string(), + }, + connection_string.storage_credentials()?, + ), + // Without a valid blob_endpoint in the connection_string, assume we are in Azure + // Commercial (AzureCloud location) and create a default Blob Storage Client that + // builds the API endpoint location using the account_name as input + None => ClientBuilder::new(account_name, connection_string.storage_credentials()?), + } + .retry(RetryOptions::none()) + .container_client(container_name) + } + (None, Some(storage_account_p)) => { + let environment_credential = EnvironmentCredential::default(); + let creds = TokenCredentialInterop::new(Arc::new(environment_credential)); + let storage_credentials = StorageCredentials::token_credential(Arc::new(creds)); + + match endpoint { + // If a blob_endpoint is provided in the configuration, use it with a Custom + // CloudLocation, to allow overriding the blob storage API endpoint + Some(endpoint) => ClientBuilder::with_location( + CloudLocation::Custom { + uri: endpoint, + account: storage_account_p, + }, + storage_credentials, + ), + // Use the storage_account configuration parameter and assume we are in Azure + // Commercial (AzureCloud location) and build the blob storage API endpoint using + // the storage_account as input. + None => ClientBuilder::new(storage_account_p, storage_credentials), + } + .retry(RetryOptions::none()) + .container_client(container_name) + } + (None, None) => { + return Err( + "Either `connection_string` or `storage_account` has to be provided".into(), + ); + } + (Some(_), Some(_)) => { + return Err( + "`connection_string` and `storage_account` can't be provided at the same time" + .into(), + ); } - .retry(RetryOptions::none()) - .container_client(container_name) }; + Ok(Arc::new(client)) } diff --git a/src/sinks/azure_common/environment_credentials.rs b/src/sinks/azure_common/environment_credentials.rs new file mode 100644 index 0000000000000..b7376a66b3391 --- /dev/null +++ b/src/sinks/azure_common/environment_credentials.rs @@ -0,0 +1,105 @@ +use std::path::PathBuf; + +use azure_core::credentials::{AccessToken, TokenCredential, TokenRequestOptions}; +use azure_core::error::{Error, ErrorKind, ResultExt}; +use azure_identity::{ + ClientAssertionCredentialOptions, ClientSecretCredential, ClientSecretCredentialOptions, + TokenCredentialOptions, WorkloadIdentityCredential, WorkloadIdentityCredentialOptions, +}; + +const AZURE_TENANT_ID_ENV_KEY: &str = "AZURE_TENANT_ID"; +const AZURE_CLIENT_ID_ENV_KEY: &str = "AZURE_CLIENT_ID"; +const AZURE_FEDERATED_TOKEN_FILE: &str = "AZURE_FEDERATED_TOKEN_FILE"; +const AZURE_CLIENT_SECRET_ENV_KEY: &str = "AZURE_CLIENT_SECRET"; + +// Modified from https://docs.rs/azure_identity/0.17.0/src/azure_identity/token_credentials/environment_credentials.rs.html to work with version 0.25.0 of Azure Rust SDK + +/// Enables authentication with Workflows Identity if either `AZURE_FEDERATED_TOKEN` or `AZURE_FEDERATED_TOKEN_FILE` is set, +/// otherwise enables authentication to Azure Active Directory using client secret, or a username and password. +/// +/// +/// Details configured in the following environment variables: +/// +/// | Variable | Description | +/// |-------------------------------------|--------------------------------------------------| +/// | `AZURE_TENANT_ID` | The Azure Active Directory tenant(directory) ID. | +/// | `AZURE_CLIENT_ID` | The client(application) ID of an App Registration in the tenant. | +/// | `AZURE_CLIENT_SECRET` | A client secret that was generated for the App Registration. | +/// | `AZURE_FEDERATED_TOKEN_FILE` | Path to an federated token file. Variable is present in pods with aks workload identities. | +/// | `AZURE_AUTHORITY_HOST` | Url for the identity provider to exchange to federated token for an `access_token`. Variable is present in pods with aks workload identities. | +/// +/// This credential ultimately uses a `WorkloadIdentityCredential` or a `ClientSecretCredential` to perform the authentication using +/// these details. +/// Please consult the documentation of those classes for more details. +#[derive(Clone, Debug)] +pub struct EnvironmentCredential { + options: TokenCredentialOptions, +} + +impl Default for EnvironmentCredential { + /// Creates an instance of the `EnvironmentCredential` using the default `HttpClient`. + fn default() -> Self { + Self::new(TokenCredentialOptions::default()) + } +} + +impl EnvironmentCredential { + /// Creates a new `EnvironmentCredential`. + pub fn new(options: TokenCredentialOptions) -> Self { + Self { options } + } +} + +#[cfg_attr(target_arch = "wasm32", async_trait::async_trait(?Send))] +#[cfg_attr(not(target_arch = "wasm32"), async_trait::async_trait)] +impl TokenCredential for EnvironmentCredential { + async fn get_token( + &self, + scopes: &[&str], + options: Option, + ) -> azure_core::Result { + let tenant_id = std::env::var(AZURE_TENANT_ID_ENV_KEY) + .with_context(ErrorKind::Credential, || { + format!("missing tenant id set in {AZURE_TENANT_ID_ENV_KEY} environment variable") + })?; + let client_id = std::env::var(AZURE_CLIENT_ID_ENV_KEY) + .with_context(ErrorKind::Credential, || { + format!("missing client id set in {AZURE_CLIENT_ID_ENV_KEY} environment variable") + })?; + + let federated_token_file = std::env::var(AZURE_FEDERATED_TOKEN_FILE); + let client_secret = std::env::var(AZURE_CLIENT_SECRET_ENV_KEY); + + if let Ok(file) = federated_token_file { + if let Ok(credential) = + WorkloadIdentityCredential::new(Some(WorkloadIdentityCredentialOptions { + credential_options: ClientAssertionCredentialOptions { + credential_options: self.options.clone(), + ..Default::default() + }, + client_id: Some(client_id), + tenant_id: Some(tenant_id), + token_file_path: Some(PathBuf::from(file)), + })) + { + return credential.get_token(scopes, options).await; + } + } else if let Ok(client_secret) = client_secret { + if let Ok(credential) = ClientSecretCredential::new( + &tenant_id, + client_id, + client_secret.into(), + Some(ClientSecretCredentialOptions { + credential_options: self.options.clone(), + }), + ) { + return credential.get_token(scopes, options).await; + } + } + + Err(Error::message( + ErrorKind::Credential, + "no valid environment credential providers", + )) + } +} diff --git a/src/sinks/azure_common/mod.rs b/src/sinks/azure_common/mod.rs index 4d1c931977f03..0d8c6badff427 100644 --- a/src/sinks/azure_common/mod.rs +++ b/src/sinks/azure_common/mod.rs @@ -1,3 +1,5 @@ +pub mod azure_credential_interop; pub mod config; +pub mod environment_credentials; pub mod service; pub mod sink;