diff --git a/Cargo.lock b/Cargo.lock index bb5478d575f5c..b987c66fdd9e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12413,6 +12413,16 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "users" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24cc0f6d6f267b73e5a2cadf007ba8f9bc39c6a6f9666f8cf25ea809a153b032" +dependencies = [ + "libc", + "log", +] + [[package]] name = "utf-8" version = "0.7.6" @@ -12707,6 +12717,7 @@ dependencies = [ "tracing-tower", "typetag", "url", + "users", "uuid", "vector-common", "vector-config", diff --git a/Cargo.toml b/Cargo.toml index 26e0f5b355f4c..d23c607c2edb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -444,6 +444,7 @@ windows-service = "0.8.0" [target.'cfg(unix)'.dependencies] nix = { version = "0.26.2", default-features = false, features = ["socket", "signal"] } +users = "0.11.0" [target.'cfg(target_os = "linux")'.dependencies] netlink-packet-utils = "0.5.2" diff --git a/changelog.d/24440_add_file_owner_and_group.feature.md b/changelog.d/24440_add_file_owner_and_group.feature.md new file mode 100644 index 0000000000000..dea6e3a06b0be --- /dev/null +++ b/changelog.d/24440_add_file_owner_and_group.feature.md @@ -0,0 +1,3 @@ +Include file's owner and group in the file source namespace (UNIX only). + +authors: Hiruma31 diff --git a/src/sources/file.rs b/src/sources/file.rs index 62a95db362f10..8239a39d8b562 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -43,6 +43,10 @@ use crate::{ shutdown::ShutdownSignal, }; +/// UNIX specific imports for file ownership retrieval +#[cfg(unix)] +use std::os::unix::fs::MetadataExt; + #[derive(Debug, Snafu)] enum BuildError { #[snafu(display( @@ -148,6 +152,20 @@ pub struct FileConfig { #[configurable(metadata(docs::examples = "offset"))] pub offset_key: Option, + /// Overrides the name of the log field used to add the file owner to each event. + /// + /// The value is the owner (user) of the file where the event was read. + #[serde(default)] + #[configurable(metadata(docs::examples = "owner"))] + pub owner_key: Option, + + /// Overrides the name of the log field used to add the file owner group to each event. + /// + /// The value is the group owner of the file where the event was read. + #[serde(default)] + #[configurable(metadata(docs::examples = "group"))] + pub group_key: Option, + /// The delay between file discovery calls. /// /// This controls the interval at which files are searched. A higher value results in greater @@ -369,6 +387,8 @@ impl Default for FileConfig { ignore_not_found: false, host_key: None, offset_key: None, + owner_key: None, + group_key: None, data_dir: None, glob_minimum_cooldown_ms: default_glob_minimum_cooldown_ms(), message_start_indicator: None, @@ -444,6 +464,18 @@ impl SourceConfig for FileConfig { .and_then(|k| k.path) .map(LegacyKey::Overwrite); + let owner_key = self + .owner_key + .clone() + .and_then(|k| k.path) + .map(LegacyKey::Overwrite); + + let group_key = self + .group_key + .clone() + .and_then(|k| k.path) + .map(LegacyKey::Overwrite); + let schema_definition = BytesDeserializerConfig .schema_definition(global_log_namespace.merge(self.log_namespace)) .with_standard_vector_source_metadata() @@ -467,6 +499,20 @@ impl SourceConfig for FileConfig { &owned_value_path!("path"), Kind::bytes(), None, + ) + .with_source_metadata( + Self::NAME, + owner_key, + &owned_value_path!("owner"), + Kind::bytes().or_undefined(), + None, + ) + .with_source_metadata( + Self::NAME, + group_key, + &owned_value_path!("group"), + Kind::bytes().or_undefined(), + None, ); vec![SourceOutput::new_maybe_logs( @@ -560,6 +606,8 @@ pub fn file_source( hostname: crate::get_hostname().ok(), file_key: config.file_key.clone().path, offset_key: config.offset_key.clone().and_then(|k| k.path), + owner_key: config.owner_key.clone().and_then(|k| k.path), + group_key: config.group_key.clone().and_then(|k| k.path), }; let include = config.include.clone(); @@ -724,6 +772,32 @@ fn reconcile_position_options( } } +#[cfg(unix)] +fn get_file_ownership(file_path: &str) -> (Option, Option) { + use std::fs::metadata; + + match metadata(file_path) { + Ok(meta) => { + let uid = meta.uid(); + let gid = meta.gid(); + + let owner = + users::get_user_by_uid(uid).map(|user| user.name().to_string_lossy().into_owned()); + + let group = + users::get_group_by_gid(gid).map(|grp| grp.name().to_string_lossy().into_owned()); + + (owner, group) + } + Err(_) => (None, None), + } +} + +#[cfg(not(unix))] +fn get_file_ownership(_file_path: &str) -> (Option, Option) { + (None, None) +} + fn wrap_with_line_agg( rx: impl Stream + Send + std::marker::Unpin + 'static, config: line_agg::Config, @@ -759,6 +833,8 @@ struct EventMetadata { hostname: Option, file_key: Option, offset_key: Option, + owner_key: Option, + group_key: Option, } fn create_event( @@ -815,6 +891,30 @@ fn create_event( file, ); + let (owner, group) = get_file_ownership(file); + + if let Some(owner_name) = owner { + let legacy_owner_key = meta.owner_key.as_ref().map(LegacyKey::Overwrite); + log_namespace.insert_source_metadata( + FileConfig::NAME, + &mut event, + legacy_owner_key, + path!("owner"), + owner_name, + ); + } + + if let Some(group_name) = group { + let legacy_group_key = meta.group_key.as_ref().map(LegacyKey::Overwrite); + log_namespace.insert_source_metadata( + FileConfig::NAME, + &mut event, + legacy_group_key, + path!("group"), + group_name, + ); + } + emit!(FileEventsReceived { count: 1, file,