Skip to content

Commit 69f23d6

Browse files
authored
Logging to directory + log file management; default to log directory for hf_xet (#502)
This PR switches the default logging behavior to write log events to a file in `~/.cache/huggingface/xet/logs` (or to `xet/logs` under the specified cache directory when that is not `~/.cache/huggingface/`). In this directory, log files older than 2 weeks are cleaned up on process start, and if the total size of the files in the directory is larger than 1 GB, log files are deleted by age until the directory size is under 1 GB. Log files are named with a timestamp and PID; by default, logs newer than 1 day or logs whose associated PID is still active are never deleted. All of these thresholds are user-configurable constants.
1 parent 2eec20b commit 69f23d6

File tree

17 files changed

+1339
-179
lines changed

17 files changed

+1339
-179
lines changed

Cargo.lock

Lines changed: 461 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ members = [
1616
"merklehash",
1717
"progress_tracking",
1818
"utils",
19+
"xet_logging",
1920
"xet_runtime",
2021
]
2122

@@ -94,6 +95,7 @@ serde_repr = "0.1"
9495
sha2 = "0.10"
9596
shellexpand = "3.1"
9697
static_assertions = "1.1"
98+
sysinfo = "0.37"
9799
tempfile = "3.20"
98100
thiserror = "2.0"
99101
tokio = { version = "1.47" }

data/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ async-trait = { workspace = true }
4444
bytes = { workspace = true }
4545
chrono = { workspace = true }
4646
clap = { workspace = true }
47-
dirs = { workspace = true }
4847
jsonwebtoken = { workspace = true }
4948
lazy_static = { workspace = true }
5049
more-asserts = { workspace = true }
@@ -72,6 +71,7 @@ sha2 = { workspace = true }
7271
serial_test = { workspace = true }
7372
tracing-test = { workspace = true }
7473
ctor = { workspace = true }
74+
dirs = { workspace = true }
7575

7676
[features]
7777
strict = []

data/src/data_client.rs

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::env;
2-
use std::env::current_dir;
31
use std::fs::File;
42
use std::io::Read;
53
use std::path::{Path, PathBuf};
@@ -9,15 +7,13 @@ use cas_client::remote_client::PREFIX_DEFAULT;
97
use cas_client::{CHUNK_CACHE_SIZE_BYTES, CacheConfig, FileProvider, OutputProvider};
108
use cas_object::CompressionScheme;
119
use deduplication::DeduplicationMetrics;
12-
use dirs::home_dir;
1310
use progress_tracking::TrackingProgressUpdater;
1411
use progress_tracking::item_tracking::ItemProgressUpdater;
1512
use tracing::{Instrument, Span, info, info_span, instrument};
1613
use ulid::Ulid;
1714
use utils::auth::{AuthConfig, TokenRefresher};
18-
use utils::normalized_path_from_user_string;
1915
use xet_runtime::utils::run_constrained_with_semaphore;
20-
use xet_runtime::{GlobalSemaphoreHandle, XetRuntime, global_semaphore_handle};
16+
use xet_runtime::{GlobalSemaphoreHandle, XetRuntime, global_semaphore_handle, xet_cache_root};
2117

2218
use crate::configurations::*;
2319
use crate::constants::{INGESTION_BLOCK_SIZE, MAX_CONCURRENT_DOWNLOADS};
@@ -35,34 +31,7 @@ pub fn default_config(
3531
token_info: Option<(String, u64)>,
3632
token_refresher: Option<Arc<dyn TokenRefresher>>,
3733
) -> errors::Result<TranslatorConfig> {
38-
// if HF_HOME is set use that instead of ~/.cache/huggingface
39-
// if HF_XET_CACHE is set use that instead of ~/.cache/huggingface/xet
40-
// HF_XET_CACHE takes precedence over HF_HOME
41-
let cache_root_path = {
42-
// If HF_XET_CACHE is set, use that directly.
43-
if let Ok(cache) = env::var("HF_XET_CACHE") {
44-
normalized_path_from_user_string(cache)
45-
46-
// If HF_HOME is set, use the $HF_HOME/xet
47-
} else if let Ok(hf_home) = env::var("HF_HOME") {
48-
normalized_path_from_user_string(hf_home).join("xet")
49-
50-
// If XDG_CACHE_HOME is set, use the $XDG_CACHE_HOME/huggingface/xet, otherwise
51-
// use $HOME/.cache/huggingface/xet
52-
} else if let Ok(xdg_cache_home) = env::var("XDG_CACHE_HOME") {
53-
normalized_path_from_user_string(xdg_cache_home).join("huggingface").join("xet")
54-
55-
// Use the same default as huggingface_hub, ~/.cache/huggingface/xet (slightly nonstandard, but won't
56-
// mess with it).
57-
} else {
58-
home_dir()
59-
.unwrap_or(current_dir()?)
60-
.join(".cache")
61-
.join("huggingface")
62-
.join("xet")
63-
}
64-
};
65-
34+
let cache_root_path = xet_cache_root();
6635
info!("Using cache path {cache_root_path:?}.");
6736

6837
let (token, token_expiration) = token_info.unzip();
@@ -287,6 +256,7 @@ async fn smudge_file(
287256

288257
#[cfg(test)]
289258
mod tests {
259+
use dirs::home_dir;
290260
use serial_test::serial;
291261
use tempfile::tempdir;
292262
use utils::EnvVarGuard;

hf_xet/Cargo.lock

Lines changed: 131 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hf_xet/Cargo.toml

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ error_printer = { path = "../error_printer" }
1616
progress_tracking = { path = "../progress_tracking" }
1717
utils = { path = "../utils" }
1818
xet_runtime = { path = "../xet_runtime" }
19+
xet_logging = { path = "../xet_logging" }
1920

2021
async-trait = "0.1"
2122
bipbuffer = "0.1"
@@ -36,15 +37,6 @@ rand = "0.9.2"
3637
serde = { version = "1", features = ["derive"] }
3738
serde_json = "1"
3839
tracing = "0.1"
39-
tracing-subscriber = { version = "0.3", features = [
40-
"json",
41-
"tracing-log",
42-
"env-filter",
43-
"registry",
44-
] }
45-
tracing-appender = "0.2"
46-
47-
console-subscriber = { version = "0.4.1", optional = true }
4840

4941
# Unix-specific dependencies
5042
[target.'cfg(unix)'.dependencies]
@@ -58,7 +50,7 @@ ctrlc = "3.4"
5850
native-tls = ["cas_client/native-tls-vendored"]
5951
native-tls-vendored = ["cas_client/native-tls-vendored"]
6052
profiling = ["pprof"]
61-
tokio-console = ["dep:console-subscriber"]
53+
tokio-console = ["xet_logging/tokio-console"]
6254

6355
[profile.release]
6456
split-debuginfo = "packed"

0 commit comments

Comments
 (0)