Skip to content
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
1f9a3eb
feat: Introduce an anonymous usage telemetry system with a dedicated …
Nathan903 Mar 6, 2026
fe9c572
remove server code
Nathan903 Mar 6, 2026
8e09801
fix cargo test failure
Nathan903 Mar 6, 2026
f52fd36
chore: update Cargo.lock.
Nathan903 Mar 6, 2026
ece60a0
refactor: streamline imports in telemetry module; update user docs to…
Nathan903 Mar 6, 2026
2d36ab0
chore: update version to 0.2.0-dev.2 in Helm chart
Nathan903 Mar 6, 2026
328432d
refactor: rust linter
Nathan903 Mar 6, 2026
88a6565
refactor: py linter
Nathan903 Mar 6, 2026
2ae4119
Merge branch 'main' into 2024-anonymous-usage-telemetry-framework-for…
Nathan903 Mar 6, 2026
03dca64
feat: Implement graceful shutdown for the telemetry background task u…
Nathan903 Mar 6, 2026
b17a4ff
Revert "feat: Implement graceful shutdown for the telemetry backgroun…
Nathan903 Mar 6, 2026
807bcb6
Reorder router initialization to occur before telemetry setup.
Nathan903 Mar 6, 2026
72b77d4
fix: Resolve CLP_VERSION file path using `_clp_home` instead of `logs…
Nathan903 Mar 6, 2026
ffa6dda
fix: make `CLP_DISABLE_TELEMETRY` environment variable check case-ins…
Nathan903 Mar 6, 2026
5da85cc
fix: replace existing telemetry configuration block or append a new o…
Nathan903 Mar 6, 2026
e997a5e
refactor: separate `CLP_HOST_ARCH` export from its assignment.
Nathan903 Mar 6, 2026
ca531c5
docs: add GitHub links to telemetry client, consent prompt, and serve…
Nathan903 Mar 6, 2026
07d9959
fix: improve telemetry config detection by anchoring grep pattern to …
Nathan903 Mar 6, 2026
cfc2f8a
Update Docker Compose project name to use the full instance ID.
Nathan903 Mar 6, 2026
2e7cc5a
build: Add `rust-version = "1.85"` to `Cargo.toml` files for api-serv…
Nathan903 Mar 6, 2026
2a141fa
refactor: Strip whitespace from read instance IDs and generate 4-char…
Nathan903 Mar 6, 2026
0ac9335
fix rust lint
Nathan903 Mar 6, 2026
fb32994
feat: Add DO_NOT_TRACK environment variable to telemetry configuration
Nathan903 Mar 6, 2026
6e46a65
fix: Handle empty input for telemetry consent prompt
Nathan903 Mar 6, 2026
0d1339b
feat: Enhance telemetry consent logic to support configurable logs di…
Nathan903 Mar 6, 2026
7c2a4d6
fix: Update instance ID generation to use full UUID instead of trunca…
Nathan903 Mar 6, 2026
0404d61
fix: Update instance ID validation to accept full UUIDs and 4-charact…
Nathan903 Mar 6, 2026
8351497
Merge branch 'main' into 2024-anonymous-usage-telemetry-framework-for…
Nathan903 Mar 6, 2026
bbaafad
chore: Bump chart version to 0.2.0-dev.3
Nathan903 Mar 6, 2026
0ba0b26
Merge branch 'main' into 2024-anonymous-usage-telemetry-framework-for…
Nathan903 Mar 6, 2026
7d34e8c
Merge branch 'main' into 2024-anonymous-usage-telemetry-framework-for…
junhaoliao Mar 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 267 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions components/api-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,8 @@ thiserror = "2.0.18"
tokio = { version = "1.49.0", features = ["full"] }
tower-http = { version = "0.6.8", features = ["cors"] }
tracing = "0.1.44"
uuid = { version = "1", features = ["v4"] }
utoipa = { version = "5.4.0", features = ["axum_extras"] }
utoipa-axum = "0.2.0"
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
chrono = { version = "0.4", features = ["serde"] }
3 changes: 3 additions & 0 deletions components/api-server/src/bin/api_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ async fn main() -> anyhow::Result<()> {

let router = api_server::routes::from_client(client)?;

// Spawn telemetry background task (non-blocking, failures are silent)
tokio::spawn(api_server::telemetry::run_telemetry_loop(config));

tracing::info!("Server started at {addr}");
axum::serve(listener, router)
.with_graceful_shutdown(shutdown_signal())
Expand Down
1 change: 1 addition & 0 deletions components/api-server/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod client;
mod error;
pub mod routes;
pub mod telemetry;
144 changes: 144 additions & 0 deletions components/api-server/src/telemetry.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
use std::{env, time::Duration};

use chrono::Utc;
use clp_rust_utils::clp_config::package::config::Config;
use serde::Serialize;

/// URL that telemetry events are POSTed to as JSON.
const TELEMETRY_ENDPOINT: &str = "https://telemetry.yscope.io/v1/events";
/// Delay between consecutive `heartbeat` events (24 hours).
const TELEMETRY_SEND_INTERVAL: Duration = Duration::from_hours(24);
/// Request timeout configured on the HTTP client used to send telemetry.
const TELEMETRY_HTTP_TIMEOUT: Duration = Duration::from_secs(5);

/// Schema version for the telemetry payload.
const SCHEMA_VERSION: u32 = 1;

/// JSON body of a single telemetry event.
///
/// Fields are serialized under their Rust names, in declaration order. `payload` is left out of
/// the serialized output entirely when it is `None`.
#[derive(Debug, Serialize)]
struct TelemetryEvent {
/// Version of this payload layout; populated from `SCHEMA_VERSION`.
schema_version: u32,
/// Identifier of the reporting instance; populated from the `CLP_INSTANCE_ID` env var.
telemetry_id: String,
/// UTC creation time of the event, formatted as `%Y-%m-%dT%H:%M:%SZ`.
timestamp: String,
/// Kind of event; this module emits `deployment_start` and `heartbeat`.
event_type: String,
/// CLP version; populated from the `CLP_VERSION` env var.
clp_version: String,
/// How CLP was deployed; populated from the `CLP_DEPLOYMENT_METHOD` env var.
deployment_method: String,
/// Host OS; populated from `CLP_HOST_OS`, falling back to `std::env::consts::OS`.
os: String,
/// Host OS version; populated from the `CLP_HOST_OS_VERSION` env var.
os_version: String,
/// Host CPU architecture; populated from `CLP_HOST_ARCH`, falling back to
/// `std::env::consts::ARCH`.
arch: String,
/// Configured storage engine, reported as `clp` or `clp-s`.
storage_engine: String,
/// Optional event-specific data; the events built in this module leave it as `None`.
#[serde(skip_serializing_if = "Option::is_none")]
payload: Option<serde_json::Value>,
}

/// Checks whether telemetry is disabled through any of the supported mechanisms.
fn is_telemetry_disabled(config: &Config) -> bool {
// Check config file setting
if config.telemetry.disable {
return true;
}

// Check CLP_DISABLE_TELEMETRY env var
if let Ok(val) = env::var("CLP_DISABLE_TELEMETRY")
&& (val.eq_ignore_ascii_case("true") || val == "1")
{
return true;
}

// Check DO_NOT_TRACK env var (https://consoledonottrack.com/)
if let Ok(val) = env::var("DO_NOT_TRACK")
&& val == "1"
{
return true;
}

false
}

/// Maps the storage engine selected in `config` to the identifier reported in telemetry events.
const fn get_storage_engine_str(config: &Config) -> &'static str {
    use clp_rust_utils::clp_config::package::config::StorageEngine;

    match config.package.storage_engine {
        StorageEngine::ClpS => "clp-s",
        StorageEngine::Clp => "clp",
    }
}

/// Placeholder reported for any telemetry field whose value cannot be determined.
const UNKNOWN_FIELD_VALUE: &str = "unknown";

/// Reads the environment variable `name` with surrounding whitespace trimmed.
///
/// Returns `fallback` when the variable is unset, is not valid Unicode, or is blank. Blank values
/// are treated as unset because a launcher may forward a variable that was never populated as an
/// empty string, which would otherwise be reported verbatim.
fn env_var_or(name: &str, fallback: &str) -> String {
    match env::var(name) {
        Ok(value) if !value.trim().is_empty() => value.trim().to_owned(),
        _ => fallback.to_owned(),
    }
}

/// Builds a telemetry event of kind `event_type` describing the current deployment.
///
/// Deployment metadata is read from the `CLP_INSTANCE_ID`, `CLP_VERSION`,
/// `CLP_DEPLOYMENT_METHOD`, `CLP_HOST_OS`, `CLP_HOST_OS_VERSION` and `CLP_HOST_ARCH` environment
/// variables. A missing or blank variable is reported as `"unknown"`, except for the OS and
/// architecture, which fall back to the values this binary was compiled for.
///
/// # Parameters
/// * `event_type` - Event kind stored in the `event_type` field.
/// * `config` - Package config; only the storage engine is read from it.
///
/// # Returns
/// The populated event, timestamped with the current UTC time and carrying no payload.
fn build_event(event_type: &str, config: &Config) -> TelemetryEvent {
    TelemetryEvent {
        schema_version: SCHEMA_VERSION,
        telemetry_id: env_var_or("CLP_INSTANCE_ID", UNKNOWN_FIELD_VALUE),
        timestamp: Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string(),
        event_type: event_type.to_owned(),
        clp_version: env_var_or("CLP_VERSION", UNKNOWN_FIELD_VALUE),
        deployment_method: env_var_or("CLP_DEPLOYMENT_METHOD", UNKNOWN_FIELD_VALUE),
        os: env_var_or("CLP_HOST_OS", env::consts::OS),
        os_version: env_var_or("CLP_HOST_OS_VERSION", UNKNOWN_FIELD_VALUE),
        arch: env_var_or("CLP_HOST_ARCH", env::consts::ARCH),
        storage_engine: get_storage_engine_str(config).to_owned(),
        payload: None,
    }
}

/// Delivers `event` to the telemetry endpoint, or only logs it when debug mode is on.
///
/// When the `CLP_TELEMETRY_DEBUG` env var is `true` (ASCII case-insensitive) or `1`, the event is
/// pretty-printed to the log and no request is made. Otherwise the event is POSTed as JSON to
/// `TELEMETRY_ENDPOINT`. Either way the outcome is only logged and nothing is returned to the
/// caller.
async fn send_event(client: &reqwest::Client, event: &TelemetryEvent) {
    let debug_mode = env::var("CLP_TELEMETRY_DEBUG")
        .is_ok_and(|flag| flag.eq_ignore_ascii_case("true") || flag == "1");

    if !debug_mode {
        let outcome = client.post(TELEMETRY_ENDPOINT).json(event).send().await;
        match outcome {
            Err(e) => {
                tracing::debug!("Failed to send telemetry event (this is not an error): {e}");
            }
            Ok(resp) => {
                tracing::debug!("Telemetry event sent, status: {}", resp.status());
            }
        }
        return;
    }

    // Debug mode: show what would have been sent without touching the network.
    match serde_json::to_string_pretty(event) {
        Err(e) => tracing::warn!("[telemetry-debug] Failed to serialize event: {e}"),
        Ok(json) => tracing::info!("[telemetry-debug] Would send:\n{json}"),
    }
}

/// Background task that reports anonymous usage telemetry.
///
/// Returns immediately when telemetry is disabled or the HTTP client cannot be built. Otherwise
/// it sends one `deployment_start` event, then one `heartbeat` event after every
/// `TELEMETRY_SEND_INTERVAL`, and never returns. Nothing is propagated to the caller; problems
/// are only logged.
///
/// Intended to be detached onto the tokio runtime:
/// ```ignore
/// tokio::spawn(telemetry::run_telemetry_loop(config));
/// ```
pub async fn run_telemetry_loop(config: Config) {
    if is_telemetry_disabled(&config) {
        tracing::info!("Anonymous telemetry is disabled.");
        return;
    }
    tracing::info!("Anonymous telemetry is enabled. Set CLP_DISABLE_TELEMETRY=true to disable.");

    let build_result = reqwest::Client::builder()
        .timeout(TELEMETRY_HTTP_TIMEOUT)
        .build();
    let client = match build_result {
        Err(e) => {
            tracing::debug!("Failed to create telemetry HTTP client: {e}");
            return;
        }
        Ok(built) => built,
    };

    // The first iteration reports the deployment start; every later one is a heartbeat.
    let mut event_type = "deployment_start";
    loop {
        let event = build_event(event_type, &config);
        send_event(&client, &event).await;

        event_type = "heartbeat";
        tokio::time::sleep(TELEMETRY_SEND_INTERVAL).await;
    }
}
28 changes: 27 additions & 1 deletion components/clp-package-utils/clp_package_utils/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,32 @@ def _set_up_env_for_api_server(self) -> EnvVarsDict:
"CLP_API_SERVER_PORT": str(self._clp_config.api_server.port),
}

# Telemetry env vars
instance_id_file = self._clp_config.logs_directory / "instance-id"
resolved_id_file = resolve_host_path_in_container(instance_id_file)
if resolved_id_file.exists():
with resolved_id_file.open("r") as f:
env_vars["CLP_INSTANCE_ID"] = f.readline().strip()

version_file = resolve_host_path_in_container(self._clp_home / "VERSION")
if version_file.exists():
with version_file.open("r") as f:
env_vars["CLP_VERSION"] = f.read().strip()

env_vars["CLP_DEPLOYMENT_METHOD"] = "docker-compose"

# Pass through host OS info (set by start-clp.sh)
for var in (
"CLP_HOST_OS",
"CLP_HOST_OS_VERSION",
"CLP_HOST_ARCH",
"CLP_DISABLE_TELEMETRY",
"CLP_TELEMETRY_DEBUG",
):
val = os.environ.get(var)
if val is not None:
env_vars[var] = val

return env_vars

def _set_up_env_for_log_ingestor(self) -> EnvVarsDict:
Expand Down Expand Up @@ -1155,7 +1181,7 @@ def get_or_create_instance_id(clp_config: ClpConfig) -> str:
with open(resolved_instance_id_file_path, "r") as f:
instance_id = f.readline()
else:
instance_id = str(uuid.uuid4())[-4:]
instance_id = str(uuid.uuid4())
with open(resolved_instance_id_file_path, "w") as f:
f.write(instance_id)

Expand Down
5 changes: 5 additions & 0 deletions components/clp-py-utils/clp_py_utils/clp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,10 @@ def _get_env_var(name: str) -> str:
return value


# Settings for the `telemetry` section of the CLP config.
class Telemetry(BaseModel):
# Opt-out flag for anonymous usage telemetry; defaults to False.
disable: bool = False


class ClpConfig(BaseModel):
container_image_ref: NonEmptyStr | None = None

Expand Down Expand Up @@ -840,6 +844,7 @@ class ClpConfig(BaseModel):
logs_directory: SerializablePath = CLP_DEFAULT_LOG_DIRECTORY_PATH
tmp_directory: SerializablePath = CLP_DEFAULT_TMP_DIRECTORY_PATH
aws_config_directory: SerializablePath | None = None
telemetry: Telemetry = Telemetry()

_container_image_id_path: SerializablePath = PrivateAttr(
default=CLP_PACKAGE_CONTAINER_IMAGE_ID_PATH
Expand Down
9 changes: 9 additions & 0 deletions components/clp-rust-utils/src/clp_config/package/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub struct Config {
pub stream_output: StreamOutput,
pub logs_input: LogsInput,
pub archive_output: ArchiveOutput,
pub telemetry: Telemetry,
}

impl Default for Config {
Expand All @@ -37,6 +38,7 @@ impl Default for Config {
config: FsIngestion::default(),
},
archive_output: ArchiveOutput::default(),
telemetry: Telemetry::default(),
}
}
}
Expand Down Expand Up @@ -316,6 +318,13 @@ pub enum LogsInput {
},
}

/// Mirror of `clp_py_utils.clp_config.Telemetry`.
///
/// With `#[serde(default)]`, fields missing from the input take the value from this struct's
/// `Default` implementation.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq)]
#[serde(default)]
pub struct Telemetry {
/// Opt-out flag for anonymous usage telemetry. The derived default is `false`.
pub disable: bool,
}

#[cfg(test)]
mod tests {
use super::LogsInput;
Expand Down
2 changes: 1 addition & 1 deletion components/clp-rust-utils/src/database/mysql.rs
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this change is correct.
Without the added `,ignore`, running `cargo test --workspace` fails.

Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::clp_config::package::{
///
/// # Examples
///
/// ```rust
/// ```rust,ignore
/// impl_sqlx_type!(IngestedS3ObjectMetadataStatus => str);
/// ```
#[macro_export]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,8 @@
#
## Location of the AWS tools' config files (e.g., `~/.aws`)
#aws_config_directory: null
#
## Anonymous usage telemetry. Set `disable` to true to opt out.
## See: https://docs.yscope.com/clp/main/user-guide/telemetry
#telemetry:
# disable: false
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,8 @@ log_ingestor: null
#
## Location of the AWS tools' config files (e.g., `~/.aws`)
#aws_config_directory: null
#
## Anonymous usage telemetry. Set `disable` to true to opt out.
## See: https://docs.yscope.com/clp/main/user-guide/telemetry
#telemetry:
# disable: false
86 changes: 86 additions & 0 deletions components/package-template/src/sbin/start-clp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,94 @@ common_env_path="$script_dir/.common-env.sh"
# shellcheck source=.common-env.sh
source "$common_env_path"

# --- Telemetry consent prompt ---
# Determines whether to show the first-run telemetry consent prompt.
# The prompt is skipped if:
#   1. CLP_DISABLE_TELEMETRY is "true" (any case) or "1", or DO_NOT_TRACK is "1"
#   2. clp-config.yaml already has a top-level `telemetry:` key
#   3. instance-id file already exists (not a first run)
# If shown and the user declines, telemetry.disable is written to clp-config.yaml.

telemetry_prompt_needed=false

# Find the config file path (mirror the default in start_clp.py)
clp_config_path="${CLP_HOME}/etc/clp-config.yaml"

# Check env vars first
clp_disable_telemetry_lower="${CLP_DISABLE_TELEMETRY:-}"
clp_disable_telemetry_lower="${clp_disable_telemetry_lower,,}"
if [[ "$clp_disable_telemetry_lower" == "true" ]] || [[ "$clp_disable_telemetry_lower" == "1" ]]; then
    telemetry_prompt_needed=false
elif [[ "${DO_NOT_TRACK:-}" == "1" ]]; then
    telemetry_prompt_needed=false
# Check if telemetry is already configured in the config file
elif [[ -f "$clp_config_path" ]] && grep -q -E '^telemetry:' "$clp_config_path" 2>/dev/null; then
    telemetry_prompt_needed=false
# Check if instance-id exists (not first run)
elif [[ -f "${CLP_HOME}/var/log/instance-id" ]]; then
    telemetry_prompt_needed=false
else
    telemetry_prompt_needed=true
fi

if [[ "$telemetry_prompt_needed" == "true" ]]; then
    if [[ -t 0 ]]; then
        # Interactive: show the consent prompt
        echo "================================================================================"
        echo "CLP collects anonymous usage telemetry to help improve the software."
        echo "This includes: CLP version, OS/architecture, deployment method, and"
        echo "component health status. It does NOT include: log content, queries,"
        echo "hostnames, IP addresses, or any personally identifiable"
        echo "information."
        echo ""
        echo "Telemetry is sent to: https://telemetry.yscope.io"
        echo "For details, see: https://docs.yscope.com/clp/main/user-guide/telemetry"
        echo ""
        echo "You can disable telemetry at any time by setting CLP_DISABLE_TELEMETRY=true"
        echo "or by blocking https://telemetry.yscope.io at the network level."
        echo ""
        read -r -p "Enable anonymous telemetry to help improve CLP? [Y/n] " telemetry_response
        echo "================================================================================"

        # Accept "n" and "no" in any letter case as a refusal. Matching only a single letter
        # would treat an answer of "no" as consent.
        telemetry_response_lower="${telemetry_response,,}"
        if [[ "$telemetry_response_lower" =~ ^(n|no)$ ]]; then
            # User opted out — persist to config
            if [[ -f "$clp_config_path" ]]; then
                # The prompt is only reached when the config has no top-level `telemetry:` key
                # (checked above), so appending cannot create a duplicate key.
                {
                    echo ""
                    echo "telemetry:"
                    echo " disable: true"
                } >> "$clp_config_path"
            else
                printf "telemetry:\n disable: true\n" > "$clp_config_path"
            fi
            echo "Telemetry has been disabled. You can re-enable it in ${clp_config_path}."
        fi
    fi
    # Non-interactive: default to enabled (no prompt, no config write needed)
fi

# --- Export host OS info for telemetry ---
# The OS name is fixed to "linux"; the version comes from /etc/os-release when that file exists,
# and the architecture from `uname -m`.
CLP_HOST_OS="linux"
CLP_HOST_OS_VERSION="unknown"
if [[ -f /etc/os-release ]]; then
    # Source the file in a subshell so its variables do not leak into this script.
    CLP_HOST_OS_VERSION="$(source /etc/os-release && echo "${ID:-unknown}-${VERSION_ID:-unknown}")"
fi
CLP_HOST_ARCH="$(uname -m)"
export CLP_HOST_OS CLP_HOST_OS_VERSION CLP_HOST_ARCH

# Run the `clp_package_utils.scripts.start_clp` Python module in a one-off `clp-runtime` compose
# container (removed on exit via `--rm`), forwarding this script's arguments unchanged.
docker compose -f "$CLP_HOME/docker-compose.runtime.yaml" \
run --rm "${CLP_COMPOSE_RUN_EXTRA_FLAGS[@]}" clp-runtime \
python3 \
-m clp_package_utils.scripts.start_clp \
"$@"

Loading
Loading