Skip to content

fix(queries): eliminate double-escaping in regex patterns #541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions aw-client-rust/src/classes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use log::warn;
use rand::Rng;
use serde::{Deserialize, Serialize};
use serde_json;

use super::blocking::AwClient as ActivityWatchClient;

Expand All @@ -14,15 +15,20 @@ pub type CategoryId = Vec<String>;
/// Matching rule attached to a category.
///
/// The `regex` and `ignore_case` keys are optional when deserializing: if a
/// key is absent the field falls back to its `Default` value (`""` and
/// `false` respectively) instead of failing the whole deserialization.
pub struct CategorySpec {
/// Kind of rule. Mapped to the JSON key `"type"` (not `"spec_type"`).
#[serde(rename = "type")]
pub spec_type: String,
/// Pattern text of the rule; empty string when the key is absent.
#[serde(default)]
pub regex: String,
/// Case-insensitivity flag for the rule (meaning inferred from the name —
/// confirm against the consumer); `false` when the key is absent.
#[serde(default)]
pub ignore_case: bool,
}

/// One entry of the classes setting as it is deserialized from a JSON value.
///
/// Only `name` and `rule` are required; `id` and `data` may be absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassSetting {
/// Optional numeric identifier; `None` when the key is absent.
#[serde(default)]
pub id: Option<i32>,
/// Name segments of the category.
// NOTE(review): presumably this becomes the `CategoryId` (`Vec<String>`)
// when settings are converted to class rules — confirm in the conversion code.
pub name: Vec<String>,
/// Matching rule for this category.
pub rule: CategorySpec,
/// Optional free-form JSON payload; `None` when the key is absent.
#[serde(default)]
pub data: Option<serde_json::Value>,
}

/// Returns the default categorization classes
Expand Down Expand Up @@ -173,11 +179,16 @@ pub fn get_classes_from_server(host: &str, port: u16) -> Vec<(CategoryId, Catego
return default_classes();
}

let class_settings: Vec<ClassSetting> = serde_json::from_value(setting_value)
.unwrap_or_else(|_| {
warn!("Failed to deserialize classes setting, using default classes");
return vec![];
});
let class_settings: Vec<ClassSetting> = match serde_json::from_value(setting_value) {
Ok(classes) => classes,
Err(e) => {
warn!(
"Failed to deserialize classes setting: {}, using default classes",
e
);
return default_classes();
}
};

// Convert ClassSetting to (CategoryId, CategorySpec) format
class_settings
Expand Down
2 changes: 2 additions & 0 deletions aw-client-rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ impl AwClient {
.map(|(start, stop)| format!("{}/{}", start, stop))
.collect();

let query_lines: Vec<&str> = query.split('\n').collect();

// Result is a sequence, one element per timeperiod
self.client
.post(url)
Expand Down
77 changes: 48 additions & 29 deletions aw-client-rust/src/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,21 +165,40 @@ impl QueryParams {
}

/// Helper function to serialize classes in the format expected by the categorize function
/// This version builds the query string directly without JSON serialization to avoid double-escaping
fn serialize_classes(classes: &[ClassRule]) -> String {
// Convert Vec<(CategoryId, CategorySpec)> to the JSON format expected by categorize
let serialized_classes: Vec<(Vec<String>, serde_json::Value)> = classes
.iter()
.map(|(category_id, category_spec)| {
let spec_json = serde_json::json!({
"type": category_spec.spec_type,
"regex": category_spec.regex,
"ignore_case": category_spec.ignore_case
});
(category_id.clone(), spec_json)
})
.collect();

serde_json::to_string(&serialized_classes).unwrap_or_else(|_| "[]".to_string())
let mut parts = Vec::new();

for (category_id, category_spec) in classes {
// Build category array string manually: ["Work", "Programming"]
let category_str = format!(
"[{}]",
category_id
.iter()
.map(|s| format!("\"{}\"", s))
.collect::<Vec<_>>()
.join(", ")
);

// Build spec object manually to avoid JSON escaping regex patterns
let mut spec_parts = Vec::new();
spec_parts.push(format!("\"type\": \"{}\"", category_spec.spec_type));

// Only include regex for non-"none" types, and use raw pattern without escaping
if category_spec.spec_type != "none" {
spec_parts.push(format!("\"regex\": \"{}\"", category_spec.regex));
}

// Always include ignore_case field
spec_parts.push(format!("\"ignore_case\": {}", category_spec.ignore_case));

let spec_str = format!("{{{}}}", spec_parts.join(", "));

// Build the tuple [category, spec]
parts.push(format!("[{}, {}]", category_str, spec_str));
}

format!("[{}]", parts.join(", "))
}

fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
Expand All @@ -195,7 +214,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
if params.base.filter_afk {
query.push(format!(
"not_afk = flood(query_bucket(find_bucket(\"{}\")));
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
escape_doublequote(&params.bid_afk)
));
}
Expand All @@ -207,7 +226,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
if params.base.include_audible {
query.push(
"audible_events = filter_keyvals(browser_events, \"audible\", [true]);
not_afk = period_union(not_afk, audible_events)"
not_afk = period_union(not_afk, audible_events)"
.to_string(),
);
}
Expand All @@ -221,7 +240,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
// Add categorization if classes specified
if !params.base.classes.is_empty() {
query.push(format!(
"events = categorize(events, {})",
"events = categorize(events, {});",
serialize_classes(&params.base.classes)
));
}
Expand Down Expand Up @@ -252,7 +271,7 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
// Add categorization if classes specified
if !params.base.classes.is_empty() {
query.push(format!(
"events = categorize(events, {})",
"events = categorize(events, {});",
serialize_classes(&params.base.classes)
));
}
Expand All @@ -269,26 +288,26 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
}

/// Builds the query statements that collect events from every known browser bucket
/// into `browser_events`.
///
/// For each bucket id in `params.base.bid_browsers`, every entry of
/// `BROWSER_APPNAMES` whose key is contained in the bucket id contributes one
/// group of statements: load and flood the bucket, restrict it to periods where
/// a matching app is present in `events`, split URL events, append to
/// `browser_events` and re-sort by timestamp.
///
/// The returned text is `browser_events = [];` when nothing matches. Otherwise
/// the last statement carries no trailing `;`. Consecutive groups are always
/// separated by `;`.
fn build_browser_events(params: &DesktopQueryParams) -> String {
    let mut query = String::from("browser_events = [];");

    for browser_bucket in &params.base.bid_browsers {
        for (browser_name, app_names) in BROWSER_APPNAMES.entries() {
            if browser_bucket.contains(browser_name) {
                // Each group ends without `;`, so a previous group must be
                // terminated before the next one is appended; otherwise two
                // statements would run together when several buckets match.
                if !query.ends_with(';') {
                    query.push(';');
                }

                query.push_str(&format!(
                    "\nevents_{0} = flood(query_bucket(\"{1}\"));\n\
                     window_{0} = filter_keyvals(events, \"app\", {2});\n\
                     events_{0} = filter_period_intersect(events_{0}, window_{0});\n\
                     events_{0} = split_url_events(events_{0});\n\
                     browser_events = concat(browser_events, events_{0});\n\
                     browser_events = sort_by_timestamp(browser_events)",
                    browser_name,
                    escape_doublequote(browser_bucket),
                    serde_json::to_string(app_names).unwrap()
                ));
            }
        }
    }

    query
}

Expand Down Expand Up @@ -414,9 +433,9 @@ mod tests {
assert!(serialized.contains("Programming"));
assert!(serialized.contains("Google Docs"));
assert!(serialized.contains("GitHub|vim"));
assert!(serialized.contains("\"type\":\"regex\""));
assert!(serialized.contains("\"ignore_case\":false"));
assert!(serialized.contains("\"ignore_case\":true"));
assert!(serialized.contains("\"type\": \"regex\""));
assert!(serialized.contains("\"ignore_case\": false"));
assert!(serialized.contains("\"ignore_case\": true"));
}

#[test]
Expand Down
Loading