Skip to content

Commit b705f04

Browse files
committed
fix(queries): eliminate double-escaping in regex patterns
The serialize_classes function was using serde_json serialization, which caused regex patterns to be double-escaped (e.g., 't\.co' became 't\\.co'), breaking pattern matching in ActivityWatch queries. Core changes: - Rewrite serialize_classes() to build JSON strings manually instead of using serde_json - Preserve single-escaped regex patterns for proper matching - Always include the 'ignore_case' field (emitted for both true and false) - Only include the 'regex' field for non-'none' type categories - Improve error handling in classes deserialization (log the error and fall back to the default classes) - Add optional fields to the ClassSetting struct for better compatibility. This fixes the core regex pattern matching issues in ActivityWatch canonical queries that were causing incorrect categorization results.
1 parent f4bb94d commit b705f04

File tree

4 files changed

+67
-35
lines changed

4 files changed

+67
-35
lines changed

aw-client-rust/src/classes.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use log::warn;
66
use rand::Rng;
77
use serde::{Deserialize, Serialize};
8+
use serde_json;
89

910
use super::blocking::AwClient as ActivityWatchClient;
1011

@@ -14,15 +15,20 @@ pub type CategoryId = Vec<String>;
1415
pub struct CategorySpec {
1516
#[serde(rename = "type")]
1617
pub spec_type: String,
18+
#[serde(default)]
1719
pub regex: String,
1820
#[serde(default)]
1921
pub ignore_case: bool,
2022
}
2123

2224
#[derive(Debug, Clone, Serialize, Deserialize)]
2325
pub struct ClassSetting {
26+
#[serde(default)]
27+
pub id: Option<i32>,
2428
pub name: Vec<String>,
2529
pub rule: CategorySpec,
30+
#[serde(default)]
31+
pub data: Option<serde_json::Value>,
2632
}
2733

2834
/// Returns the default categorization classes
@@ -173,11 +179,16 @@ pub fn get_classes_from_server(host: &str, port: u16) -> Vec<(CategoryId, Catego
173179
return default_classes();
174180
}
175181

176-
let class_settings: Vec<ClassSetting> = serde_json::from_value(setting_value)
177-
.unwrap_or_else(|_| {
178-
warn!("Failed to deserialize classes setting, using default classes");
179-
return vec![];
180-
});
182+
let class_settings: Vec<ClassSetting> = match serde_json::from_value(setting_value) {
183+
Ok(classes) => classes,
184+
Err(e) => {
185+
warn!(
186+
"Failed to deserialize classes setting: {}, using default classes",
187+
e
188+
);
189+
return default_classes();
190+
}
191+
};
181192

182193
// Convert ClassSetting to (CategoryId, CategorySpec) format
183194
class_settings

aw-client-rust/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ impl AwClient {
123123
.map(|(start, stop)| format!("{}/{}", start, stop))
124124
.collect();
125125

126+
let query_lines: Vec<&str> = query.split('\n').collect();
127+
126128
// Result is a sequence, one element per timeperiod
127129
self.client
128130
.post(url)

aw-client-rust/src/queries.rs

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -165,21 +165,40 @@ impl QueryParams {
165165
}
166166

167167
/// Helper function to serialize classes in the format expected by the categorize function
168+
/// This version builds the query string directly without JSON serialization to avoid double-escaping
168169
fn serialize_classes(classes: &[ClassRule]) -> String {
169-
// Convert Vec<(CategoryId, CategorySpec)> to the JSON format expected by categorize
170-
let serialized_classes: Vec<(Vec<String>, serde_json::Value)> = classes
171-
.iter()
172-
.map(|(category_id, category_spec)| {
173-
let spec_json = serde_json::json!({
174-
"type": category_spec.spec_type,
175-
"regex": category_spec.regex,
176-
"ignore_case": category_spec.ignore_case
177-
});
178-
(category_id.clone(), spec_json)
179-
})
180-
.collect();
181-
182-
serde_json::to_string(&serialized_classes).unwrap_or_else(|_| "[]".to_string())
170+
let mut parts = Vec::new();
171+
172+
for (category_id, category_spec) in classes {
173+
// Build category array string manually: ["Work", "Programming"]
174+
let category_str = format!(
175+
"[{}]",
176+
category_id
177+
.iter()
178+
.map(|s| format!("\"{}\"", s))
179+
.collect::<Vec<_>>()
180+
.join(", ")
181+
);
182+
183+
// Build spec object manually to avoid JSON escaping regex patterns
184+
let mut spec_parts = Vec::new();
185+
spec_parts.push(format!("\"type\": \"{}\"", category_spec.spec_type));
186+
187+
// Only include regex for non-"none" types, and use raw pattern without escaping
188+
if category_spec.spec_type != "none" {
189+
spec_parts.push(format!("\"regex\": \"{}\"", category_spec.regex));
190+
}
191+
192+
// Always include ignore_case field
193+
spec_parts.push(format!("\"ignore_case\": {}", category_spec.ignore_case));
194+
195+
let spec_str = format!("{{{}}}", spec_parts.join(", "));
196+
197+
// Build the tuple [category, spec]
198+
parts.push(format!("[{}, {}]", category_str, spec_str));
199+
}
200+
201+
format!("[{}]", parts.join(", "))
183202
}
184203

185204
fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
@@ -195,7 +214,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
195214
if params.base.filter_afk {
196215
query.push(format!(
197216
"not_afk = flood(query_bucket(find_bucket(\"{}\")));
198-
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
217+
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
199218
escape_doublequote(&params.bid_afk)
200219
));
201220
}
@@ -207,7 +226,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
207226
if params.base.include_audible {
208227
query.push(
209228
"audible_events = filter_keyvals(browser_events, \"audible\", [true]);
210-
not_afk = period_union(not_afk, audible_events)"
229+
not_afk = period_union(not_afk, audible_events)"
211230
.to_string(),
212231
);
213232
}
@@ -221,7 +240,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
221240
// Add categorization if classes specified
222241
if !params.base.classes.is_empty() {
223242
query.push(format!(
224-
"events = categorize(events, {})",
243+
"events = categorize(events, {});",
225244
serialize_classes(&params.base.classes)
226245
));
227246
}
@@ -252,7 +271,7 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
252271
// Add categorization if classes specified
253272
if !params.base.classes.is_empty() {
254273
query.push(format!(
255-
"events = categorize(events, {})",
274+
"events = categorize(events, {});",
256275
serialize_classes(&params.base.classes)
257276
));
258277
}
@@ -269,26 +288,26 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
269288
}
270289

271290
fn build_browser_events(params: &DesktopQueryParams) -> String {
272-
let mut query = String::from("browser_events = [];\n");
291+
let mut query = String::from("browser_events = [];");
273292

274293
for browser_bucket in &params.base.bid_browsers {
275294
for (browser_name, app_names) in BROWSER_APPNAMES.entries() {
276295
if browser_bucket.contains(browser_name) {
277296
query.push_str(&format!(
278-
"events_{0} = flood(query_bucket(\"{1}\"));
279-
window_{0} = filter_keyvals(events, \"app\", {2});
280-
events_{0} = filter_period_intersect(events_{0}, window_{0});
281-
events_{0} = split_url_events(events_{0});
282-
browser_events = concat(browser_events, events_{0});
283-
browser_events = sort_by_timestamp(browser_events);\n",
297+
"
298+
events_{0} = flood(query_bucket(\"{1}\"));
299+
window_{0} = filter_keyvals(events, \"app\", {2});
300+
events_{0} = filter_period_intersect(events_{0}, window_{0});
301+
events_{0} = split_url_events(events_{0});
302+
browser_events = concat(browser_events, events_{0});
303+
browser_events = sort_by_timestamp(browser_events)",
284304
browser_name,
285305
escape_doublequote(browser_bucket),
286306
serde_json::to_string(app_names).unwrap()
287307
));
288308
}
289309
}
290310
}
291-
292311
query
293312
}
294313

@@ -414,9 +433,9 @@ mod tests {
414433
assert!(serialized.contains("Programming"));
415434
assert!(serialized.contains("Google Docs"));
416435
assert!(serialized.contains("GitHub|vim"));
417-
assert!(serialized.contains("\"type\":\"regex\""));
418-
assert!(serialized.contains("\"ignore_case\":false"));
419-
assert!(serialized.contains("\"ignore_case\":true"));
436+
assert!(serialized.contains("\"type\": \"regex\""));
437+
assert!(serialized.contains("\"ignore_case\": false"));
438+
assert!(serialized.contains("\"ignore_case\": true"));
420439
}
421440

422441
#[test]

0 commit comments

Comments
 (0)