Skip to content

Commit 8289a0f

Browse files
authored
fix(queries): eliminate double-escaping in regex patterns (#541)
* fix(queries): eliminate double-escaping in regex patterns

The serialize_classes function was using serde_json serialization, which caused regex patterns to be double-escaped (e.g., 't\.co' became 't\\.co'), breaking pattern matching in ActivityWatch queries.

Core changes:
- Rewrite serialize_classes() to build JSON strings manually instead of using serde_json
- Preserve single-escaped regex patterns for proper matching
- Always include the 'ignore_case' field (emitted as true or false)
- Only include 'regex' field for non-'none' type categories
- Improve error handling in classes deserialization
- Add optional fields to ClassSetting struct for better compatibility

This fixes the core regex pattern matching issues in ActivityWatch canonical queries that were causing incorrect categorization results.

* fix: remove unused functions in query class
1 parent 37f8189 commit 8289a0f

File tree

3 files changed

+64
-126
lines changed

3 files changed

+64
-126
lines changed

aw-client-rust/src/classes.rs

Lines changed: 6 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,29 @@
22
//!
33
//! Taken from default classes in aw-webui
44
5-
use log::warn;
6-
use rand::Rng;
75
use serde::{Deserialize, Serialize};
8-
9-
use super::blocking::AwClient as ActivityWatchClient;
6+
use serde_json;
107

118
pub type CategoryId = Vec<String>;
129

1310
#[derive(Debug, Clone, Serialize, Deserialize)]
1411
pub struct CategorySpec {
1512
#[serde(rename = "type")]
1613
pub spec_type: String,
14+
#[serde(default)]
1715
pub regex: String,
1816
#[serde(default)]
1917
pub ignore_case: bool,
2018
}
2119

2220
#[derive(Debug, Clone, Serialize, Deserialize)]
2321
pub struct ClassSetting {
22+
#[serde(default)]
23+
pub id: Option<i32>,
2424
pub name: Vec<String>,
2525
pub rule: CategorySpec,
26+
#[serde(default)]
27+
pub data: Option<serde_json::Value>,
2628
}
2729

2830
/// Returns the default categorization classes
@@ -140,56 +142,3 @@ pub fn default_classes() -> Vec<(CategoryId, CategorySpec)> {
140142
),
141143
]
142144
}
143-
144-
/// Get classes from server-side settings using default localhost:5600.
145-
/// Might throw an error if not set yet, in which case we use the default classes as a fallback.
146-
pub fn get_classes() -> Vec<(CategoryId, CategorySpec)> {
147-
get_classes_from_server("localhost", 5600)
148-
}
149-
150-
/// Get classes from server-side settings with custom host and port.
151-
/// Might throw an error if not set yet, in which case we use the default classes as a fallback.
152-
pub fn get_classes_from_server(host: &str, port: u16) -> Vec<(CategoryId, CategorySpec)> {
153-
let mut rng = rand::rng();
154-
let random_int = rng.random_range(0..10001);
155-
let client_id = format!("get-setting-{}", random_int);
156-
157-
// Create a client with a random ID, similar to the Python implementation
158-
let awc = match ActivityWatchClient::new(host, port, &client_id) {
159-
Ok(client) => client,
160-
Err(_) => {
161-
warn!(
162-
"Failed to create ActivityWatch client for {}:{}, using default classes",
163-
host, port
164-
);
165-
return default_classes();
166-
}
167-
};
168-
169-
awc.get_setting("classes")
170-
.map(|setting_value| {
171-
// Try to deserialize the setting into Vec<ClassSetting>
172-
if setting_value.is_null() {
173-
return default_classes();
174-
}
175-
176-
let class_settings: Vec<ClassSetting> = serde_json::from_value(setting_value)
177-
.unwrap_or_else(|_| {
178-
warn!("Failed to deserialize classes setting, using default classes");
179-
return vec![];
180-
});
181-
182-
// Convert ClassSetting to (CategoryId, CategorySpec) format
183-
class_settings
184-
.into_iter()
185-
.map(|class| (class.name, class.rule))
186-
.collect()
187-
})
188-
.unwrap_or_else(|_| {
189-
warn!(
190-
"Failed to get classes from server {}:{}, using default classes as fallback",
191-
host, port
192-
);
193-
default_classes()
194-
})
195-
}

aw-client-rust/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ impl std::fmt::Debug for AwClient {
3636
}
3737

3838
fn get_hostname() -> String {
39-
return gethostname::gethostname().to_string_lossy().to_string();
39+
gethostname::gethostname().to_string_lossy().to_string()
4040
}
4141

4242
impl AwClient {

aw-client-rust/src/queries.rs

Lines changed: 57 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,8 @@
2828
//! };
2929
//!
3030
//! // Builds the canonical events query from the classes supplied in `params`
31-
//! let query = QueryParams::Desktop(params.clone()).canonical_events_with_classes();
31+
//! let query = QueryParams::Desktop(params.clone()).canonical_events();
3232
//!
33-
//! // Or from a custom server
34-
//! let query = QueryParams::Desktop(params)
35-
//! .canonical_events_with_classes_from_server("localhost", 2345);
3633
//! ```
3734
3835
use crate::classes::{CategoryId, CategorySpec};
@@ -135,54 +132,46 @@ impl QueryParams {
135132
QueryParams::Android(params) => build_android_canonical_events(params),
136133
}
137134
}
135+
}
138136

139-
/// Build canonical events query string with automatic class fetching if not provided
140-
pub fn canonical_events_with_classes(&self) -> String {
141-
self.canonical_events_with_classes_from_server("localhost", 5600)
142-
}
137+
/// Helper function to serialize classes in the format expected by the categorize function
138+
/// This version builds the query string directly without JSON serialization to avoid double-escaping
139+
fn serialize_classes(classes: &[ClassRule]) -> String {
140+
let mut parts = Vec::new();
141+
142+
for (category_id, category_spec) in classes {
143+
// Build category array string manually: ["Work", "Programming"]
144+
let category_str = format!(
145+
"[{}]",
146+
category_id
147+
.iter()
148+
.map(|s| format!("\"{}\"", s))
149+
.collect::<Vec<_>>()
150+
.join(", ")
151+
);
143152

144-
/// Build canonical events query string with automatic class fetching from custom server
145-
pub fn canonical_events_with_classes_from_server(&self, host: &str, port: u16) -> String {
146-
match self {
147-
QueryParams::Desktop(params) => {
148-
let mut params_with_classes = params.clone();
149-
if params_with_classes.base.classes.is_empty() {
150-
params_with_classes.base.classes =
151-
crate::classes::get_classes_from_server(host, port);
152-
}
153-
build_desktop_canonical_events(&params_with_classes)
154-
}
155-
QueryParams::Android(params) => {
156-
let mut params_with_classes = params.clone();
157-
if params_with_classes.base.classes.is_empty() {
158-
params_with_classes.base.classes =
159-
crate::classes::get_classes_from_server(host, port);
160-
}
161-
build_android_canonical_events(&params_with_classes)
162-
}
153+
// Build spec object manually to avoid JSON escaping regex patterns
154+
let mut spec_parts = Vec::new();
155+
spec_parts.push(format!("\"type\": \"{}\"", category_spec.spec_type));
156+
157+
// Only include regex for non-"none" types, and use raw pattern without escaping
158+
if category_spec.spec_type != "none" {
159+
spec_parts.push(format!("\"regex\": \"{}\"", category_spec.regex));
163160
}
161+
162+
// Always include ignore_case field
163+
spec_parts.push(format!("\"ignore_case\": {}", category_spec.ignore_case));
164+
165+
let spec_str = format!("{{{}}}", spec_parts.join(", "));
166+
167+
// Build the tuple [category, spec]
168+
parts.push(format!("[{}, {}]", category_str, spec_str));
164169
}
165-
}
166170

167-
/// Helper function to serialize classes in the format expected by the categorize function
168-
fn serialize_classes(classes: &[ClassRule]) -> String {
169-
// Convert Vec<(CategoryId, CategorySpec)> to the JSON format expected by categorize
170-
let serialized_classes: Vec<(Vec<String>, serde_json::Value)> = classes
171-
.iter()
172-
.map(|(category_id, category_spec)| {
173-
let spec_json = serde_json::json!({
174-
"type": category_spec.spec_type,
175-
"regex": category_spec.regex,
176-
"ignore_case": category_spec.ignore_case
177-
});
178-
(category_id.clone(), spec_json)
179-
})
180-
.collect();
181-
182-
serde_json::to_string(&serialized_classes).unwrap_or_else(|_| "[]".to_string())
171+
format!("[{}]", parts.join(", "))
183172
}
184173

185-
fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
174+
pub fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
186175
let mut query = Vec::new();
187176

188177
// Fetch window events
@@ -195,7 +184,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
195184
if params.base.filter_afk {
196185
query.push(format!(
197186
"not_afk = flood(query_bucket(find_bucket(\"{}\")));
198-
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
187+
not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])",
199188
escape_doublequote(&params.bid_afk)
200189
));
201190
}
@@ -207,7 +196,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
207196
if params.base.include_audible {
208197
query.push(
209198
"audible_events = filter_keyvals(browser_events, \"audible\", [true]);
210-
not_afk = period_union(not_afk, audible_events)"
199+
not_afk = period_union(not_afk, audible_events)"
211200
.to_string(),
212201
);
213202
}
@@ -221,7 +210,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
221210
// Add categorization if classes specified
222211
if !params.base.classes.is_empty() {
223212
query.push(format!(
224-
"events = categorize(events, {})",
213+
"events = categorize(events, {});",
225214
serialize_classes(&params.base.classes)
226215
));
227216
}
@@ -237,7 +226,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String {
237226
query.join(";\n")
238227
}
239228

240-
fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
229+
pub fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
241230
let mut query = Vec::new();
242231

243232
// Fetch app events
@@ -252,7 +241,7 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
252241
// Add categorization if classes specified
253242
if !params.base.classes.is_empty() {
254243
query.push(format!(
255-
"events = categorize(events, {})",
244+
"events = categorize(events, {});",
256245
serialize_classes(&params.base.classes)
257246
));
258247
}
@@ -268,33 +257,33 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String {
268257
query.join(";\n")
269258
}
270259

271-
fn build_browser_events(params: &DesktopQueryParams) -> String {
272-
let mut query = String::from("browser_events = [];\n");
260+
pub fn build_browser_events(params: &DesktopQueryParams) -> String {
261+
let mut query = String::from("browser_events = [];");
273262

274263
for browser_bucket in &params.base.bid_browsers {
275264
for (browser_name, app_names) in BROWSER_APPNAMES.entries() {
276265
if browser_bucket.contains(browser_name) {
277266
query.push_str(&format!(
278-
"events_{0} = flood(query_bucket(\"{1}\"));
279-
window_{0} = filter_keyvals(events, \"app\", {2});
280-
events_{0} = filter_period_intersect(events_{0}, window_{0});
281-
events_{0} = split_url_events(events_{0});
282-
browser_events = concat(browser_events, events_{0});
283-
browser_events = sort_by_timestamp(browser_events);\n",
267+
"
268+
events_{0} = flood(query_bucket(\"{1}\"));
269+
window_{0} = filter_keyvals(events, \"app\", {2});
270+
events_{0} = filter_period_intersect(events_{0}, window_{0});
271+
events_{0} = split_url_events(events_{0});
272+
browser_events = concat(browser_events, events_{0});
273+
browser_events = sort_by_timestamp(browser_events)",
284274
browser_name,
285275
escape_doublequote(browser_bucket),
286276
serde_json::to_string(app_names).unwrap()
287277
));
288278
}
289279
}
290280
}
291-
292281
query
293282
}
294283

295-
/// Build a full desktop query
284+
/// Build a full desktop query using default localhost:5600 configuration
296285
pub fn full_desktop_query(params: &DesktopQueryParams) -> String {
297-
let mut query = QueryParams::Desktop(params.clone()).canonical_events_with_classes();
286+
let mut query = QueryParams::Desktop(params.clone()).canonical_events();
298287

299288
// Add basic event aggregations
300289
query.push_str(&format!(
@@ -414,17 +403,17 @@ mod tests {
414403
assert!(serialized.contains("Programming"));
415404
assert!(serialized.contains("Google Docs"));
416405
assert!(serialized.contains("GitHub|vim"));
417-
assert!(serialized.contains("\"type\":\"regex\""));
418-
assert!(serialized.contains("\"ignore_case\":false"));
419-
assert!(serialized.contains("\"ignore_case\":true"));
406+
assert!(serialized.contains("\"type\": \"regex\""));
407+
assert!(serialized.contains("\"ignore_case\": false"));
408+
assert!(serialized.contains("\"ignore_case\": true"));
420409
}
421410

422411
#[test]
423412
fn test_canonical_events_with_empty_classes() {
424413
let params = DesktopQueryParams {
425414
base: QueryParamsBase {
426415
bid_browsers: vec![],
427-
classes: vec![], // Empty classes - should trigger server fetch
416+
classes: vec![],
428417
filter_classes: vec![],
429418
filter_afk: true,
430419
include_audible: true,
@@ -434,9 +423,9 @@ mod tests {
434423
};
435424

436425
let query_params = QueryParams::Desktop(params);
437-
let query = query_params.canonical_events_with_classes();
426+
let query = query_params.canonical_events();
438427

439-
// Should contain basic query structure even if server fetch fails
428+
// Should contain basic query structure
440429
assert!(query.contains("events = flood"));
441430
assert!(query.contains("test-window"));
442431
}
@@ -465,7 +454,7 @@ mod tests {
465454
};
466455

467456
let query_params = QueryParams::Desktop(params);
468-
let query = query_params.canonical_events_with_classes();
457+
let query = query_params.canonical_events();
469458

470459
// Should contain categorization
471460
assert!(query.contains("events = categorize"));

0 commit comments

Comments
 (0)