diff --git a/codetracer-python-recorder/resources/trace_filters/builtin_default.toml b/codetracer-python-recorder/resources/trace_filters/builtin_default.toml index b47d566..106e71b 100644 --- a/codetracer-python-recorder/resources/trace_filters/builtin_default.toml +++ b/codetracer-python-recorder/resources/trace_filters/builtin_default.toml @@ -33,6 +33,11 @@ selector = 'pkg:literal:builtins' exec = "skip" reason = "Skip builtins module instrumentation" +[[scope.rules]] +selector = 'pkg:glob:*_distutils_hack*' +exec = "skip" +reason = "Skip setuptools shim module" + [[scope.rules]] selector = 'pkg:glob:*' diff --git a/codetracer-python-recorder/src/trace_filter/engine.rs b/codetracer-python-recorder/src/trace_filter/engine.rs index 411bd4f..a31a610 100644 --- a/codetracer-python-recorder/src/trace_filter/engine.rs +++ b/codetracer-python-recorder/src/trace_filter/engine.rs @@ -11,6 +11,7 @@ use crate::trace_filter::config::{ use crate::trace_filter::selector::{Selector, SelectorKind}; use dashmap::DashMap; use pyo3::{prelude::*, PyErr}; +use pyo3::types::{PyDict, PyList}; use recorder_errors::{target, ErrorCode, RecorderResult}; use std::borrow::Cow; use std::path::{Component, Path, PathBuf}; @@ -188,6 +189,8 @@ pub struct TraceFilterEngine { default_value_source: usize, rules: Arc<[CompiledScopeRule]>, cache: DashMap>, + module_cache: DashMap>, + module_roots: Arc<[String]>, } impl TraceFilterEngine { @@ -197,6 +200,7 @@ impl TraceFilterEngine { let default_value_action = config.default_value_action(); let default_value_source = config.default_value_source(); let rules = compile_rules(config.rules()); + let module_roots = Python::with_gil(|py| collect_module_roots(py)); TraceFilterEngine { config: Arc::new(config), @@ -205,6 +209,8 @@ impl TraceFilterEngine { default_value_source, rules, cache: DashMap::new(), + module_cache: DashMap::new(), + module_roots: module_roots.into(), } } @@ -238,7 +244,25 @@ impl TraceFilterEngine { .qualname(py) .map_err(|err| 
py_attr_error("co_qualname", err))?; - let context = ScopeContext::derive(filename, qualname, self.config.sources()); + let mut context = ScopeContext::derive(filename, self.config.sources()); + context.refresh_object_name(qualname); + let needs_module_name = context + .module_name + .as_ref() + .map(|name| !is_valid_module_name(name)) + .unwrap_or(true); + if needs_module_name { + if let Some(absolute) = context.absolute_path.clone() { + if let Some(module) = self.resolve_module_name(py, &absolute) { + context.update_module_name(module, qualname); + } else if context.module_name.is_none() { + log::debug!( + "[TraceFilter] unable to derive module name for '{}'; package selectors may not match", + absolute + ); + } + } + } let mut exec = self.default_exec; let mut value_default = self.default_value_action; @@ -301,6 +325,17 @@ impl TraceFilterEngine { pub fn summary(&self) -> FilterSummary { self.config.summary() } + + fn resolve_module_name(&self, py: Python<'_>, absolute: &str) -> Option { + if let Some(entry) = self.module_cache.get(absolute) { + return entry.value().clone(); + } + let resolved = module_name_from_roots(&self.module_roots, absolute) + .or_else(|| lookup_module_name(py, absolute)); + self.module_cache + .insert(absolute.to_string(), resolved.clone()); + resolved + } } #[derive(Debug, Clone)] @@ -392,7 +427,7 @@ struct ScopeContext { } impl ScopeContext { - fn derive(filename: &str, qualname: &str, sources: &[FilterSource]) -> Self { + fn derive(filename: &str, sources: &[FilterSource]) -> Self { let absolute_path = normalise_to_posix(Path::new(filename)); let mut best_match: Option<(usize, PathBuf)> = None; @@ -424,25 +459,229 @@ impl ScopeContext { .as_deref() .and_then(|rel| module_from_relative(rel).map(|cow| cow.into_owned())); - let object_name = module_name - .as_ref() - .map(|module| format!("{}.{}", module, qualname)) - .or_else(|| { - if qualname.is_empty() { - None - } else { - Some(qualname.to_string()) - } - }); - ScopeContext { 
module_name, - object_name, + object_name: None, relative_path, absolute_path, source_id, } } + + fn refresh_object_name(&mut self, qualname: &str) { + self.object_name = match (self.module_name.as_ref(), qualname.is_empty()) { + (Some(module), false) => Some(format!("{module}.{qualname}")), + (Some(module), true) => Some(module.clone()), + (None, false) => Some(qualname.to_string()), + (None, true) => None, + }; + } + + fn update_module_name(&mut self, module: String, qualname: &str) { + self.module_name = Some(module); + self.refresh_object_name(qualname); + } +} + +fn lookup_module_name(py: Python<'_>, absolute: &str) -> Option { + let sys = py.import("sys").ok()?; + let modules_obj = match sys.getattr("modules") { + Ok(value) => value, + Err(_) => return None, + }; + let modules: Bound<'_, PyDict> = match modules_obj.downcast_into::() { + Ok(dict) => dict, + Err(_) => return None, + }; + let mut best: Option<(usize, String)> = None; + 'modules: for (name_obj, module_obj) in modules.iter() { + let module_name: String = match name_obj.extract() { + Ok(value) => value, + Err(_) => continue, + }; + if module_obj.is_none() { + continue; + } + for candidate in module_candidate_paths(&module_obj) { + if equivalent_posix_paths(&candidate, absolute) { + let preferred = preferred_module_name(&module_name, &module_obj); + let score = module_name_score(&preferred); + let update = match best { + Some((best_score, _)) => score < best_score, + None => true, + }; + if update { + best = Some((score, preferred)); + if score == 0 { + break 'modules; + } + } + } + } + } + best.map(|(_, name)| name) +} + +fn collect_module_roots(py: Python<'_>) -> Vec { + let mut roots = Vec::new(); + if let Ok(sys) = py.import("sys") { + if let Ok(path_obj) = sys.getattr("path") { + if let Ok(path_list) = path_obj.downcast_into::() { + for entry in path_list.iter() { + if let Ok(raw) = entry.extract::() { + if let Some(normalized) = normalise_to_posix(Path::new(&raw)) { + roots.push(normalized); + 
} + } + } + } + } + } + roots +} + +fn module_name_from_roots(roots: &[String], absolute: &str) -> Option { + for base in roots { + if let Some(relative) = strip_posix_prefix(absolute, base) { + if let Some(name) = relative_str_to_module(relative) { + return Some(name); + } + } + } + None +} + +fn module_candidate_paths(module: &Bound<'_, PyAny>) -> Vec { + let mut candidates = Vec::new(); + if let Ok(spec) = module.getattr("__spec__") { + if let Some(origin) = extract_normalised_spec_origin(&spec) { + candidates.push(origin); + } + } + if let Some(file) = extract_normalised_attr(module, "__file__") { + candidates.push(file); + } + if let Some(cached) = extract_normalised_attr(module, "__cached__") { + candidates.push(cached); + } + candidates +} + +fn extract_normalised_attr(module: &Bound<'_, PyAny>, attr: &str) -> Option { + let value = module.getattr(attr).ok()?; + extract_normalised_path(&value) +} + +fn extract_normalised_spec_origin(spec: &Bound<'_, PyAny>) -> Option { + if spec.is_none() { + return None; + } + let origin = spec.getattr("origin").ok()?; + extract_normalised_path(&origin) +} + +fn extract_normalised_path(value: &Bound<'_, PyAny>) -> Option { + if value.is_none() { + return None; + } + let raw: String = value.extract().ok()?; + normalise_to_posix(Path::new(raw.as_str())) +} + +fn equivalent_posix_paths(candidate: &str, target: &str) -> bool { + if candidate == target { + return true; + } + if candidate.ends_with(".pyc") && target.ends_with(".py") { + return candidate.trim_end_matches('c') == target; + } + false +} + +fn preferred_module_name(default: &str, module: &Bound<'_, PyAny>) -> String { + if let Ok(spec) = module.getattr("__spec__") { + if let Ok(name) = spec.getattr("name") { + if let Ok(raw) = name.extract::() { + if !raw.is_empty() { + return raw; + } + } + } + } + if let Ok(name_attr) = module.getattr("__name__") { + if let Ok(raw) = name_attr.extract::() { + if !raw.is_empty() { + return raw; + } + } + } + default.to_string() +} + 
+fn module_name_score(name: &str) -> usize { + if name + .split('.') + .all(|segment| !segment.is_empty() && segment.chars().all(is_identifier_char)) + { + 0 + } else { + 1 + } +} + +fn is_identifier_char(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +fn is_valid_module_name(name: &str) -> bool { + !name.is_empty() + && name + .split('.') + .all(|segment| !segment.is_empty() && segment.chars().all(is_identifier_char)) +} + +fn strip_posix_prefix<'a>(path: &'a str, base: &str) -> Option<&'a str> { + if base.is_empty() { + return None; + } + if base == "/" { + return path.strip_prefix('/'); + } + if path.starts_with(base) { + let mut remainder = &path[base.len()..]; + if remainder.starts_with('/') { + remainder = &remainder[1..]; + } + if remainder.is_empty() { + None + } else { + Some(remainder) + } + } else { + None + } +} + +fn relative_str_to_module(relative: &str) -> Option { + let mut parts: Vec<&str> = relative + .split('/') + .filter(|segment| !segment.is_empty()) + .collect(); + if parts.is_empty() { + return None; + } + let last = parts.pop().expect("non-empty"); + if let Some(stem) = last.strip_suffix(".py") { + if stem != "__init__" { + parts.push(stem); + } + } else { + parts.push(last); + } + if parts.is_empty() { + return None; + } + Some(parts.join(".")) } fn normalise_to_posix(path: &Path) -> Option { @@ -519,7 +758,7 @@ fn py_attr_error(attr: &str, err: PyErr) -> recorder_errors::RecorderError { mod tests { use super::*; use crate::trace_filter::config::TraceFilterConfig; - use pyo3::types::{PyAny, PyCode, PyModule}; + use pyo3::types::{PyAny, PyCode, PyDict, PyModule}; use std::ffi::CString; use std::fs; use std::io::Write; @@ -561,8 +800,7 @@ mod tests { )?; let code_obj = get_code(&module, "foo")?; let wrapper = CodeObjectWrapper::new(py, &code_obj); - - let engine = TraceFilterEngine::new(config); + let engine = TraceFilterEngine::new(config.clone()); let first = engine.resolve(py, &wrapper)?; assert_eq!(first.exec(), 
ExecDecision::Trace); @@ -631,6 +869,87 @@ mod tests { }) } + #[test] + fn inline_pkg_rule_uses_sys_modules_fallback() -> RecorderResult<()> { + let inline = r#" + [meta] + name = "inline" + version = 1 + + [scope] + default_exec = "trace" + default_value_action = "allow" + + [[scope.rules]] + selector = "pkg:literal:app.foo" + exec = "skip" + "#; + let config = TraceFilterConfig::from_inline_and_paths(&[("inline", inline)], &[])?; + + Python::with_gil(|py| -> RecorderResult<()> { + let project = tempdir().expect("project"); + let project_root = project.path(); + let app_dir = project_root.join("app"); + fs::create_dir_all(&app_dir).expect("create app dir"); + let file_path = app_dir.join("foo.py"); + fs::write( + &file_path, + "def foo():\n secret = 42\n return secret\n", + ) + .expect("write module"); + + fs::write(app_dir.join("__init__.py"), "\n").expect("write __init__"); + let sys = py.import("sys").expect("import sys"); + let sys_path_any = sys.getattr("path").expect("sys.path"); + let sys_path: Bound<'_, PyList> = sys_path_any + .downcast_into::() + .expect("path list"); + sys_path + .insert(0, project_root.to_string_lossy().to_string()) + .expect("insert project root"); + let absolute_path = + normalise_to_posix(Path::new(file_path.to_string_lossy().as_ref())).unwrap(); + + let module = py.import("app.foo").expect("import app.foo"); + let modules_any = sys.getattr("modules").expect("sys.modules"); + let modules: Bound<'_, PyDict> = modules_any + .downcast_into::() + .expect("modules dict"); + modules + .set_item("app.foo", module.as_any()) + .expect("register module"); + + let func: Bound<'_, PyAny> = module.getattr("foo").expect("get foo"); + let code_obj = func + .getattr("__code__") + .expect("__code__") + .downcast_into::() + .expect("PyCode"); + let wrapper = CodeObjectWrapper::new(py, &code_obj); + let recorded_filename = wrapper.filename(py).expect("code filename"); + assert_eq!(recorded_filename, file_path.to_string_lossy()); + + let engine = 
TraceFilterEngine::new(config.clone()); + let expected_root = + normalise_to_posix(Path::new(project_root.to_string_lossy().as_ref())).unwrap(); + assert!(engine.module_roots.iter().any(|root| root == &expected_root)); + let derived_from_roots = + module_name_from_roots(&engine.module_roots, absolute_path.as_str()); + assert_eq!(derived_from_roots, Some("app.foo".to_string())); + let resolution = engine.resolve(py, &wrapper)?; + assert_eq!( + resolution.absolute_path(), + Some(absolute_path.as_str()) + ); + assert_eq!(resolution.module_name(), Some("app.foo")); + assert_eq!(resolution.exec(), ExecDecision::Skip); + + modules.del_item("app.foo").expect("remove module"); + sys_path.del_item(0).expect("restore sys.path"); + Ok(()) + }) + } + #[test] fn file_selector_matches_relative_path() -> RecorderResult<()> { let (config, file_path) = filter_with_pkg_rule( diff --git a/design-docs/adr/0013-reliable-module-name-derivation.md b/design-docs/adr/0013-reliable-module-name-derivation.md new file mode 100644 index 0000000..67c4c85 --- /dev/null +++ b/design-docs/adr/0013-reliable-module-name-derivation.md @@ -0,0 +1,51 @@ +# ADR 0013: Reliable Module Name Derivation for Trace Filters + +- **Status:** Proposed +- **Date:** 2025-03-15 +- **Deciders:** codetracer recorder maintainers +- **Consulted:** Python runtime SMEs, DX/observability stakeholders +- **Informed:** Support engineers, product analytics consumers + +## Context +- Scope rules in the configurable trace filter engine match code objects using package (`pkg:*`), file, and object selectors. +- Package selectors require the engine to derive a module name from `code.co_filename`. We currently guess the module by stripping each filter source’s `project_root` from the absolute filename and converting the remainder to dotted form (`ScopeContext::derive`). +- The builtin filter is injected as an inline source (``) whose `project_root` resolves to `"."`. 
That path is not a prefix of system libraries (e.g., `/usr/lib/python3.12/site-packages/_distutils_hack/__init__.py`), so module derivation fails and the code’s `module_name` stays `None`. +- When module name derivation fails, package selectors silently never match—even though configuration authors expect builtin skips (such as `_distutils_hack`) to work regardless of how the filter is loaded. + +## Problem +- Inline filters and filters stored outside the traced project cannot derive module names, causing all `pkg:*` selectors from those sources to be ignored. +- Users cannot observe or correct this: they only see that builtin skip rules or inline filters “do nothing,” which looks like a bug and pollutes traces with unwanted modules. +- Relying solely on relative paths ties filter correctness to the filesystem layout, which is fragile for virtual environments, zip apps, or global site-packages. + +## Decision +1. **Augment module derivation with `sys.modules`.** + - Keep the existing relative-path heuristic for performance when it succeeds. + - When it fails to produce a module name, fall back to scanning `sys.modules` for entries whose `__file__` matches the canonicalised `co_filename`. + - Cache the mapping from absolute filename to module name inside `ScopeContext` (or the engine) to avoid repeated scans on hot code paths. +2. **Expose the derived module even for inline/builtin filters.** + - `pkg:*` selectors attached to inline sources should now match by module name, independent of project roots. +3. **Retain current behaviour for synthetic filenames.** + - Frames with `<string>` or `<stdin>` filenames still produce no module, so package selectors still never match them. +4. **Surface diagnostics when both derivation strategies fail.** + - Add a debug-level log so users understand why a `pkg:*` selector may not hit, aiding troubleshooting. 
+ +## Consequences +- **Pros** + - Builtin skip rules (e.g., `_distutils_hack`) start working immediately, improving trace signal/noise without extra user configuration. + - Inline filters defined via CLI, env vars, or API behave identically to on-disk filters, aligning with user expectations. + - Reusing Python’s own module registry removes the guesswork around path prefixes, making the system robust to virtualenv or zipapp layouts. +- **Cons** + - Scanning `sys.modules` is O(n) in the number of loaded modules, so we must cache results and only fall back when necessary. + - Module derivation now depends on `sys.modules` entries having accurate `__file__` attributes; exotic loaders that omit them will still fail (but that is already true for path-based detection). +- **Risks** + - The fallback introduces Python interaction inside the resolution path; bugs here could deadlock if we mishandle the GIL or degrade performance. + - Module caching must be invalidated when module files are reloaded from different paths; we assume trace sessions do not mutate modules aggressively. + +## Alternatives +- **Require all filters to live under the traced project root.** Rejected: impossible for builtin filters and unreasonable for global hooks. +- **Add explicit annotations to trace metadata with module names provided by the target script.** Rejected: burdens users and still fails for builtin filters. +- **Ignore package selectors for inline filters.** Rejected: contradicts documented behaviour and leaves builtin skips ineffective. + +## References +- `codetracer-python-recorder/src/trace_filter/engine.rs` (`ScopeContext::derive` implementation). 
+- Python documentation for `sys.modules` and module attributes: https://docs.python.org/3/library/sys.html#sys.modules diff --git a/design-docs/reliable-module-name-derivation-implementation-plan.md b/design-docs/reliable-module-name-derivation-implementation-plan.md new file mode 100644 index 0000000..fc98d20 --- /dev/null +++ b/design-docs/reliable-module-name-derivation-implementation-plan.md @@ -0,0 +1,63 @@ +# Reliable Module Name Derivation – Implementation Plan + +## Summary +Trace filter package selectors currently fail for inline (builtin) filters because the engine can only derive module names by stripping filter `project_root` prefixes from filenames. This plan delivers the ADR 0013 decision: introduce a `sys.modules`-based fallback with caching so builtin and inline filters can match `pkg:*` selectors reliably, while keeping existing relative-path logic fast for project-local code. + +## Goals +1. Package selectors from any filter source (inline or on-disk) resolve consistently for real files (`_distutils_hack`, setuptools helpers, etc.). +2. The fallback module lookup incurs minimal overhead by caching filename→module mappings. +3. Diagnostics exist for frames where module derivation still fails (synthetic filenames, missing `__file__`), so users can understand selector misses. +4. Regression coverage demonstrates the builtin `_distutils_hack` skip works end-to-end. + +## Non-goals +- Changing filter syntax or adding new selector kinds. +- Supporting synthetic filenames (`<string>`, `<stdin>`) beyond today’s behaviour. +- Tracking module reloads while a trace is in progress; we assume module identities stay stable within one tracing session. + +## Work Breakdown + +### 1. Engine Data Model Updates +- Extend `ScopeContext` to carry a reference-counted cache (e.g., `DashMap<String, Option<String>>`) mapping canonical filenames to module names. +- Thread this cache through `TraceFilterEngine` so all resolutions share it. +- Keep the existing `project_root` stripping logic as the first attempt. + +### 2. 
`sys.modules` Fallback +- Implement a helper `resolve_module_via_sys_modules(py, filename) -> Option<String>`: + - Canonicalise `filename`. + - Iterate over `sys.modules.items()` (skipping `None` entries), inspect `__file__` attributes, and compare after normalisation. + - Return the module name (dictionary key) when the path matches. + - Cache successes and failures. +- Ensure we hold the GIL while accessing Python objects and release references promptly. + +### 3. Diagnostics +- When both prefix stripping and `sys.modules` lookup fail, emit a `log::debug!` with the filename so advanced users can trace why selectors do not match. +- Optionally increment an internal metric counter for “module derivation fallback failures” to aid telemetry. + +### 4. Builtin Filter Verification +- Add/extend a runtime test that traces a trivial script importing `_distutils_hack` (simulated by creating the module under a site-packages-style path) and asserts that the builtin filter now skips those frames. +- Update existing tests to cover the fallback path (e.g., by constructing a filter source with `project_root="."`). + +### 5. Documentation +- Update `docs/onboarding/trace-filters.md` to explain how module names are derived and that builtin filters now correctly skip site-packages helpers. +- Reference ADR 0013 from the configurable trace filters design doc for posterity. + +## Testing Strategy +- Unit tests for the new lookup helper, using temporary modules inserted into `sys.modules` with fake `__file__` values. +- Integration test exercising `pkg:literal:_distutils_hack` via the builtin filter chain. +- Regression test ensuring the cache handles multiple resolutions of the same file without repeated scans (can assert the helper is only called once via instrumentation). + +## Risks & Mitigations +- **Performance:** Scanning `sys.modules` could be expensive. Mitigate with canonical-path caching and only invoking the fallback when path stripping fails. 
+- **Thread safety:** Accessing Python objects without the GIL would be unsafe. Keep all fallback work within the `Python<'_>` context already available in `TraceFilterEngine::resolve`. +- **Stale cache entries:** Module files rarely change mid-run; if they do, the trace filter outcome would still be correct because module names remain stable. Document this assumption. + +## Timeline (rough) +1. Day 0–1: Land ADR + plan (this document). +2. Day 2–3: Implement cache plumbing and fallback helper with unit tests. +3. Day 4: Wire into `ScopeContext::derive`, add diagnostics. +4. Day 5: Expand runtime/integration tests, update docs. +5. Day 6: Code review, land, monitor CI/perf. + +## Open Questions +- Should we normalise module file paths (e.g., map a `.pyc` path to its `.py` counterpart) when reading them from `sys.modules`? (default answer: yes, treat `.pyc` paths as equivalent to the matching `.py` filenames.) +- Do we need to support namespace packages, which have no `__file__` and may span multiple `__path__` entries? (likely postpone; first implementation can stop at the first matching entry.) 
diff --git a/design-docs/reliable-module-name-derivation-implementation-plan.status.md b/design-docs/reliable-module-name-derivation-implementation-plan.status.md new file mode 100644 index 0000000..7d73006 --- /dev/null +++ b/design-docs/reliable-module-name-derivation-implementation-plan.status.md @@ -0,0 +1,20 @@ +# Reliable Module Name Derivation – Status + +## Relevant Design Docs +- `design-docs/adr/0013-reliable-module-name-derivation.md` +- `design-docs/reliable-module-name-derivation-implementation-plan.md` + +## Key Source Files +- `codetracer-python-recorder/src/trace_filter/engine.rs` +- `codetracer-python-recorder/src/trace_filter/scope.rs` *(ScopeContext helpers live here via engine module)* +- `codetracer-python-recorder/src/runtime/tracer/runtime_tracer.rs` +- `docs/onboarding/trace-filters.md` +- `codetracer-python-recorder/resources/trace_filters/builtin_default.toml` + +## Workstream Progress +- ✅ **WS1 – Context Inspection & Cache Design:** Reviewed the `ScopeContext` derivation flow, confirmed why inline filters only saw `"."` project roots, and identified the need for a shared module-name cache plus sys.path-derived fallbacks. +- ✅ **WS2 – sys.modules/sys.path Fallback & Diagnostics:** Added a per-engine module cache, captured normalized `sys.path` roots at construction time, implemented a resolver that prefers path-derived module names but can fall back to `sys.modules`, and logged when neither strategy succeeds. +- ✅ **WS3 – Regression Tests & Docs:** Added the `inline_pkg_rule_uses_sys_modules_fallback` regression test to guard the behaviour, documented the new module-derivation path in `docs/onboarding/trace-filters.md`, and ensured `just dev test` (Rust + Python) passes end-to-end. + +## Next Update +Future updates will only be necessary if follow-up work (e.g., exposing additional diagnostics or handling namespace packages) is scheduled. 
diff --git a/docs/onboarding/trace-filters.md b/docs/onboarding/trace-filters.md index dd88fb6..6337caa 100644 --- a/docs/onboarding/trace-filters.md +++ b/docs/onboarding/trace-filters.md @@ -51,6 +51,7 @@ - `file` – source path relative to the project root (POSIX separators). - `obj` – module-qualified object (`package.module.func`). - `local`, `global`, `arg`, `ret`, `attr` – value-level selectors. +- Module names normally come from stripping the project root off the code object path. When a filter is inlined (e.g., the builtin defaults) and no project-root prefix yields a valid module name, the recorder first tries the `sys.path` entries it captured when the filter engine was created, and then falls back to `sys.modules`: it scans loaded modules, compares their `__spec__.origin` / `__file__` / `__cached__` against the code’s absolute path, and caches the result per file. This keeps builtin package skips (like `_distutils_hack`) effective even though the filter lives inside the recorder wheel. - Match types (second segment in `kind:match:pattern`): - `glob` *(default)* – wildcard matching with `/` treated as a separator. - `regex` – Rust/RE2-style regular expressions; invalid patterns log a single warning and fall back to configuration errors.