Skip to content

Commit da7afc9

Browse files
feat: ignore pattern for store (#25)
* feat: ignore entries matching given regex pattern(s) * test: ensure regex ignore patterns are respected * docs(readme): include mention of ignore patterns for filtering * chore: avoid using deprecated `assert_cmd` function
1 parent 8ae4d18 commit da7afc9

File tree

6 files changed

+123
-28
lines changed

6 files changed

+123
-28
lines changed

Cargo.lock

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ argfile = { version = "0.2" }
1919
dirs = { version = "6.0" }
2020
humantime = { version = "2.3" }
2121

22-
# MIME types
22+
# Binary data
2323
image = "0.25"
2424
mime-sniffer = "0.1"
2525
content_inspector = "0.2"
@@ -37,8 +37,9 @@ tracing-subscriber = { features = [
3737
tracing-appender = { version = "0.2", git = "https://github.com/tokio-rs/tracing.git" }
3838

3939
# Misc
40-
unicode-segmentation = "1.10" # Limit preview width by grapheme clusters
40+
unicode-segmentation = { version = "1.12" } # Limit preview width by grapheme clusters
4141
miette = { version = "7.6", features = ["fancy"] } # Fancy errors
42+
regex = { version = "1.12" } # Support ignore patterns defined by the user
4243

4344
[dev-dependencies]
4445
pretty_assertions = "1.4"

README.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ In addition:
2727
- **Relative positions:** support for getting/deleting items by relative position in the saved history
2828
- **Entry size limits:** configurable minimum and maximum size for stored entries
2929
- **Entry age limit:** configurable max age for entries - automatically remove old clipboard entries
30-
- **Informative previews:** previews for binary data support many more types e.g. `video/mp4`, `application/pdf`, etc.
30+
- **Ignore entries:** avoid storing certain text data using regex patterns, e.g. `^<meta http-equiv=`
31+
- **Informative previews:** previews for binary data support many more types, e.g. `video/mp4`, `application/pdf`, etc.
3132

3233
## Requirements
3334

@@ -104,14 +105,15 @@ wl-paste --type image --watch clipvault store # Forward raw image data
104105
### Image data from browsers
105106

106107
When copying images from browsers, `wl-paste` will usually pass the data to `clipvault` as `text/html`.
107-
This is not ideal for copying images, and you may wish to have the raw image data copied instead.
108-
If so, you can either *only* forward image data, or, more realistically, use the below and filter
109-
out entries which start with `<meta http-equiv=` in your picker (check out some of the scripts
110-
in [extras](./extras) - I personally use the [rofi script](./extras/clipvault_rofi.sh)):
108+
This is not ideal for copying images, and you probably want to have the raw image data copied instead.
109+
If so, you can either *only* forward image data, or, more realistically, use the commands below to
110+
copy the image data directly, and ignore the `text/html`:
111111

112112
```sh
113-
wl-paste --watch clipvault store # Forward all data
114-
wl-paste --type image --watch clipvault store # Forward specifically raw image data
113+
# Forward all data, ignoring text that starts with "<meta http-equiv="
114+
wl-paste --watch clipvault store --ignore-pattern '^<meta http-equiv='
115+
# Forward specifically raw image data
116+
wl-paste --type image --watch clipvault store
115117
```
116118

117119
## Usage

src/cli.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::path::PathBuf;
22

33
use clap::{Parser, Subcommand, ValueHint, command};
4+
use regex::Regex;
45

56
use crate::defaults;
67

@@ -72,6 +73,18 @@ pub struct StoreArgs {
7273
/// Store sensitive values, ignoring e.g. CLIPBOARD_STATE="sensitive" set by wl-clipboard.
7374
#[arg(long, action, env = "CLIPVAULT_STORE_SENSITIVE")]
7475
pub store_sensitive: bool,
76+
77+
/// Entries which include any match for the given regex pattern will not be stored.
78+
///
79+
/// To specify multiple patterns, simply repeat the argument with a new pattern. Be mindful
80+
/// of the fact that every regex pattern given will be tested against every text input.
81+
///
82+
/// e.g. clipvault store --ignore-pattern '^<meta http-equiv=' --ignore-pattern 'ignore\n$'
83+
///
84+
/// Note that look-around and backreferences are not supported, as the `regex` crate used
85+
/// for matching does not implement those features.
86+
#[arg(long, action, env = "CLIPVAULT_IGNORE_PATTERN", num_args = 1)]
87+
pub ignore_pattern: Option<Vec<Regex>>,
7588
}
7689

7790
#[derive(Debug, clap::Args)]
@@ -110,7 +123,3 @@ pub struct GetDelArgs {
110123
#[arg(long, conflicts_with("input"), allow_hyphen_values(true))]
111124
pub index: Option<isize>,
112125
}
113-
114-
// impl Cli {
115-
// fn to_config
116-
// }

src/commands/store.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::{
33
path::Path,
44
};
55

6+
use content_inspector::ContentType;
67
use miette::{Context, IntoDiagnostic, Result, miette};
78
use tracing::instrument;
89

@@ -23,6 +24,7 @@ pub fn execute(path_db: &Path, args: StoreArgs) -> Result<()> {
2324
max_entry_length: max_bytes,
2425
min_entry_length: min_bytes,
2526
store_sensitive,
27+
ignore_pattern,
2628
} = args;
2729

2830
// Min conflicts with max
@@ -78,12 +80,27 @@ pub fn execute(path_db: &Path, args: StoreArgs) -> Result<()> {
7880
);
7981
return Ok(());
8082
}
83+
8184
// Ignore purely whitespace content
8285
if buf.trim_ascii().is_empty() {
8386
tracing::debug!("only ASCII whitespace content");
8487
return Ok(());
8588
}
8689

90+
// Check user-provided ignore pattern
91+
if let Some(regexes) = ignore_pattern
92+
&& matches!(
93+
content_inspector::inspect(&buf),
94+
ContentType::UTF_8 | ContentType::UTF_8_BOM
95+
)
96+
&& regexes
97+
.iter()
98+
.any(|re| re.is_match(&String::from_utf8_lossy(&buf)))
99+
{
100+
tracing::debug!("content matched an ignore pattern");
101+
return Ok(());
102+
}
103+
87104
// Only get DB connection after parsing STDIN - avoid locking
88105
let conn = &init_db(path_db)?;
89106

tests/cmd.rs

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use std::{os::unix::fs::MetadataExt, sync::LazyLock, time::Duration};
1+
use std::{io::BufRead, os::unix::fs::MetadataExt, sync::LazyLock, time::Duration};
22

3-
use assert_cmd::Command;
3+
use assert_cmd::{Command, cargo_bin};
44
use base64::{
55
Engine, alphabet,
66
engine::{self, GeneralPurposeConfig},
@@ -20,8 +20,7 @@ fn get_db() -> NamedTempFile {
2020

2121
/// Builds the command to be run, pointing at the given temporary file for the database.
2222
fn get_cmd(db: &NamedTempFile) -> Command {
23-
let mut cmd = Command::cargo_bin("clipvault").expect("failed to build cmd");
24-
23+
let mut cmd = Command::new(cargo_bin!());
2524
cmd.args(["--database", &db.path().to_string_lossy()]);
2625
cmd
2726
}
@@ -58,7 +57,7 @@ const ENCODED_BINARY: &[(&str, &[u8])] = &[
5857
];
5958

6059
#[test]
61-
fn test_cmd_store() {
60+
fn test_store_basic() {
6261
let db = &get_db();
6362

6463
// TEXT
@@ -107,10 +106,7 @@ fn test_store_max_entries() {
107106
}
108107
let assert = get_cmd(db).arg("list").assert();
109108
let output = assert.get_output();
110-
assert_eq!(
111-
limit as usize,
112-
String::from_utf8_lossy(&output.stdout).lines().count()
113-
);
109+
assert_eq!(limit as usize, output.stdout.lines().count());
114110
assert.success();
115111
}
116112

@@ -209,6 +205,76 @@ fn test_store_min_max_conflict() {
209205
.failure();
210206
}
211207

208+
#[test]
209+
fn test_store_ignore_pattern() {
210+
let db = &get_db();
211+
212+
let store_ignore = |patterns: &[&str], input: &[u8]| {
213+
let mut args = Vec::from(["store"]);
214+
for pat in patterns {
215+
args.push("--ignore-pattern");
216+
args.push(pat);
217+
}
218+
get_cmd(db).args(args).write_stdin(input).assert().success();
219+
};
220+
let count_stored = || {
221+
get_cmd(db)
222+
.arg("list")
223+
.output()
224+
.expect("couldn't list entries")
225+
.stdout
226+
.lines()
227+
.count()
228+
};
229+
230+
let mut expected = 0;
231+
232+
store_ignore(&["test"], b"testing");
233+
assert_eq!(count_stored(), expected);
234+
235+
expected += 1;
236+
store_ignore(&["test"], b"hello");
237+
assert_eq!(count_stored(), expected);
238+
239+
store_ignore(&["^abc"], b"abcdefg");
240+
assert_eq!(count_stored(), expected);
241+
242+
expected += 1;
243+
store_ignore(&["^abc"], b"def");
244+
assert_eq!(count_stored(), expected);
245+
246+
store_ignore(&["world$"], b"hello world");
247+
assert_eq!(count_stored(), expected);
248+
249+
expected += 1;
250+
store_ignore(&["world$"], b"goodbye earth");
251+
assert_eq!(count_stored(), expected);
252+
253+
let http_image = r#"<meta http-equiv="content-type" content="text/html; charset=utf-8"><img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Ftse4.mm.bing.net%2Fth%2Fid%2FOIP.NHEHZBx37DjUVrFOwDUNugHaE8%3Fpid%3DApi&amp;f=1&amp;ipt=dcffa9260288ea62423251c252ecf228037d6317b7911a5365b03d36ca0590fb&amp;ipo=images" alt="alsdkf dlas by dannicalifornia on DeviantArt" style="width: 100%; height: 180px; min-height: 180px; max-height: 180px;" loading="lazy">"#;
254+
store_ignore(
255+
&[r"^<meta http-equiv=", r"ignore\n$"],
256+
http_image.as_bytes(),
257+
);
258+
assert_eq!(count_stored(), expected);
259+
260+
store_ignore(
261+
&[r"^<meta http-equiv=", r"ignore\n$"],
262+
b"\nplease\tignore\n",
263+
);
264+
assert_eq!(count_stored(), expected);
265+
266+
expected += 1;
267+
store_ignore(&[r"^<meta http-equiv=", r"ignore\n$"], b"control");
268+
assert_eq!(count_stored(), expected);
269+
270+
// Fails with invalid regex
271+
get_cmd(db)
272+
.args(["store", "--ignore-pattern", "[["])
273+
.assert()
274+
.failure()
275+
.stderr(contains("regex parse error"));
276+
}
277+
212278
#[test]
213279
fn test_get_del() {
214280
let db = &get_db();
@@ -247,8 +313,7 @@ fn test_get_del() {
247313
get_cmd(db).arg("get").write_stdin("1").assert().failure();
248314

249315
let stdout = get_cmd(db).arg("list").output().unwrap().stdout;
250-
let str = String::from_utf8_lossy(&stdout);
251-
assert_eq!(str.lines().count(), (u8::MAX - 3) as usize);
316+
assert_eq!(stdout.lines().count(), (u8::MAX - 3) as usize);
252317
}
253318

254319
#[test]

0 commit comments

Comments
 (0)