Skip to content

Commit 31a527e

Browse files
committed
Fix: CoW compilation issues
1 parent ff62de5 commit 31a527e

File tree

9 files changed

+50
-39
lines changed

9 files changed

+50
-39
lines changed

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
"rapidfuzz",
2424
"rfind",
2525
"Skylake",
26+
"stringtape",
2627
"stringwars",
2728
"stringzilla",
2829
"strstr",
2930
"tfidf",
3031
"Wunsch"
3132
]
32-
}
33+
}

Cargo.lock

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ categories = ["text-processing", "development-tools::profiling"]
1313

1414
[dependencies]
1515
criterion = "0.7.0"
16-
stringtape = "2.3.0"
17-
stringzilla = { version = "4.2.0", default-features = false }
16+
stringtape = "2.4.0"
17+
stringzilla = { version = "4.2.1", default-features = false }
1818

1919
# To use a local version of StringZilla, set the path:
2020
# stringzilla = { path = "../StringZilla/", features = ["cpus"] }
@@ -90,18 +90,17 @@ version = "1.1.3"
9090
optional = true
9191

9292
[dependencies.regex]
93-
version = "1.11.3"
93+
version = "1.12.1"
9494
optional = false
9595

9696
[dependencies.rapidfuzz]
9797
version = "0.5.0"
9898
optional = true
9999

100100
[dependencies.fork_union]
101-
version = "2.2.10"
101+
version = "2.3.0"
102102
optional = true
103103

104-
105104
[dependencies.simhash]
106105
version = "0.2"
107106
optional = true
@@ -161,7 +160,6 @@ optional = true
161160
default-features = false
162161
features = ["lazy", "strings", "parquet"]
163162

164-
165163
[dependencies.zeroize]
166164
version = "1.8.2"
167165
optional = true
@@ -193,7 +191,7 @@ optional = true
193191

194192
[target.'cfg(target_os = "linux")'.dependencies]
195193
perf-event = "0.4"
196-
libc = "0.2.176"
194+
libc = "0.2.177"
197195

198196
[target.'cfg(target_arch = "x86_64")'.dependencies]
199197
gxhash = { version = "3.4.1", optional = true }

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,8 @@ cargo install cargo-criterion --locked
357357
To pull and compile all the dependencies, you can call:
358358

359359
```bash
360-
cargo build --all-features # to compile everything
361-
cargo check --all-features --all-targets # to fail on warnings
360+
RUSTFLAGS="-C target-cpu=native" cargo build --benches --all-features # to compile everything
361+
RUSTFLAGS="-C target-cpu=native" cargo check --benches --all-features --all-targets # to type-check every target and surface warnings
362362
```
363363

364364
By default StringWars links `stringzilla` in CPU mode.

bench_find.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ RUSTFLAGS="-C target-cpu=native" \
3333
cargo criterion --features bench_find bench_find --jobs $(nproc)
3434
```
3535
"#]
36-
use std::env;
3736
use std::hint::black_box;
3837
use std::time::Duration;
3938

4039
use criterion::{Criterion, Throughput};
40+
use stringtape::BytesCowsAuto;
4141

4242
use aho_corasick::AhoCorasick;
4343
use bstr::ByteSlice;
@@ -66,7 +66,7 @@ fn configure_bench() -> Criterion {
6666
fn bench_substring_forward(
6767
g: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
6868
haystack: &[u8],
69-
needles: &[&[u8]],
69+
needles: &BytesCowsAuto,
7070
) {
7171
g.throughput(Throughput::Bytes(haystack.len() as u64));
7272

@@ -75,7 +75,7 @@ fn bench_substring_forward(
7575
if should_run("substring-forward/stringzilla::find") {
7676
g.bench_function("stringzilla::find", |b| {
7777
b.iter(|| {
78-
let token = black_box(*tokens.next().unwrap());
78+
let token = black_box(tokens.next().unwrap());
7979
let mut pos: usize = 0;
8080
while let Some(found) = sz::find(&haystack[pos..], token) {
8181
pos += found + token.len();
@@ -89,7 +89,7 @@ fn bench_substring_forward(
8989
if should_run("substring-forward/memmem::find") {
9090
g.bench_function("memmem::find", |b| {
9191
b.iter(|| {
92-
let token = black_box(*tokens.next().unwrap());
92+
let token = black_box(tokens.next().unwrap());
9393
let mut pos: usize = 0;
9494
while let Some(found) = memmem::find(&haystack[pos..], token) {
9595
pos += found + token.len();
@@ -103,7 +103,7 @@ fn bench_substring_forward(
103103
if should_run("substring-forward/memmem::Finder") {
104104
g.bench_function("memmem::Finder", |b| {
105105
b.iter(|| {
106-
let token = black_box(*tokens.next().unwrap());
106+
let token = black_box(tokens.next().unwrap());
107107
let finder = memmem::Finder::new(token);
108108
let mut pos: usize = 0;
109109
while let Some(found) = finder.find(&haystack[pos..]) {
@@ -118,7 +118,7 @@ fn bench_substring_forward(
118118
if should_run("substring-forward/std::str::find") {
119119
g.bench_function("std::str::find", |b| {
120120
b.iter(|| {
121-
let token = black_box(*tokens.next().unwrap());
121+
let token = black_box(tokens.next().unwrap());
122122
let mut pos = 0;
123123
while let Some(found) = haystack[pos..].find(token) {
124124
pos += found + token.len();
@@ -132,7 +132,7 @@ fn bench_substring_forward(
132132
fn bench_substring_backward(
133133
g: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
134134
haystack: &[u8],
135-
needles: &[&[u8]],
135+
needles: &BytesCowsAuto,
136136
) {
137137
g.throughput(Throughput::Bytes(haystack.len() as u64));
138138

@@ -141,7 +141,7 @@ fn bench_substring_backward(
141141
if should_run("substring-backward/stringzilla::rfind") {
142142
g.bench_function("stringzilla::rfind", |b| {
143143
b.iter(|| {
144-
let token = black_box(*tokens.next().unwrap());
144+
let token = black_box(tokens.next().unwrap());
145145
let mut pos: Option<usize> = Some(haystack.len());
146146
while let Some(end) = pos {
147147
if let Some(found) = sz::rfind(&haystack[..end], token) {
@@ -159,7 +159,7 @@ fn bench_substring_backward(
159159
if should_run("substring-backward/memmem::rfind") {
160160
g.bench_function("memmem::rfind", |b| {
161161
b.iter(|| {
162-
let token = black_box(*tokens.next().unwrap());
162+
let token = black_box(tokens.next().unwrap());
163163
let mut pos: Option<usize> = Some(haystack.len());
164164
while let Some(end) = pos {
165165
if let Some(found) = memmem::rfind(&haystack[..end], token) {
@@ -177,7 +177,7 @@ fn bench_substring_backward(
177177
if should_run("substring-backward/memmem::FinderRev") {
178178
g.bench_function("memmem::FinderRev", |b| {
179179
b.iter(|| {
180-
let token = black_box(*tokens.next().unwrap());
180+
let token = black_box(tokens.next().unwrap());
181181
let finder = memmem::FinderRev::new(token);
182182
let mut pos: Option<usize> = Some(haystack.len());
183183
while let Some(end) = pos {
@@ -196,7 +196,7 @@ fn bench_substring_backward(
196196
if should_run("substring-backward/std::str::rfind") {
197197
g.bench_function("std::str::rfind", |b| {
198198
b.iter(|| {
199-
let token = black_box(*tokens.next().unwrap());
199+
let token = black_box(tokens.next().unwrap());
200200
let mut pos: Option<usize> = Some(haystack.len());
201201
while let Some(end) = pos {
202202
if let Some(found) = haystack[..end].rfind(token) {
@@ -214,7 +214,7 @@ fn bench_substring_backward(
214214
fn bench_byteset_forward(
215215
g: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
216216
haystack: &[u8],
217-
needles: &[&[u8]],
217+
needles: &BytesCowsAuto,
218218
) {
219219
g.throughput(Throughput::Bytes(3 * haystack.len() as u64));
220220

bench_fingerprints.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ RUSTFLAGS="-C target-cpu=native" \
4747
use core::convert::TryInto;
4848
use std::collections::hash_map::DefaultHasher;
4949
use std::env;
50-
use std::fs;
5150
use std::hash::{Hash, Hasher};
5251

5352
use criterion::{Criterion, Throughput};

bench_sequence.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ use std::hint::black_box;
3434
use std::sync::Arc;
3535

3636
use criterion::{Criterion, SamplingMode};
37+
use stringtape::CharsCowsAuto;
3738

3839
use arrow::array::{ArrayRef, LargeStringArray};
3940
use arrow::compute::{lexsort_to_indices, SortColumn};
4041
use polars::prelude::*;
4142
use rayon::prelude::*;
42-
use stringtape::CharsCowsAuto;
4343
use stringzilla::sz;
4444

4545
mod utils;
@@ -246,8 +246,9 @@ fn main() {
246246
if tokens_bytes.is_empty() {
247247
panic!("No tokens found in the dataset.");
248248
}
249-
// Cast BytesCowsAuto to CharsCowsAuto for UTF-8 string benchmarks (StringTape 2.2+)
250-
let tokens = tokens_bytes
249+
// Leak BytesCowsAuto to get 'static lifetime, then cast to CharsCowsAuto for UTF-8 string benchmarks
250+
let tokens_bytes_static: &'static _ = Box::leak(Box::new(tokens_bytes));
251+
let tokens = tokens_bytes_static
251252
.as_chars()
252253
.expect("Dataset must be valid UTF-8");
253254

bench_similarities.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ RUSTFLAGS="-C target-cpu=native" \
4444
"#]
4545
use core::convert::TryInto;
4646
use std::env;
47-
use std::fs;
4847

4948
use criterion::{Criterion, Throughput};
5049
use fork_union::count_logical_cores;

utils.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pub fn reclaim_memory() {
2727
/// Can be cast to CharsCowsAuto for UTF-8 string benchmarks (StringTape 2.2+).
2828
/// Supports `STRINGWARS_MAX_TOKENS` to limit the number of tokens loaded.
2929
/// Logs dataset statistics to stderr.
30+
#[allow(dead_code)]
3031
pub fn load_dataset() -> BytesCowsAuto<'static> {
3132
let dataset_path =
3233
env::var("STRINGWARS_DATASET").expect("STRINGWARS_DATASET environment variable not set");
@@ -151,6 +152,7 @@ pub fn load_dataset() -> BytesCowsAuto<'static> {
151152
}
152153

153154
/// Format large numbers with thousand separators for readability
155+
#[allow(dead_code)]
154156
fn format_number(n: u64) -> String {
155157
let s = n.to_string();
156158
let mut result = String::new();
@@ -222,6 +224,7 @@ pub fn should_run(name: &str) -> bool {
222224
use std::sync::atomic::{AtomicU64, Ordering};
223225

224226
// Simple SI scaling helper
227+
#[allow(dead_code)]
225228
#[cfg(any(
226229
feature = "bench_similarities",
227230
feature = "bench_fingerprints",
@@ -242,6 +245,7 @@ fn scale_si(mut v: f64) -> (f64, &'static str) {
242245
}
243246
}
244247

248+
#[allow(dead_code)]
245249
#[cfg(any(
246250
feature = "bench_similarities",
247251
feature = "bench_fingerprints",
@@ -260,6 +264,7 @@ fn format_seconds(value: f64) -> String {
260264
}
261265
}
262266

267+
#[allow(dead_code)]
263268
#[cfg(feature = "bench_similarities")]
264269
pub struct CupsFormatter;
265270
#[cfg(feature = "bench_similarities")]
@@ -325,6 +330,7 @@ impl ValueFormatter for CupsFormatter {
325330
}
326331
}
327332

333+
#[allow(dead_code)]
328334
#[cfg(feature = "bench_fingerprints")]
329335
pub struct HashesFormatter;
330336
#[cfg(feature = "bench_fingerprints")]
@@ -421,6 +427,7 @@ impl ValueFormatter for HashesFormatter {
421427

422428
// Measurement wrappers that mirror WallTime but override formatting.
423429

430+
#[allow(dead_code)]
424431
#[cfg(feature = "bench_similarities")]
425432
#[derive(Clone, Default)]
426433
pub struct CupsWallTime;
@@ -455,6 +462,7 @@ impl Measurement for CupsWallTime {
455462
}
456463
}
457464

465+
#[allow(dead_code)]
458466
#[cfg(feature = "bench_fingerprints")]
459467
#[derive(Clone, Default)]
460468
pub struct HashesWallTime;
@@ -490,21 +498,25 @@ impl Measurement for HashesWallTime {
490498
}
491499

492500
// Global ratio to let the formatter print both hashes/s and bytes/s
501+
#[allow(dead_code)]
493502
#[cfg(feature = "bench_fingerprints")]
494503
static FINGERPRINTS_BYTES_PER_HASH_BITS: AtomicU64 = AtomicU64::new(0);
495504

505+
#[allow(dead_code)]
496506
#[cfg(feature = "bench_fingerprints")]
497507
pub fn set_fingerprints_bytes_per_hash(v: f64) {
498508
FINGERPRINTS_BYTES_PER_HASH_BITS.store(v.to_bits(), Ordering::Relaxed);
499509
}
500510

511+
#[allow(dead_code)]
501512
#[cfg(feature = "bench_fingerprints")]
502513
fn get_bytes_per_hash() -> f64 {
503514
let bits = FINGERPRINTS_BYTES_PER_HASH_BITS.load(Ordering::Relaxed);
504515
f64::from_bits(bits)
505516
}
506517

507518
// Comparisons/sec formatter: k/M/G cmp/s
519+
#[allow(dead_code)]
508520
#[cfg(feature = "bench_sequence")]
509521
pub struct ComparisonsFormatter;
510522

@@ -570,6 +582,7 @@ impl ValueFormatter for ComparisonsFormatter {
570582
}
571583
}
572584

585+
#[allow(dead_code)]
573586
#[cfg(feature = "bench_sequence")]
574587
#[derive(Clone, Default)]
575588
pub struct ComparisonsWallTime;

0 commit comments

Comments
 (0)