Merged
Changes from 7 commits
6 changes: 6 additions & 0 deletions Cargo.toml
@@ -24,6 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only.
[dev-dependencies]
quickcheck = "0.7"
criterion = "0.5"
proptest = "1.7.0"

[[bench]]
name = "chars"
@@ -36,3 +37,8 @@ harness = false
[[bench]]
name = "word_bounds"
harness = false

[[bench]]
name = "unicode_word_indices"
harness = false

4 changes: 2 additions & 2 deletions benches/chars.rs
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
for file in FILES {
group.bench_with_input(
BenchmarkId::new("grapheme", file),
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
|b, content| b.iter(|| grapheme(content)),
);
}

for file in FILES {
group.bench_with_input(
BenchmarkId::new("scalar", file),
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
|b, content| b.iter(|| scalar(content)),
);
}
1 change: 1 addition & 0 deletions benches/texts/log.txt
@@ -0,0 +1 @@
2018-07-12 13:59:01 UTC | ERROR | (worker.go:131 in process) | Too many errors for endpoint 'dummy/api/v1/check_run?api_key=*************************00000': retrying later
37 changes: 37 additions & 0 deletions benches/unicode_word_indices.rs
@@ -0,0 +1,37 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
"log", //"arabic",
"english",
//"hindi",
"japanese",
//"korean",
//"mandarin",
//"russian",
//"source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
for w in text.unicode_word_indices() {
black_box(w);
}
}

fn bench_all(c: &mut Criterion) {
let mut group = c.benchmark_group("unicode_word_indices");

for file in FILES {
let input = fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap();
group.throughput(criterion::Throughput::Bytes(input.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(file), &input, |b, content| {
b.iter(|| grapheme(content))
});
}
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
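
For readers unfamiliar with the API being benchmarked above: a minimal usage sketch of unicode_word_indices(), which yields (byte offset, word) pairs while skipping whitespace and punctuation. The sample text and expected pairs are illustrative only and are not part of this PR.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    // unicode_word_indices() iterates over each Unicode word together with
    // its starting byte offset, skipping whitespace and punctuation.
    let text = "The quick (\"brown\") fox";
    let words: Vec<(usize, &str)> = text.unicode_word_indices().collect();

    let expected: Vec<(usize, &str)> = vec![(0, "The"), (4, "quick"), (12, "brown"), (20, "fox")];
    assert_eq!(words, expected);
}

With the [[bench]] entry added to Cargo.toml above, the new benchmark can be run on its own with `cargo bench --bench unicode_word_indices`.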
2 changes: 1 addition & 1 deletion benches/word_bounds.rs
@@ -27,7 +27,7 @@ fn bench_all(c: &mut Criterion) {
for file in FILES {
group.bench_with_input(
BenchmarkId::new("grapheme", file),
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
&fs::read_to_string(format!("benches/texts/{file}.txt",)).unwrap(),
|b, content| b.iter(|| grapheme(content)),
);
}
4 changes: 2 additions & 2 deletions benches/words.rs
@@ -41,15 +41,15 @@ fn bench_all(c: &mut Criterion) {
for file in FILES {
group.bench_with_input(
BenchmarkId::new("grapheme", file),
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
|b, content| b.iter(|| grapheme(content)),
);
}

for file in FILES {
group.bench_with_input(
BenchmarkId::new("scalar", file),
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
&fs::read_to_string(format!("benches/texts/{file}.txt")).unwrap(),
|b, content| b.iter(|| scalar(content)),
);
}
21 changes: 13 additions & 8 deletions src/lib.rs
@@ -56,11 +56,16 @@
)]
#![no_std]

#[cfg(test)]
extern crate std;

pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
pub use grapheme::{GraphemeIndices, Graphemes};
pub use sentence::{USentenceBoundIndices, USentenceBounds, UnicodeSentences};
pub use tables::UNICODE_VERSION;
pub use word::{UWordBoundIndices, UWordBounds, UnicodeWordIndices, UnicodeWords};
pub use word::{UWordBoundIndices, UWordBounds};

use crate::word::{UnicodeWordIndices, UnicodeWords};

mod grapheme;
mod sentence;
@@ -248,7 +253,7 @@ pub trait UnicodeSegmentation {

impl UnicodeSegmentation for str {
#[inline]
fn graphemes(&self, is_extended: bool) -> Graphemes {
fn graphemes(&self, is_extended: bool) -> Graphemes<'_> {
grapheme::new_graphemes(self, is_extended)
}

@@ -258,32 +263,32 @@ impl UnicodeSegmentation for str {
}

#[inline]
fn unicode_words(&self) -> UnicodeWords {
fn unicode_words(&self) -> UnicodeWords<'_> {
word::new_unicode_words(self)
}

#[inline]
fn unicode_word_indices(&self) -> UnicodeWordIndices {
fn unicode_word_indices(&self) -> UnicodeWordIndices<'_> {
word::new_unicode_word_indices(self)
}

#[inline]
fn split_word_bounds(&self) -> UWordBounds {
fn split_word_bounds(&self) -> UWordBounds<'_> {
word::new_word_bounds(self)
}

#[inline]
fn split_word_bound_indices(&self) -> UWordBoundIndices {
fn split_word_bound_indices(&self) -> UWordBoundIndices<'_> {
word::new_word_bound_indices(self)
}

#[inline]
fn unicode_sentences(&self) -> UnicodeSentences {
fn unicode_sentences(&self) -> UnicodeSentences<'_> {
sentence::new_unicode_sentences(self)
}

#[inline]
fn split_sentence_bounds(&self) -> USentenceBounds {
fn split_sentence_bounds(&self) -> USentenceBounds<'_> {
sentence::new_sentence_bounds(self)
}
