Skip to content

Commit ef8a3f5

Browse files
cramertj and copybara-github
authored and committed
Add Interner
PiperOrigin-RevId: 715419261 Change-Id: I373096e898643d5bbb41151c8608cf75b72ccb23
1 parent b96ca12 commit ef8a3f5

File tree

3 files changed

+222
-0
lines changed

3 files changed

+222
-0
lines changed

common/BUILD

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ load(
55
"@rules_rust//rust:defs.bzl",
66
"rust_library",
77
"rust_proc_macro",
8+
"rust_test",
89
)
910
load("//common:crubit_wrapper_macros_oss.bzl", "crubit_cc_test", "crubit_rust_test")
1011

@@ -127,6 +128,24 @@ cc_library(
127128
],
128129
)
129130

131+
# Library target built from interner.rs; its only declared dependency is the
# bumpalo crate.
rust_library(
132+
name = "interner",
133+
srcs = ["interner.rs"],
134+
deps = [
135+
"@crate_index//:bumpalo",
136+
],
137+
)
138+
139+
# Test target built from interner_test.rs. It depends on the ":interner"
# library above plus the bumpalo and googletest crates.
rust_test(
140+
name = "interner_test",
141+
srcs = ["interner_test.rs"],
142+
deps = [
143+
":interner",
144+
"@crate_index//:bumpalo",
145+
"@crate_index//:googletest",
146+
],
147+
)
148+
130149
rust_library(
131150
name = "memoized",
132151
srcs = ["memoized.rs"],

common/interner.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Part of the Crubit project, under the Apache License v2.0 with LLVM
2+
// Exceptions. See /LICENSE for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
use bumpalo::Bump;
6+
use std::cell::RefCell;
7+
use std::collections::HashSet;
8+
use std::hash::Hash;
9+
10+
/// An interned value which performs by-pointer comparison.
///
/// Equality, ordering and hashing are all based on the identity of the
/// referenced allocation, never on the pointee's contents. For unsized `T`
/// (slices, `str`) the identity is the full wide pointer, i.e. the address
/// *and* the length.
///
/// Note that all values for a given type must originate from the same interner
/// or they will compare not-equal.
pub struct Interned<'arena, T: ?Sized>(&'arena T);

impl<T: ?Sized> Copy for Interned<'_, T> {}
impl<T: ?Sized> Clone for Interned<'_, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'arena, T: ?Sized + 'arena> Interned<'arena, T> {
    /// Access the inner reference.
    ///
    /// Note that this can also be accessed using `*interned`.
    #[inline(always)]
    pub fn inner(&self) -> &'arena T {
        self.0
    }

    /// Returns the pointer used as this value's identity.
    ///
    /// The pointer is deliberately kept wide (address plus length metadata
    /// for slices and `str`). Erasing the metadata would make two distinct
    /// values that merely start at the same address — for example an empty
    /// slice and a non-empty one — compare, hash and order as equal.
    #[inline(always)]
    fn ptr(&self) -> *const T {
        self.0 as *const T
    }
}

impl<T: ?Sized + std::fmt::Debug> std::fmt::Debug for Interned<'_, T> {
    /// Formats the pointee exactly as its own `Debug` impl would.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(self.0, f)
    }
}

impl<'arena, T: ?Sized + 'arena> std::ops::Deref for Interned<'arena, T> {
    // The target is the reference itself (not `T`) so that `*interned` yields
    // a `&'arena T` carrying the full arena lifetime.
    type Target = &'arena T;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T: ?Sized> PartialEq for Interned<'_, T> {
    /// Two values are equal iff they refer to the same allocation.
    fn eq(&self, other: &Self) -> bool {
        self.ptr() == other.ptr()
    }
}

impl<T: ?Sized> Eq for Interned<'_, T> {}

impl<T: ?Sized> std::cmp::PartialOrd for Interned<'_, T> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<T: ?Sized> std::cmp::Ord for Interned<'_, T> {
    /// Orders by pointer identity; the order is unrelated to the pointee's
    /// own ordering.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.ptr().cmp(&other.ptr())
    }
}

impl<T: ?Sized> std::hash::Hash for Interned<'_, T> {
    /// Hashes the pointer identity, consistent with `PartialEq`.
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        std::ptr::hash(self.ptr(), state)
    }
}
79+
80+
/// An interner for values of type `T`.
81+
pub struct Interner<'arena, T: ?Sized> {
82+
alloc: &'arena Bump,
83+
map: RefCell<HashSet<&'arena T>>,
84+
}
85+
86+
impl<'arena, T: ?Sized> Interner<'arena, T> {
87+
/// Create a new interner which will store interned values in `alloc`.
88+
pub fn new(alloc: &'arena Bump) -> Self {
89+
Self { alloc, map: Default::default() }
90+
}
91+
}
92+
93+
impl<'arena, T: Hash + Eq + Copy> Interner<'arena, T> {
94+
/// Intern `value` or access the interned copy if one exists.
95+
///
96+
/// Note: the `Copy` bound on this method is not strictly required, but
97+
/// the interner will not run `Drop` glue for its contents, so users must
98+
/// ensure that they do not store types which need to be dropped.
99+
pub fn intern(&self, value: T) -> Interned<'arena, T> {
100+
let mut map = self.map.borrow_mut();
101+
if let Some(interned) = map.get(&value) {
102+
return Interned(interned);
103+
}
104+
let v = self.alloc.alloc(value);
105+
map.insert(v);
106+
Interned(v)
107+
}
108+
}
109+
110+
impl<'arena, Elem: Hash + Eq + Copy> Interner<'arena, [Elem]> {
111+
/// Intern `value` or access the interned copy if one exists.
112+
pub fn intern_slice(&self, value: &[Elem]) -> Interned<'arena, [Elem]> {
113+
let mut map = self.map.borrow_mut();
114+
if let Some(interned) = map.get(value) {
115+
return Interned(interned);
116+
}
117+
let v = self.alloc.alloc_slice_copy(value);
118+
map.insert(v);
119+
Interned(v)
120+
}
121+
}
122+
123+
impl<'arena> Interner<'arena, str> {
124+
/// Intern `value` or access the interned copy if one exists.
125+
pub fn intern_str(&self, value: &str) -> Interned<'arena, str> {
126+
let mut map = self.map.borrow_mut();
127+
if let Some(interned) = map.get(value) {
128+
return Interned(interned);
129+
}
130+
let v = self.alloc.alloc_str(value);
131+
map.insert(v);
132+
Interned(v)
133+
}
134+
}

common/interner_test.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Part of the Crubit project, under the Apache License v2.0 with LLVM
2+
// Exceptions. See /LICENSE for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
use bumpalo::Bump;
6+
use googletest::{expect_eq, expect_ne, gtest};
7+
use interner::Interner;
8+
9+
#[gtest]
10+
fn test_intern_does_not_change_value() {
11+
let bump = Bump::default();
12+
let interner = Interner::new(&bump);
13+
let values = [2, 4, 6, 0, 1, 2];
14+
for value in values {
15+
let interned = interner.intern(value);
16+
expect_eq!(**interned, value);
17+
}
18+
}
19+
20+
#[gtest]
21+
fn test_multiple_intern_calls_with_same_value_return_same_reference() {
22+
let bump = Bump::default();
23+
let interner = Interner::new(&bump);
24+
let entries = [1, 2, 3, 4, 5];
25+
let interned = entries.map(|v| interner.intern(v));
26+
let interned_again = entries.map(|v| interner.intern(v));
27+
for (first, second) in interned.iter().zip(interned_again.iter()) {
28+
expect_eq!(**first as *const i32, **second as *const i32);
29+
expect_eq!(first, second);
30+
}
31+
}
32+
33+
#[gtest]
34+
fn test_intern_with_different_values_are_not_equal() {
35+
let bump = Bump::default();
36+
let interner = Interner::new(&bump);
37+
let first = interner.intern(1);
38+
let second = interner.intern(2);
39+
expect_ne!(first, second);
40+
}
41+
42+
#[gtest]
43+
fn test_multiple_intern_calls_with_same_slice_value_return_same_reference() {
44+
let bump = Bump::default();
45+
let interner = Interner::<[i32]>::new(&bump);
46+
47+
let data = [1, 2, 3, 1, 2];
48+
expect_eq!(interner.intern_slice(&data[0..2]), interner.intern_slice(&data[3..5]));
49+
}
50+
51+
#[gtest]
52+
fn test_intern_slice_with_same_data_but_different_length_returns_different_reference() {
53+
let bump = Bump::default();
54+
let interner = Interner::<[i32]>::new(&bump);
55+
56+
let data = [1, 2, 3, 1, 2];
57+
expect_ne!(interner.intern_slice(&data[0..2]), interner.intern_slice(&data[0..3]));
58+
}
59+
60+
#[gtest]
61+
fn test_multiple_intern_calls_with_same_str_return_same_reference() {
62+
let bump = Bump::default();
63+
let interner = Interner::<str>::new(&bump);
64+
let matches: Vec<&str> = "aaaabcxxxabcyyyabcdef".matches("abc").collect();
65+
for match_ in matches {
66+
let interned = interner.intern_str(match_);
67+
expect_eq!(interner.intern_str("abc"), interned);
68+
}
69+
}

0 commit comments

Comments
 (0)