Skip to content

Commit 3309d3f

Browse files
committed
[oxide] Expose experimental Rust parser setup (#11116)
* Make `sequential` and `parallel` versions of a new (temporary) `parse_candidate_strings` function.
* Use bitmasks for the strategy, so that only a number is sent over the wire instead of a serialized object.
* Use cleaner `match` syntax.
1 parent 225a3fa commit 3309d3f

File tree

2 files changed

+128
-16
lines changed

2 files changed

+128
-16
lines changed

oxide/crates/core/src/lib.rs

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::parser::Extractor;
2+
use fxhash::FxHashSet;
23
use rayon::prelude::*;
34
use std::path::PathBuf;
45
use tracing::event;
@@ -11,14 +12,7 @@ pub mod parser;
1112
pub mod utility;
1213
pub mod variant;
1314

14-
#[derive(Debug, Clone)]
15-
pub struct ChangedContent {
16-
pub file: Option<PathBuf>,
17-
pub content: Option<String>,
18-
pub extension: String,
19-
}
20-
21-
pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
15+
fn init_tracing() {
2216
if matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || value.eq("1") || value.eq("true") || value.contains("tailwind"))
2317
{
2418
tracing_subscriber::fmt()
@@ -27,10 +21,63 @@ pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>)
2721
.compact()
2822
.init();
2923
}
24+
}
25+
26+
/// One unit of input to scan for candidate strings.
///
/// `read_all_files_sync` reads the file from disk when only `file` is set, uses the inline text
/// when only `content` is set, and produces an empty blob for any other combination.
#[derive(Debug, Clone)]
pub struct ChangedContent {
    /// Path of the file to read from disk.
    pub file: Option<PathBuf>,
    /// Inline content to use instead of reading a file.
    pub content: Option<String>,
    /// Extension associated with this content.
    /// NOTE(review): not consulted by the code visible in this file — confirm where it is used.
    pub extension: String,
}
32+
33+
/// File-reading strategy, stored in the two lowest bits of the options byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IO {
    /// Selects the sequential reader.
    Sequential = 0b0001,
    /// Selects the parallel reader.
    Parallel = 0b0010,
}

impl From<u8> for IO {
    /// Decodes the I/O strategy from a combined options byte.
    ///
    /// Only the two lowest bits are inspected, so the same byte can also carry a [`Parsing`]
    /// strategy in the next two bits.
    ///
    /// # Panics
    ///
    /// Panics when the two lowest bits are both clear or both set, because neither pattern
    /// names a strategy.
    fn from(item: u8) -> Self {
        match item & 0b0011 {
            0b0001 => IO::Sequential,
            0b0010 => IO::Parallel,
            _ => unimplemented!("Unknown 'IO' strategy"),
        }
    }
}
48+
49+
/// Parsing strategy, stored in bits 2 and 3 of the options byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Parsing {
    /// Selects the sequential parser.
    Sequential = 0b0100,
    /// Selects the parallel parser.
    Parallel = 0b1000,
}

impl From<u8> for Parsing {
    /// Decodes the parsing strategy from a combined options byte.
    ///
    /// Only bits 2 and 3 are inspected; the two lowest bits are ignored here.
    ///
    /// # Panics
    ///
    /// Panics when bits 2 and 3 are both clear or both set, because neither pattern names a
    /// strategy.
    fn from(item: u8) -> Self {
        match item & 0b1100 {
            0b0100 => Parsing::Sequential,
            0b1000 => Parsing::Parallel,
            _ => unimplemented!("Unknown 'Parsing' strategy"),
        }
    }
}
64+
65+
pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
66+
init_tracing();
3167
parse_all_blobs(read_all_files(changed_content))
3268
}
3369

70+
pub fn parse_candidate_strings(input: Vec<ChangedContent>, options: u8) -> Vec<String> {
71+
init_tracing();
72+
73+
match (IO::from(options), Parsing::from(options)) {
74+
(IO::Sequential, Parsing::Sequential) => parse_all_blobs_sync(read_all_files_sync(input)),
75+
(IO::Sequential, Parsing::Parallel) => parse_all_blobs_sync(read_all_files(input)),
76+
(IO::Parallel, Parsing::Sequential) => parse_all_blobs(read_all_files_sync(input)),
77+
(IO::Parallel, Parsing::Parallel) => parse_all_blobs(read_all_files(input)),
78+
}
79+
}
80+
3481
#[tracing::instrument(skip(changed_content))]
3582
fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
3683
event!(
@@ -49,6 +96,24 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
4996
.collect()
5097
}
5198

99+
#[tracing::instrument(skip(changed_content))]
100+
fn read_all_files_sync(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
101+
event!(
102+
tracing::Level::INFO,
103+
"Reading {:?} file(s)",
104+
changed_content.len()
105+
);
106+
107+
changed_content
108+
.into_iter()
109+
.map(|c| match (c.file, c.content) {
110+
(Some(file), None) => std::fs::read(file).unwrap(),
111+
(None, Some(content)) => content.into_bytes(),
112+
_ => Default::default(),
113+
})
114+
.collect()
115+
}
116+
52117
#[tracing::instrument(skip(blobs))]
53118
fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
54119
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
@@ -72,3 +137,27 @@ fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
72137
result.sort();
73138
result
74139
}
140+
141+
#[tracing::instrument(skip(blobs))]
142+
fn parse_all_blobs_sync(blobs: Vec<Vec<u8>>) -> Vec<String> {
143+
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
144+
let input = &input[..];
145+
146+
let mut result: Vec<String> = input
147+
.iter()
148+
.map(|input| Extractor::unique(input, Default::default()))
149+
.fold(FxHashSet::default(), |mut a, b| {
150+
a.extend(b);
151+
a
152+
})
153+
.into_iter()
154+
.map(|s| {
155+
// SAFETY: When we parsed the candidates, we already guaranteed that the byte slices
156+
// are valid, therefore we don't have to re-check here when we want to convert it back
157+
// to a string.
158+
unsafe { String::from_utf8_unchecked(s.to_vec()) }
159+
})
160+
.collect();
161+
result.sort();
162+
result
163+
}

oxide/crates/node/src/lib.rs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use napi::bindgen_prelude::ToNapiValue;
12
use std::path::PathBuf;
23

34
#[macro_use]
@@ -11,16 +12,38 @@ pub struct ChangedContent {
1112
pub extension: String,
1213
}
1314

15+
impl From<ChangedContent> for tailwindcss_core::ChangedContent {
16+
fn from(changed_content: ChangedContent) -> Self {
17+
tailwindcss_core::ChangedContent {
18+
file: changed_content.file.map(PathBuf::from),
19+
content: changed_content.content,
20+
extension: changed_content.extension,
21+
}
22+
}
23+
}
24+
1425
#[napi]
1526
pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>) -> Vec<String> {
1627
tailwindcss_core::parse_candidate_strings_from_files(
17-
changed_content
18-
.into_iter()
19-
.map(|changed_content| tailwindcss_core::ChangedContent {
20-
file: changed_content.file.map(PathBuf::from),
21-
content: changed_content.content,
22-
extension: changed_content.extension,
23-
})
24-
.collect(),
28+
changed_content.into_iter().map(Into::into).collect(),
2529
)
2630
}
31+
32+
#[derive(Debug)]
33+
#[napi]
34+
pub enum IO {
35+
Sequential = 0b0001,
36+
Parallel = 0b0010,
37+
}
38+
39+
#[derive(Debug)]
40+
#[napi]
41+
pub enum Parsing {
42+
Sequential = 0b0100,
43+
Parallel = 0b1000,
44+
}
45+
46+
#[napi]
47+
pub fn parse_candidate_strings(input: Vec<ChangedContent>, strategy: u8) -> Vec<String> {
48+
tailwindcss_core::parse_candidate_strings(input.into_iter().map(Into::into).collect(), strategy)
49+
}

0 commit comments

Comments
 (0)