Skip to content

Commit 254a16d

Browse files
authored
Get wasm-pack supposedly working (#16)
1 parent 28b35d2 commit 254a16d

File tree

18 files changed

+396
-10
lines changed

18 files changed

+396
-10
lines changed

Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,10 @@ unnested_or_patterns = "warn"
109109
unreadable_literal = "warn"
110110
verbose_bit_mask = "warn"
111111
verbose_file_reads = "warn"
112+
113+
# Profiles must be set at the workspace level
114+
[profile.dev]
115+
# Tell `rustc` to optimize for small code size to
116+
# work around "too many locals" error from wasm-pack
117+
# https://github.com/wasm-bindgen/wasm-bindgen/issues/3451#issuecomment-1562982835
118+
opt-level = "s"

crates/tree-sitter-qmd/tree-sitter-markdown/src/scanner.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#include <assert.h>
33
#include <ctype.h>
44
#include <string.h>
5-
#include <wchar.h>
65
#include <wctype.h>
76

87
// For explanation of the tokens see grammar.js
@@ -200,7 +199,7 @@ static size_t roundup_32(size_t x) {
200199
}
201200

202201
typedef struct {
203-
unsigned own_size;
202+
unsigned own_size;
204203
// Size of the serialized state of the scanner.
205204
// This is used to determine if we're too close to hitting
206205
// tree-sitter's maximum serialized size limit of 1024 bytes,
@@ -452,7 +451,7 @@ static bool parse_fenced_div_marker(Scanner *s, TSLexer *lexer,
452451
//
453452
// otherwise, it can only be a valid marker for the end of a fenced div
454453

455-
while (!lexer->eof(lexer) &&
454+
while (!lexer->eof(lexer) &&
456455
(lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
457456
advance(s, lexer);
458457
}
@@ -831,7 +830,7 @@ static bool parse_ordered_list_marker(Scanner *s, TSLexer *lexer,
831830
size_t digits = 1;
832831
bool dont_interrupt = lexer->lookahead != '1';
833832
advance(s, lexer);
834-
while (isdigit(lexer->lookahead)) {
833+
while (iswdigit(lexer->lookahead)) {
835834
dont_interrupt = true;
836835
digits++;
837836
advance(s, lexer);
@@ -1145,7 +1144,8 @@ static bool parse_html_block(Scanner *s, TSLexer *lexer,
11451144
if (next_symbol_valid) {
11461145
// try block 1 names
11471146
for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_1; i++) {
1148-
if (strcmp(name, HTML_TAG_NAMES_RULE_1[i]) == 0) {
1147+
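// FIXME: the size is a guess. If name_length is the length of `name`, this only compares a prefix (e.g. "pr" would match "pre"); also check that HTML_TAG_NAMES_RULE_1[i][name_length] == '\0'.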
1148+
if (strncmp(name, HTML_TAG_NAMES_RULE_1[i], name_length) == 0) {
11491149
if (starting_slash) {
11501150
if (valid_symbols[HTML_BLOCK_1_END]) {
11511151
lexer->result_symbol = HTML_BLOCK_1_END;
@@ -1174,7 +1174,8 @@ static bool parse_html_block(Scanner *s, TSLexer *lexer,
11741174
if (next_symbol_valid || tag_closed) {
11751175
// try block 2 names
11761176
for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_7; i++) {
1177-
if (strcmp(name, HTML_TAG_NAMES_RULE_7[i]) == 0 &&
1177+
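// FIXME: the size is a guess. If name_length is the length of `name`, this only compares a prefix, so a shorter name can match a longer tag; also check that HTML_TAG_NAMES_RULE_7[i][name_length] == '\0'.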
1178+
if (strncmp(name, HTML_TAG_NAMES_RULE_7[i], name_length) == 0 &&
11781179
valid_symbols[HTML_BLOCK_6_START]) {
11791180
lexer->result_symbol = HTML_BLOCK_6_START;
11801181
if (!s->simulate) {

crates/wasm-qmd-parser/Cargo.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,3 @@ console_error_panic_hook = { version = "0.1.7", optional = true }
2222

2323
[dev-dependencies]
2424
wasm-bindgen-test = "0.3.34"
25-
26-
[profile.release]
27-
# Tell `rustc` to optimize for small code size.
28-
opt-level = "s"
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
use std::{
2+
alloc::{self, Layout},
3+
ffi::{c_char, c_int, c_void},
4+
mem::align_of,
5+
ptr,
6+
};
7+
8+
/* -------------------------------- stdlib.h -------------------------------- */
9+
10+
/// Replacement for C's `abort()`: panics with the message "Aborted from C".
#[no_mangle]
11+
pub unsafe extern "C" fn abort() {
12+
panic!("Aborted from C");
13+
}
14+
15+
#[no_mangle]
16+
pub unsafe extern "C" fn malloc(size: usize) -> *mut c_void {
17+
if size == 0 {
18+
return ptr::null_mut();
19+
}
20+
21+
let (layout, offset_to_data) = layout_for_size_prepended(size);
22+
let buf = alloc::alloc(layout);
23+
store_layout(buf, layout, offset_to_data)
24+
}
25+
26+
#[no_mangle]
27+
pub unsafe extern "C" fn calloc(count: usize, size: usize) -> *mut c_void {
28+
if count == 0 || size == 0 {
29+
return ptr::null_mut();
30+
}
31+
32+
let (layout, offset_to_data) = layout_for_size_prepended(size * count);
33+
let buf = alloc::alloc_zeroed(layout);
34+
store_layout(buf, layout, offset_to_data)
35+
}
36+
37+
#[no_mangle]
38+
pub unsafe extern "C" fn realloc(buf: *mut c_void, new_size: usize) -> *mut c_void {
39+
if buf.is_null() {
40+
malloc(new_size)
41+
} else if new_size == 0 {
42+
free(buf);
43+
ptr::null_mut()
44+
} else {
45+
let (old_buf, old_layout) = retrieve_layout(buf);
46+
let (new_layout, offset_to_data) = layout_for_size_prepended(new_size);
47+
let new_buf = alloc::realloc(old_buf, old_layout, new_layout.size());
48+
store_layout(new_buf, new_layout, offset_to_data)
49+
}
50+
}
51+
52+
#[no_mangle]
53+
pub unsafe extern "C" fn free(buf: *mut c_void) {
54+
if buf.is_null() {
55+
return;
56+
}
57+
let (buf, layout) = retrieve_layout(buf);
58+
alloc::dealloc(buf, layout);
59+
}
60+
61+
// In all these allocations, we store the layout before the data for later retrieval.
62+
// This is because we need to know the layout when deallocating the memory.
63+
// Here are some helper methods for that:
64+
65+
/// Recover the allocation header from a data pointer.
///
/// Returns the pointer to the start of the allocation (where the layout is
/// stored) together with the stored layout itself.
unsafe fn retrieve_layout(buf: *mut c_void) -> (*mut u8, Layout) {
    // The distance from the header to the data depends only on alignment, so
    // a zero-sized payload is enough to compute it.
    let data_align = align_of::<*const u8>() * 2;
    let empty_payload = Layout::from_size_align(0, data_align).unwrap();
    let (_, header_to_data) = Layout::new::<Layout>().extend(empty_payload).unwrap();

    let header = buf.cast::<u8>().sub(header_to_data);
    (header, header.cast::<Layout>().read())
}
76+
77+
/// Build the layout for a `size`-byte payload preceded by a stored `Layout`.
///
/// Returns the combined layout and the offset from the start of the
/// allocation to the payload. Panics if the layout cannot be constructed.
fn layout_for_size_prepended(size: usize) -> (Layout, usize) {
    let data_align = align_of::<*const u8>() * 2;
    let payload = Layout::from_size_align(size, data_align).unwrap();
    Layout::new::<Layout>().extend(payload).unwrap()
}
84+
85+
/// Write `layout` at the start of the allocation `buf` and return the pointer
/// `offset_to_data` bytes further on, where the caller's data begins.
unsafe fn store_layout(buf: *mut u8, layout: Layout, offset_to_data: usize) -> *mut c_void {
    buf.cast::<Layout>().write(layout);
    buf.add(offset_to_data).cast::<c_void>()
}
90+
91+
/* -------------------------------- string.h -------------------------------- */
92+
93+
/// C `memcpy`: copies `size` bytes from `src` to `dest` with
/// `ptr::copy_nonoverlapping` and returns `dest`.
// NOTE(review): `copy_nonoverlapping` is commonly lowered to a `memcpy` call by the
// compiler; confirm this definition cannot end up calling itself on the wasm32 target.
#[no_mangle]
94+
pub unsafe extern "C" fn memcpy(dest: *mut c_void, src: *const c_void, size: usize) -> *mut c_void {
95+
std::ptr::copy_nonoverlapping(src, dest, size);
96+
dest
97+
}
98+
99+
/// C `memmove`: copies `size` bytes from `src` to `dest` with `ptr::copy`
/// (which permits overlapping regions) and returns `dest`.
// NOTE(review): `ptr::copy` is commonly lowered to a `memmove` call by the compiler;
// confirm this definition cannot end up calling itself on the wasm32 target.
#[no_mangle]
100+
pub unsafe extern "C" fn memmove(
101+
dest: *mut c_void,
102+
src: *const c_void,
103+
size: usize,
104+
) -> *mut c_void {
105+
std::ptr::copy(src, dest, size);
106+
dest
107+
}
108+
109+
/// C `memset`: fills the `n` bytes starting at `s` with `c` truncated to `u8`
/// and returns `s`.
// NOTE(review): `from_raw_parts_mut` requires a non-null pointer even when `n` is 0;
// confirm no caller passes a null `s` with a zero length.
#[no_mangle]
110+
pub unsafe extern "C" fn memset(s: *mut c_void, c: i32, n: usize) -> *mut c_void {
111+
let slice = std::slice::from_raw_parts_mut(s as *mut u8, n);
112+
slice.fill(c as u8);
113+
s
114+
}
115+
116+
/// C `memcmp`: compares the first `n` bytes of two buffers.
///
/// Returns zero when all `n` bytes match; otherwise the difference between
/// the first pair of differing bytes (left minus right).
#[no_mangle]
pub unsafe extern "C" fn memcmp(ptr1: *const c_void, ptr2: *const c_void, n: usize) -> c_int {
    let lhs = std::slice::from_raw_parts(ptr1.cast::<u8>(), n);
    let rhs = std::slice::from_raw_parts(ptr2.cast::<u8>(), n);

    lhs.iter()
        .zip(rhs)
        .find(|(a, b)| a != b)
        .map_or(0, |(a, b)| c_int::from(*a) - c_int::from(*b))
}
129+
130+
/// C `strncmp`: compares at most `n` bytes of two NUL-terminated strings.
///
/// Comparison stops at the first differing byte or at the first NUL in the
/// left-hand string, whichever comes first. Returns zero when the compared
/// prefixes are equal; otherwise the difference between the first pair of
/// differing bytes (left minus right).
///
/// # Safety
///
/// Both pointers must be readable up to `n` bytes or up to and including
/// their NUL terminator, whichever is shorter.
#[no_mangle]
pub unsafe extern "C" fn strncmp(ptr1: *const c_void, ptr2: *const c_void, n: usize) -> c_int {
    let lhs = ptr1 as *const u8;
    let rhs = ptr2 as *const u8;

    // Read byte by byte rather than building `n`-byte slices: the strings may
    // be shorter than `n`, and a slice covering memory past the terminator
    // would be undefined behavior.
    for i in 0..n {
        let a = *lhs.add(i);
        let b = *rhs.add(i);
        if a != b || a == 0 {
            return c_int::from(a) - c_int::from(b);
        }
    }

    0
}
143+
144+
/* -------------------------------- wctype.h -------------------------------- */
145+
146+
/// C `iswspace`: true when `c` is a valid Unicode scalar value that Rust
/// classifies as whitespace; false for anything else.
#[no_mangle]
pub unsafe extern "C" fn iswspace(c: c_int) -> bool {
    match char::from_u32(c as u32) {
        Some(ch) => ch.is_whitespace(),
        None => false,
    }
}
150+
151+
/// C `iswalnum`: true when `c` is a valid Unicode scalar value that Rust
/// classifies as alphanumeric; false for anything else.
#[no_mangle]
pub unsafe extern "C" fn iswalnum(c: c_int) -> bool {
    matches!(char::from_u32(c as u32), Some(ch) if ch.is_alphanumeric())
}
155+
156+
/// C `iswdigit`: true only for the decimal digits `0` through `9`.
#[no_mangle]
pub unsafe extern "C" fn iswdigit(c: c_int) -> bool {
    match char::from_u32(c as u32) {
        Some(ch) => ch.is_ascii_digit(),
        None => false,
    }
}
160+
161+
/// C `iswalpha`: true when `c` is a valid Unicode scalar value that Rust
/// classifies as alphabetic; false for anything else.
#[no_mangle]
pub unsafe extern "C" fn iswalpha(c: c_int) -> bool {
    matches!(char::from_u32(c as u32), Some(ch) if ch.is_alphabetic())
}
165+
166+
// Note: Not provided by https://github.com/cacticouncil/lilypad, but we needed
// this one too. We could contribute this back upstream?
// https://en.cppreference.com/w/c/string/wide/towlower

/// C `towlower`: maps `c` to its lowercase counterpart.
///
/// Like the C function, this only performs 1:1 mappings. The argument is
/// returned unchanged when it is not a valid Unicode scalar value (for
/// example `WEOF`), when it has no lowercase form, or when its lowercase form
/// expands to more than one character.
#[no_mangle]
pub unsafe extern "C" fn towlower(c: c_int) -> c_int {
    let ch = match char::from_u32(c as u32) {
        Some(ch) => ch,
        // Not a character at all: C requires the argument back unchanged.
        None => return c,
    };

    let mut lowered = ch.to_lowercase();
    match (lowered.next(), lowered.next()) {
        // Exactly one output character: a genuine 1:1 mapping.
        (Some(lower), None) => lower as c_int,
        // Multi-character expansion cannot be expressed in a single return value.
        _ => c,
    }
}
177+
178+
/* --------------------------------- time.h --------------------------------- */
179+
180+
/// Stub for C's `clock()`: always panics with "clock is not supported".
#[no_mangle]
181+
pub unsafe extern "C" fn clock() -> u64 {
182+
panic!("clock is not supported");
183+
}
184+
185+
/* --------------------------------- ctype.h -------------------------------- */
186+
187+
/// C `isprint`: true when `c` lies in the printable ASCII range 32..=126.
#[no_mangle]
pub unsafe extern "C" fn isprint(c: c_int) -> bool {
    (32..=126).contains(&c)
}
191+
192+
/* --------------------------------- stdio.h -------------------------------- */
193+
194+
/// Stub for C's variadic `fprintf`: ignores its arguments and always panics
/// with "fprintf is not supported".
#[no_mangle]
195+
pub unsafe extern "C" fn fprintf(_file: *mut c_void, _format: *const c_void, _args: ...) -> c_int {
196+
panic!("fprintf is not supported");
197+
}
198+
199+
/// Stub for C's `fputs`: ignores its arguments and always panics with
/// "fputs is not supported".
#[no_mangle]
200+
pub unsafe extern "C" fn fputs(_s: *const c_void, _file: *mut c_void) -> c_int {
201+
panic!("fputs is not supported");
202+
}
203+
204+
/// Stub for C's `fputc`: ignores its arguments and always panics with
/// "fputc is not supported".
#[no_mangle]
205+
pub unsafe extern "C" fn fputc(_c: c_int, _file: *mut c_void) -> c_int {
206+
panic!("fputc is not supported");
207+
}
208+
209+
/// Stub for `fdopen`: ignores its arguments and always panics with
/// "fdopen is not supported".
#[no_mangle]
210+
pub unsafe extern "C" fn fdopen(_fd: c_int, _mode: *const c_void) -> *mut c_void {
211+
panic!("fdopen is not supported");
212+
}
213+
214+
/// Stub for C's `fclose`: ignores its argument and always panics with
/// "fclose is not supported".
#[no_mangle]
215+
pub unsafe extern "C" fn fclose(_file: *mut c_void) -> c_int {
216+
panic!("fclose is not supported");
217+
}
218+
219+
/// Stub for C's `fwrite`: ignores its arguments and always panics with
/// "fwrite is not supported".
#[no_mangle]
220+
pub unsafe extern "C" fn fwrite(
221+
_ptr: *const c_void,
222+
_size: usize,
223+
_nmemb: usize,
224+
_stream: *mut c_void,
225+
) -> usize {
226+
panic!("fwrite is not supported");
227+
}
228+
229+
/// Stub for C's `vsnprintf`: ignores its arguments and always panics with
/// "vsnprintf is not supported".
// NOTE(review): C's `vsnprintf` takes a `va_list` as its last parameter rather than
// `...`; confirm this variadic signature matches the ABI expected on the wasm32 target.
#[no_mangle]
230+
pub unsafe extern "C" fn vsnprintf(
231+
_buf: *mut c_char,
232+
_size: usize,
233+
_format: *const c_char,
234+
_args: ...
235+
) -> c_int {
236+
panic!("vsnprintf is not supported");
237+
}

crates/wasm-qmd-parser/src/lib.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
// For `vsnprintf()` and `fprintf()`, which are variadic.
2+
// Otherwise rustc yells at us that we need to enable this.
3+
#![feature(c_variadic)]
4+
5+
// Provide rust implementation of blessed stdlib functions to
6+
// tree-sitter itself and any grammars that have `scanner.c`.
7+
// Here is the list blessed for `scanner.c` usage:
8+
// https://github.com/tree-sitter/tree-sitter/blob/master/lib/src/wasm/stdlib-symbols.txt
9+
// But note that we need a few extra for tree-sitter itself.
10+
#[cfg(target_arch = "wasm32")]
11+
pub mod c_shim;
12+
113
mod utils;
214

315
use std::io;
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#pragma once
2+
3+
// Stub assert: the argument is discarded and the macro expands to a no-op expression.
#define assert(ignore) ((void)0)
4+
// static_assert is routed through the stub assert, so the condition is never checked.
// NOTE(review): this expands to an expression, so it presumably only works where an
// expression statement is allowed (not at file scope) — confirm against the callers.
#define static_assert(cnd, msg) assert(cnd && msg)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#pragma once
2+
3+
// Prototype only; this header provides no definition of isprint.
int isprint(int c);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#pragma once
2+
#include <stdint.h>
3+
4+
// Marks the target as little-endian; le16toh in this header is written on that basis.
#define TS_LITTLE_ENDIAN 1
5+
6+
// Little-endian 16-bit value to host order: returns x unchanged.
static inline uint16_t le16toh(uint16_t x) { return x; }
7+
// Big-endian 16-bit value to host order: swaps the two bytes of x.
// Uses the compiler builtin on GCC/Clang and a manual shift-and-or otherwise.
static inline uint16_t be16toh(uint16_t x)
8+
{
9+
#if defined(__GNUC__) || defined(__clang__)
10+
return __builtin_bswap16(x);
11+
#else
12+
// The bits shifted above bit 15 are dropped when the result converts back to uint16_t.
return (x << 8) | (x >> 8);
13+
#endif
14+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#pragma once
2+
3+
// printf conversion specifier for int32_t values.
// NOTE(review): "d" assumes int32_t is `int` on the target — confirm.
#define PRId32 "d"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#pragma once
2+
3+
// Minimal boolean definitions: `bool` maps to the built-in _Bool type.
#define bool _Bool
4+
// Boolean constants as plain integer literals.
#define true 1
5+
#define false 0

0 commit comments

Comments
 (0)