Skip to content

Commit 87ed9b9

Browse files
committed
[function-grep] Added easier way to create languages using tree sitter tags
Also added builtin support for js
1 parent db0d826 commit 87ed9b9

File tree

3 files changed

+103
-15
lines changed

3 files changed

+103
-15
lines changed

function-grep/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ rust-version = "1.75.0"
1212
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1313

1414
[features]
15-
default = ["rust", "python", "c", "ocaml", "java", "ruby", "go", "c-sharp"]
15+
default = ["rust", "python", "c", "ocaml", "java", "ruby", "go", "c-sharp", "javascript"]
1616
rust = ["dep:tree-sitter-rust"]
1717
ocaml = ["dep:tree-sitter-ocaml"]
1818
c = ["dep:tree-sitter-c"]
@@ -21,6 +21,7 @@ python = ["dep:tree-sitter-python"]
2121
ruby = ["dep:tree-sitter-ruby"]
2222
go = ["dep:tree-sitter-go"]
2323
c-sharp = ["dep:tree-sitter-c-sharp"]
24+
javascript = ["dep:tree-sitter-javascript"]
2425

2526
[dependencies]
2627
tree-sitter = ">=0.23.0"
@@ -31,6 +32,7 @@ tree-sitter-python = { version = "0.23.2", optional = true }
3132
tree-sitter-ruby = { version = "0.23.0", optional = true }
3233
tree-sitter-go = { version = "0.23.1", optional = true }
3334
tree-sitter-c-sharp = { version = "0.23.0", optional = true }
35+
tree-sitter-javascript = { version = "0.23.0", optional = true }
3436
tree-sitter-rust = { version = "0.23.0", optional = true }
3537
tree-sitter-tags = "0.23"
3638

function-grep/src/supported_languages.rs

Lines changed: 95 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
use std::ops::Deref;
1+
use std::{ops::Deref, str, sync::atomic::AtomicUsize};
22
use tree_sitter::{Language as TsLanguage, Node, Query, QueryError, Range};
3+
use tree_sitter_tags::{Tag, TagsConfiguration, TagsContext};
34
// TODO: better api less boxing and more results
45
// TODO: better way to do variable assigned to function or just abandon it? (the problem is with
56
// languages that allow multiple assignments how do you match up only the identifiers that
@@ -36,6 +37,12 @@ pub struct LanguageInformation {
3637
language: TsLanguage,
3738
}
3839
#[allow(missing_debug_implementations)]
40+
// TODO: find cleaner (no double parse) way to use this, "fork" tree sitter tags, or make your own
41+
// standard
42+
/// A language whose functions and methods are located through a tree-sitter tags query.
///
/// Implementors are tied to the [`Tags`] marker via `Assoc<Type = Tags>`.
pub trait TreeSitterTags: Assoc<Type = Tags> + HasLanguageInformation {
43+
/// Returns the tags query for this language; the caller stringifies it with `to_string()`
/// before handing it to `TagsConfiguration::new`.
fn tag_query(&self) -> impl ToString;
44+
}
45+
#[allow(missing_debug_implementations)]
3946
pub trait TreeSitterQuery: Assoc<Type = TreeSitter> + HasLanguageInformation {
4047
// TODO: type safety for query
4148
/// Given an identifier(name)
@@ -77,6 +84,8 @@ pub struct Identifier;
7784
// TODO: hide in docs?
7885
#[allow(missing_debug_implementations)]
7986
pub struct TreeSitter;
87+
#[allow(missing_debug_implementations)]
88+
/// Marker type selecting the tags-query based `InstantiateHelper` implementation.
pub struct Tags;
8089
// TODO: hide in docs?
8190
trait InstantiateHelper<Type> {
8291
fn instantiate(&self, search: Box<str>) -> QueryFunction;
@@ -87,10 +96,7 @@ pub trait Assoc {
8796
type Type;
8897
}
8998
impl<T: IdentifierQuery> InstantiateHelper<Identifier> for T {
90-
fn instantiate(
91-
&self,
92-
search: Box<str>,
93-
) -> Box<dyn for<'x, 'y> Fn(Node<'x>, &'y [u8]) -> Box<[Range]> + Send + Sync> {
99+
fn instantiate(&self, search: Box<str>) -> QueryFunction {
94100
let query = Query::new(&self.language(), &self.query_string().to_string()).unwrap();
95101
let method_field = query
96102
.capture_index_for_name(&self.query_name().to_string())
@@ -112,10 +118,7 @@ impl<T: IdentifierQuery> InstantiateHelper<Identifier> for T {
112118
}
113119
}
114120
impl<T: TreeSitterQuery> InstantiateHelper<TreeSitter> for T {
115-
fn instantiate(
116-
&self,
117-
search: Box<str>,
118-
) -> Box<dyn for<'x, 'y> Fn(Node<'x>, &'y [u8]) -> Box<[Range]> + Send + Sync> {
121+
fn instantiate(&self, search: Box<str>) -> QueryFunction {
119122
let query = Query::new(
120123
&self.language(),
121124
&self.query_string_function(search.as_ref()),
@@ -124,17 +127,64 @@ impl<T: TreeSitterQuery> InstantiateHelper<TreeSitter> for T {
124127
Box::new(move |node, code| {
125128
let mut query_cursor = tree_sitter::QueryCursor::new();
126129
let matches = query_cursor.matches(&query, node, code);
130+
127131
let ranges = matches.map(|m| m.captures[0].node.range());
128132
ranges.collect()
129133
})
130134
}
131135
}
132-
136+
// Newtype around `TagsConfiguration` that is declared `Send + Sync` so it can be captured by the
// boxed query closure (the replaced signatures spell that closure type as `... + Send + Sync`).
struct TagsConfigurationThreadSafe(TagsConfiguration);
137+
// NOTE(review): no SAFETY justification is recorded for these two impls; confirm that sharing a
// `TagsConfiguration` across threads is actually sound before relying on it.
unsafe impl Send for TagsConfigurationThreadSafe {}
138+
unsafe impl Sync for TagsConfigurationThreadSafe {}
139+
impl TagsConfigurationThreadSafe {
140+
/// Forwards to `TagsContext::generate_tags`, supplying the wrapped configuration.
///
/// Every borrow shares the lifetime `'a`, so the returned iterator keeps `self`, `context`
/// and `source` borrowed for as long as it is alive. The result (iterator, `bool`, or error)
/// is passed through from `tree_sitter_tags` unchanged.
pub fn generate_tags<'a>(
141+
&'a self,
142+
context: &'a mut TagsContext,
143+
source: &'a [u8],
144+
cancellation_flag: Option<&'a AtomicUsize>,
145+
) -> Result<
146+
(
147+
impl Iterator<Item = Result<Tag, tree_sitter_tags::Error>> + 'a,
148+
bool,
149+
),
150+
tree_sitter_tags::Error,
151+
> {
152+
context.generate_tags(&self.0, source, cancellation_flag)
153+
}
154+
/// Forwards to `TagsConfiguration::syntax_type_name` on the wrapped configuration.
pub fn syntax_type_name(&self, id: u32) -> &str {
155+
self.0.syntax_type_name(id)
156+
}
157+
}
158+
// Tags-based lookup: finds functions/methods named `search` using the language's tags query.
impl<T: TreeSitterTags> InstantiateHelper<Tags> for T {
159+
// Builds the tags configuration once, then returns a closure that runs it on every call.
// Panics (via `unwrap`) if `TagsConfiguration::new` rejects the language or the tags query.
// NOTE(review): the empty third argument is presumably the locals query; confirm against
// the tree-sitter-tags documentation.
fn instantiate(&self, search: Box<str>) -> QueryFunction {
160+
let tag_config = TagsConfigurationThreadSafe(
161+
TagsConfiguration::new(self.language(), &self.tag_query().to_string(), "").unwrap(),
162+
);
163+
Box::new(move |node, code| {
164+
// A fresh context is created for each invocation of the closure.
let mut tag_context = TagsContext::new();
165+
let name = &*search;
166+
// TODO: don't double parse
167+
// No cancellation flag is supplied; a failure to generate tags panics via `unwrap`.
// `.0` keeps the tag iterator and discards the accompanying `bool`.
let tags = tag_config
168+
.generate_tags(&mut tag_context, code, None)
169+
.unwrap()
170+
.0;
171+
let ranges = tags
172+
// Individual tags that come back as `Err` are silently dropped.
.filter_map(Result::ok)
173+
// Keep only tags whose syntax type is "method" or "function" and whose name bytes equal
// `search`; a name that is not valid UTF-8 is compared as the empty string.
.filter(|tag| {
174+
["method", "function"]
175+
.contains(&tag_config.syntax_type_name(tag.syntax_type_id))
176+
&& str::from_utf8(&code[tag.name_range.clone()]).unwrap_or("") == name
177+
})
178+
// Map each tag's byte range onto a descendant of `node` and report that node's range;
// tags for which no such descendant exists are dropped.
.filter_map(|tag| {
179+
node.descendant_for_byte_range(tag.range.start, tag.range.end)
180+
.map(|node| node.range())
181+
});
182+
ranges.collect()
183+
})
184+
}
185+
}
133186
impl<T: Assoc + InstantiateHelper<T::Type> + HasLanguageInformation> SupportedLanguage for T {
134-
fn instantiate(
135-
&self,
136-
search: Box<str>,
137-
) -> Box<dyn for<'x, 'y> Fn(Node<'x>, &'y [u8]) -> Box<[Range]> + Send + Sync> {
187+
fn instantiate(&self, search: Box<str>) -> QueryFunction {
138188
self.instantiate(search)
139189
}
140190
}
@@ -257,6 +307,33 @@ macro_rules! construct_language {
257307
}
258308
}
259309

310+
};
311+
($name:ident($tslang:expr).[$($ext:ident)+]?=$tags:expr ) => {
312+
#[derive(Debug, Clone, Copy)]
313+
pub struct $name;
314+
impl $crate::supported_languages::HasLanguageInformation for $name {
315+
316+
fn language_name(&self) -> &'static str {
317+
stringify!($name)
318+
}
319+
320+
fn file_exts(&self) -> &'static [&'static str] {
321+
&[$(stringify!($ext)),+]
322+
}
323+
324+
fn language(&self) -> tree_sitter::Language {
325+
$tslang.into()
326+
}
327+
}
328+
impl $crate::supported_languages::Assoc for $name {
329+
type Type = $crate::supported_languages::Tags;
330+
}
331+
impl $crate::supported_languages::TreeSitterTags for $name {
332+
fn tag_query(&self) -> impl ToString {
333+
$tags
334+
}
335+
}
336+
260337
};
261338
($name:ident($tslang:expr).[$($ext:ident)+]?=$query_name:ident->$query:literal ) => {
262339
#[derive(Debug, Clone, Copy)]
@@ -410,6 +487,8 @@ construct_language!(OCaml(tree_sitter_ocaml::language_ocaml()).[ml]?="method-nam
410487
@method-defintion
411488
)");
412489

490+
// JavaScript (`js` files): built from the tags query shipped with `tree-sitter-javascript`.
#[cfg(feature = "javascript")]
491+
construct_language!(JavaScript(tree_sitter_javascript::LANGUAGE).[js]?=tree_sitter_javascript::TAGS_QUERY);
413492
#[must_use]
414493
/// Use this to obtain some default languages (which languages are present depends on the features
415494
/// you allow).
@@ -431,5 +510,7 @@ pub fn predefined_languages() -> &'static [&'static dyn SupportedLanguage] {
431510
&Go,
432511
#[cfg(feature = "ruby")]
433512
&Ruby,
513+
#[cfg(feature = "javascript")]
514+
&JavaScript,
434515
]
435516
}

function-grep/tests/test.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
function name(params) {
2+
}
3+
4+
function name(params) {
5+
}

0 commit comments

Comments
 (0)