@@ -4,6 +4,7 @@ use regex::{Matches, Regex};
44use std:: collections:: HashSet ;
55use std:: sync:: LazyLock ;
66use std:: { collections:: HashMap , sync:: Arc } ;
7+ use unicase:: UniCase ;
78
89use crate :: base:: field_attrs;
910use crate :: { fields_value, ops:: sdk:: * } ;
@@ -31,10 +32,10 @@ struct LanguageConfig {
3132}
3233
3334fn add_language < ' a > (
34- output : & ' a mut HashMap < & ' static str , Arc < LanguageConfig > > ,
35+ output : & ' a mut HashMap < UniCase < & ' static str > , Arc < LanguageConfig > > ,
3536 name : & ' static str ,
3637 aliases : impl IntoIterator < Item = & ' static str > ,
37- lang_fn : tree_sitter_language :: LanguageFn ,
38+ lang_fn : impl Into < tree_sitter :: Language > ,
3839 terminal_node_kinds : impl IntoIterator < Item = & ' a str > ,
3940) {
4041 let tree_sitter_lang: tree_sitter:: Language = lang_fn. into ( ) ;
@@ -58,49 +59,143 @@ fn add_language<'a>(
5859 terminal_node_kind_ids,
5960 } ) ;
6061 for name in std:: iter:: once ( name) . chain ( aliases. into_iter ( ) ) {
61- if output. insert ( name, config. clone ( ) ) . is_some ( ) {
62+ if output. insert ( name. into ( ) , config. clone ( ) ) . is_some ( ) {
6263 panic ! ( "Language `{name}` already exists" ) ;
6364 }
6465 }
6566}
6667
67- static TREE_SITTER_LANGUAGE_BY_LANG : LazyLock < HashMap < & ' static str , Arc < LanguageConfig > > > =
68+ static TREE_SITTER_LANGUAGE_BY_LANG : LazyLock < HashMap < UniCase < & ' static str > , Arc < LanguageConfig > > > =
6869 LazyLock :: new ( || {
6970 let mut map = HashMap :: new ( ) ;
71+ add_language ( & mut map, "C" , [ ".c" ] , tree_sitter_c:: LANGUAGE , [ ] ) ;
7072 add_language (
7173 & mut map,
72- "Python" ,
73- [ "py" , "python" ] ,
74- tree_sitter_python:: LANGUAGE ,
74+ "C++" ,
75+ [ ".cpp" , ".cc" , ".cxx" , ".h" , ".hpp" , "cpp" ] ,
76+ tree_sitter_c:: LANGUAGE ,
77+ [ ] ,
78+ ) ;
79+ add_language (
80+ & mut map,
81+ "C#" ,
82+ [ ".cs" , "cs" ] ,
83+ tree_sitter_c_sharp:: LANGUAGE ,
84+ [ ] ,
85+ ) ;
86+ add_language ( & mut map, "CSS" , [ ".css" ] , tree_sitter_css:: LANGUAGE , [ ] ) ;
87+ add_language (
88+ & mut map,
89+ "Fortran" ,
90+ [ ".f" , ".f90" , ".f95" , ".f03" , "f" , "f90" , "f95" , "f03" ] ,
91+ tree_sitter_fortran:: LANGUAGE ,
92+ [ ] ,
93+ ) ;
94+ add_language (
95+ & mut map,
96+ "Go" ,
97+ [ ".go" , "golang" ] ,
98+ tree_sitter_go:: LANGUAGE ,
99+ [ ] ,
100+ ) ;
101+ add_language (
102+ & mut map,
103+ "HTML" ,
104+ [ ".html" , ".htm" ] ,
105+ tree_sitter_html:: LANGUAGE ,
75106 [ ] ,
76107 ) ;
108+ add_language ( & mut map, "Java" , [ ".java" ] , tree_sitter_java:: LANGUAGE , [ ] ) ;
77109 add_language (
78110 & mut map,
79111 "JavaScript" ,
80- [ "JS" , " js", "Javascript" , "javascript "] ,
112+ [ ". js" , "js " ] ,
81113 tree_sitter_javascript:: LANGUAGE ,
82114 [ ] ,
83115 ) ;
116+ add_language ( & mut map, "JSON" , [ ".json" ] , tree_sitter_json:: LANGUAGE , [ ] ) ;
84117 add_language (
85118 & mut map,
86- "TypeScript" ,
87- [ "TS" , "ts" , "Typescript" , "typescript" ] ,
88- tree_sitter_typescript:: LANGUAGE_TYPESCRIPT ,
119+ "Markdown" ,
120+ [ ".md" , "md" ] ,
121+ tree_sitter_md:: LANGUAGE ,
122+ [ "inline" ] ,
123+ ) ;
124+ add_language (
125+ & mut map,
126+ "Pascal" ,
127+ [ ".pas" , "pas" , ".dpr" , "dpr" , "Delphi" ] ,
128+ tree_sitter_pascal:: LANGUAGE ,
129+ [ ] ,
130+ ) ;
131+ add_language ( & mut map, "PHP" , [ ".php" ] , tree_sitter_php:: LANGUAGE_PHP , [ ] ) ;
132+ add_language (
133+ & mut map,
134+ "Python" ,
135+ [ ".py" ] ,
136+ tree_sitter_python:: LANGUAGE ,
137+ [ ] ,
138+ ) ;
139+ add_language ( & mut map, "R" , [ ".r" ] , tree_sitter_r:: LANGUAGE , [ ] ) ;
140+ add_language ( & mut map, "Ruby" , [ ".rb" ] , tree_sitter_ruby:: LANGUAGE , [ ] ) ;
141+ add_language (
142+ & mut map,
143+ "Rust" ,
144+ [ ".rs" , "rs" ] ,
145+ tree_sitter_rust:: LANGUAGE ,
146+ [ ] ,
147+ ) ;
148+ add_language (
149+ & mut map,
150+ "Scala" ,
151+ [ ".scala" ] ,
152+ tree_sitter_scala:: LANGUAGE ,
153+ [ ] ,
154+ ) ;
155+ add_language (
156+ & mut map,
157+ "SCSS" ,
158+ [ ".scss" ] ,
159+ tree_sitter_scss:: language ( ) ,
160+ [ ] ,
161+ ) ;
162+ add_language ( & mut map, "SQL" , [ ".sql" ] , tree_sitter_sequel:: LANGUAGE , [ ] ) ;
163+ add_language (
164+ & mut map,
165+ "Swift" ,
166+ [ ".swift" ] ,
167+ tree_sitter_swift:: LANGUAGE ,
168+ [ ] ,
169+ ) ;
170+ add_language (
171+ & mut map,
172+ "TOML" ,
173+ [ ".toml" ] ,
174+ tree_sitter_toml_ng:: LANGUAGE ,
89175 [ ] ,
90176 ) ;
91177 add_language (
92178 & mut map,
93179 "TSX" ,
94- [ "tsx" ] ,
180+ [ ". tsx" ] ,
95181 tree_sitter_typescript:: LANGUAGE_TSX ,
96182 [ ] ,
97183 ) ;
98184 add_language (
99185 & mut map,
100- "Markdown" ,
101- [ "md" , "markdown" ] ,
102- tree_sitter_md:: LANGUAGE . into ( ) ,
103- [ "inline" ] ,
186+ "TypeScript" ,
187+ [ ".ts" , "ts" ] ,
188+ tree_sitter_typescript:: LANGUAGE_TYPESCRIPT ,
189+ [ ] ,
190+ ) ;
191+ add_language ( & mut map, "XML" , [ ".xml" ] , tree_sitter_xml:: LANGUAGE_XML , [ ] ) ;
192+ add_language ( & mut map, "DTD" , [ ".dtd" ] , tree_sitter_xml:: LANGUAGE_DTD , [ ] ) ;
193+ add_language (
194+ & mut map,
195+ "YAML" ,
196+ [ ".yaml" , ".yml" ] ,
197+ tree_sitter_yaml:: LANGUAGE ,
198+ [ ] ,
104199 ) ;
105200 map
106201 } ) ;
@@ -416,7 +511,7 @@ impl SimpleFunctionExecutor for Executor {
416511 . optional ( )
417512 . map ( |v| anyhow:: Ok ( v. as_str ( ) ?. as_ref ( ) ) )
418513 . transpose ( ) ?
419- . and_then ( |lang| TREE_SITTER_LANGUAGE_BY_LANG . get ( lang) )
514+ . and_then ( |lang| TREE_SITTER_LANGUAGE_BY_LANG . get ( & UniCase :: new ( lang) ) )
420515 } ;
421516
422517 let recursive_chunker = RecursiveChunker {
0 commit comments