Skip to content

Commit b0cd307

Browse files
authored
feat: improve support of SQL, PLSQL and others (#281)
1 parent 469297e commit b0cd307

File tree

2 files changed

+93
-6
lines changed

2 files changed

+93
-6
lines changed

src/main/kotlin/app/extractors/Heuristics.kt

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ val ObjectiveCRegex = Regex(
2929
)
3030
var Perl5Regex = Regex("\\buse\\s+(?:strict\\b|v?5\\.)")
3131
var Perl6Regex = Regex("^\\s*(?:use\\s+v6\\b|\\bmodule\\b|\\b(?:my\\s+)?class\\b)")
32+
33+
// Case-insensitive patterns used to recognise Oracle PL/SQL. The "ddl" and "sql"
// heuristics select "plsql" as soon as any one of these is found in the buffer.
var PLSQLRegexs = setOf(
34+
// Matches any of: a "$$PLSQL_" prefix, XMLTYPE, sysdate, systimestamp, ".nextval",
// "connect by", or "AUTHID DEFINER" / "AUTHID CURRENT_USER".
Regex("\\\$\\\$PLSQL_|XMLTYPE|sysdate|systimestamp|\\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)", RegexOption.IGNORE_CASE),
35+
// Matches "constructor" followed by "function", separated by one or more non-word characters.
Regex("constructor\\W+function", RegexOption.IGNORE_CASE)
36+
)
37+
// Case-insensitive keywords (begin, boolean, package, exception). When one of them is
// found in the buffer, the "ddl" heuristic does not fall back to generic "sql".
// NOTE(review): the alternatives carry no word boundaries, so they also match inside
// longer identifiers - confirm that this is intended.
var NotSQLRegex = Regex("begin|boolean|package|exception", RegexOption.IGNORE_CASE)
38+
3239
var PythonRegex = Regex("(^(import|from|class|def)\\s)", RegexOption.MULTILINE)
3340
var RustRegex = Regex("^(use |fn |mod |pub |macro_rules|impl|#!?\\[)")
3441
var RenderScriptRegex = Regex("#include|#pragma\\s+(rs|version)|__attribute__")
@@ -73,6 +80,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
7380
"bat" to { _ ->
7481
CommonExtractor("dosbatch")
7582
},
83+
"bdy" to { _ ->
84+
CommonExtractor("plsql")
85+
},
7686
"c" to { _ ->
7787
CExtractor()
7888
},
@@ -115,6 +125,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
115125
"cpy" to { _ ->
116126
CommonExtractor("cobol")
117127
},
128+
"cql" to { _ ->
129+
CommonExtractor("sql")
130+
},
118131
"cs" to { lines ->
119132
val buf = toBuf(lines)
120133
if (Regex("![\\w\\s]+methodsFor: ").matches(buf)) CommonExtractor("smalltalk")
@@ -143,6 +156,17 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
143156
else if (MakefileRegex.matches(buf)) CommonExtractor("makefile")
144157
else null
145158
},
159+
"db2" to { _ ->
160+
CommonExtractor("sqlpl")
161+
},
162+
// Heuristic registered under the "ddl" key (presumably the file extension - confirm).
// Result: "plsql" when any PLSQLRegexs pattern occurs in the buffer built by toBuf(lines);
// otherwise "sql" when NotSQLRegex does not occur; otherwise null.
"ddl" to { lines ->
163+
val buf = toBuf(lines)
164+
if (PLSQLRegexs.any { re -> re.containsMatchIn(buf)})
165+
CommonExtractor("plsql") // Oracle
166+
else if (!NotSQLRegex.containsMatchIn(buf))
167+
CommonExtractor("sql") // Generic SQL
168+
// Reached only when no PL/SQL pattern matched but a NotSQLRegex keyword is present.
else null
169+
},
146170
"edn" to { _ ->
147171
CommonExtractor("clojure")
148172
},
@@ -189,6 +213,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
189213
"factor" to { _ ->
190214
CommonExtractor("factor")
191215
},
216+
"fnc" to { _ ->
217+
CommonExtractor("plsql")
218+
},
192219
"for" to { lines ->
193220
val buf = toBuf(lines)
194221
if (ForthRegex.matches(buf)) CommonExtractor("forth")
@@ -349,6 +376,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
349376
"mm" to { _ ->
350377
ObjectiveCExtractor()
351378
},
379+
"mysql" to { _ ->
380+
CommonExtractor("sql")
381+
},
352382
"nb" to { _ ->
353383
CommonExtractor("wolframlanguage")
354384
},
@@ -364,6 +394,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
364394
"pas" to { _ ->
365395
CommonExtractor("pascal")
366396
},
397+
"pck" to { _ ->
398+
CommonExtractor("plsql")
399+
},
367400
"pde" to { _ ->
368401
CommonExtractor("processing")
369402
},
@@ -386,13 +419,28 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
386419
"phps" to { _ ->
387420
PhpExtractor()
388421
},
422+
"pkb" to { _ ->
423+
CommonExtractor("plsql")
424+
},
425+
"pks" to { _ ->
426+
CommonExtractor("plsql")
427+
},
389428
"pl" to { lines ->
390429
val buf = toBuf(lines)
391430
if (Regex("^[^#]*:-").matches(buf)) CommonExtractor("prolog")
392431
else if (Perl5Regex.matches(buf)) CommonExtractor("perl")
393432
else if (Perl6Regex.matches(buf)) CommonExtractor("perl6")
394433
else null
395434
},
435+
"plb" to { _ ->
436+
CommonExtractor("plsql")
437+
},
438+
"pls" to { _ ->
439+
CommonExtractor("plsql")
440+
},
441+
"plsql" to { _ ->
442+
CommonExtractor("plsql")
443+
},
396444
"pm" to { lines ->
397445
val buf = toBuf(lines)
398446
if (Perl5Regex.matches(buf)) CommonExtractor("perl")
@@ -406,6 +454,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
406454
"pp" to { _ ->
407455
CommonExtractor("puppet")
408456
},
457+
"prc" to { _ ->
458+
CommonExtractor("plsql")
459+
},
409460
"pro" to { lines ->
410461
val buf = toBuf(lines)
411462
if (Regex("^[^\\[#]+:-").matches(buf)) CommonExtractor("prolog")
@@ -497,8 +548,21 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
497548
"sh" to { _ ->
498549
CommonExtractor("shell")
499550
},
500-
"sql" to { _ ->
501-
CommonExtractor("sql")
551+
"spc" to { _ ->
552+
CommonExtractor("plsql")
553+
},
554+
// Heuristic registered under the "sql" key (presumably the file extension - confirm).
// The buffer built by toBuf(lines) is tested against dialect-specific patterns in a fixed
// order: Postgres ("plpgsql"), then IBM db2 ("sqlpl"), then Oracle ("plsql"). The first
// dialect whose pattern is found wins; generic "sql" is the fallback, so this entry
// never returns null.
"sql" to { lines ->
555+
val buf = toBuf(lines)
556+
// NOTE(review): "^" is used without RegexOption.MULTILINE, so the literal "\i"
// alternative is only recognised at the very start of the buffer - confirm intended.
if (Regex("^\\\\i\\b|AS \\$\\$|LANGUAGE '?plpgsql'?", RegexOption.IGNORE_CASE).containsMatchIn(buf) ||
557+
Regex("SECURITY (DEFINER|INVOKER)", RegexOption.IGNORE_CASE).containsMatchIn(buf) ||
558+
Regex("BEGIN( WORK| TRANSACTION)?;", RegexOption.IGNORE_CASE).containsMatchIn(buf))
559+
CommonExtractor("plpgsql") // Postgres
560+
// NOTE(review): "( NOT)+" requires at least one " NOT", so a plain "begin atomic"
// does not match this alternative (only "begin NOT atomic", with NOT possibly
// repeated) - confirm whether "( NOT)?" was meant.
else if (Regex("(alter module)|(language sql)|(begin( NOT)+ atomic)", RegexOption.IGNORE_CASE).containsMatchIn(buf) ||
561+
Regex("signal SQLSTATE '[0-9]+'", RegexOption.IGNORE_CASE).containsMatchIn(buf))
562+
CommonExtractor("sqlpl") // IBM db2
563+
else if (PLSQLRegexs.any { re -> re.containsMatchIn(buf)})
564+
CommonExtractor("plsql") // Oracle
565+
else CommonExtractor("sql") // Generic SQL
502566
},
503567
"ss" to { _ ->
504568
CommonExtractor("scheme")
@@ -509,12 +573,24 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
509573
"swift" to { _ ->
510574
SwiftExtractor()
511575
},
576+
"tab" to { _ ->
577+
CommonExtractor("sql")
578+
},
512579
"tcl" to { _ ->
513580
CommonExtractor("tcl")
514581
},
515582
"tex" to { _ ->
516583
CommonExtractor("tex")
517584
},
585+
"tpb" to { _ ->
586+
CommonExtractor("plsql")
587+
},
588+
"tps" to { _ ->
589+
CommonExtractor("plsql")
590+
},
591+
"trg" to { _ ->
592+
CommonExtractor("plsql")
593+
},
518594
"ts" to { lines ->
519595
if (Regex("<TS\\b").matches(toBuf(lines))) CommonExtractor("xml")
520596
else CommonExtractor("typescript")
@@ -527,6 +603,9 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
527603
CommonExtractor("xml")
528604
else null
529605
},
606+
"udf" to { _ ->
607+
CommonExtractor("sql")
608+
},
530609
"v" to { _ ->
531610
CommonExtractor("verilog")
532611
},
@@ -539,9 +618,15 @@ val Heuristics = mapOf<String, (List<String>) -> ExtractorInterface?>(
539618
"vim" to { _ ->
540619
CommonExtractor("viml")
541620
},
621+
"viw" to { _ ->
622+
CommonExtractor("sql")
623+
},
542624
"vue" to { _ ->
543625
CommonExtractor("vue")
544626
},
627+
"vw" to { _ ->
628+
CommonExtractor("plsql")
629+
},
545630
"xtend" to { _ ->
546631
CommonExtractor("xtend")
547632
}

src/test/kotlin/test/tests/extractors/IgnoredSamplesWildcards.kt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,6 @@ var ignoredSamplesWildcards = listOf(
263263
"*/P4/*",
264264
"*/PAWN/*",
265265
"*/PHP/*",
266-
"*/PLSQL/*",
267-
"*/PLpgSQL/*",
268266
"*/POV-Ray SDL/*",
269267
"*/Pan/*",
270268
"*/Papyrus/*",
@@ -324,8 +322,12 @@ var ignoredSamplesWildcards = listOf(
324322
"*/SMT/*",
325323
"*/SPARQL/*",
326324
"*/SQF/*",
327-
"*/SQL/*",
328-
"*/SQLPL/*",
325+
"*/SQL/hostcache_set_state.inc",
326+
"*/SQL/AvailableInSearchSel.prc",
327+
"*/SQLPL/check_reorg.sql",
328+
"*/SQLPL/runstats.sql",
329+
"*/SQLPL/sleep.sql",
330+
"*/SQLPL/create_stuff.sql",
329331
"*/SRecode Template/*",
330332
"*/STON/*",
331333
"*/Sage/*",

0 commit comments

Comments
 (0)