1
1
import { browser } from "$app/environment" ;
2
2
import posthog from "posthog-js" ;
3
- import type { ShikiTransformer , SpecialLanguage } from "shiki" ;
3
+ import type { LanguageRegistration , ShikiTransformer } from "shiki" ;
4
+
5
/**
 * Pre-load the languages by collecting regular-expression sources from their
 * language registrations.
 *
 * For each registration the top-level `patterns` are walked in order:
 * - a pattern carrying an `include` is looked up in the registration's
 *   `repository` under the reference with its first character dropped
 *   (e.g. `#comments` -> `comments`). References that do not resolve are
 *   skipped, and each distinct reference is resolved at most once per
 *   registration;
 * - the `match`, `begin` and `end` expressions of the resulting rule are
 *   collected, and the nested `patterns` of a resolved include are queued so
 *   they are processed the same way.
 *
 * The registrations passed in are never modified.
 *
 * @param languages a set of languages and their associated registrations.
 * @returns a set of languages and their associated regular expressions to test code against.
 */
export function loadLanguages(
  languages: Record<string, LanguageRegistration[]>
): Record<string, string[]> {
  return Object.fromEntries(
    Object.entries(languages).map(([language, registrations]) => {
      const regexps: string[] = [];
      for (const registration of registrations) {
        // Walk a private copy: rules pulled in from the repository are appended
        // to this queue while iterating, and the caller's `patterns` array must
        // not grow as a side effect (it would duplicate results on later calls).
        const queue = [...(registration.patterns ?? [])];
        const visitedIncludes = new Set<string>();
        for (const pattern of queue) {
          let rule = pattern;
          if (pattern.include) {
            // Resolve every include only once to avoid cycles and duplicates.
            if (visitedIncludes.has(pattern.include)) continue;
            visitedIncludes.add(pattern.include);
            const resolved = registration.repository?.[pattern.include.slice(1)];
            if (!resolved) continue;
            if (resolved.patterns) queue.push(...resolved.patterns);
            rule = resolved;
          }
          if (rule.match) regexps.push(rule.match.toString());
          if (rule.begin) regexps.push(rule.begin.toString());
          if (rule.end) regexps.push(rule.end.toString());
        }
      }
      return [language, regexps];
    })
  );
}
4
45
5
46
/**
 * Detects the programming or markup language based on the given code snippet.
 *
 * Every language is scored by the fraction of its regular expressions that
 * match somewhere in the code. The language with the highest fraction wins;
 * when two languages tie, the one with more regular expressions is preferred.
 * A language is only considered when at least one of its expressions matches.
 *
 * @param code the code snippet to analyze and detect the language from.
 * @param languages the pre-loaded languages and their associated regexps.
 * @returns The detected language as a string, or undefined if no language
 * could be determined.
 */
export function detectLanguage(
  code: string,
  languages: Record<string, string[]>
): string | undefined {
  let languageCandidate: string | undefined = undefined;
  let highestRate = 0;
  let highestTotal = 0;

  for (const [language, regexps] of Object.entries(languages)) {
    if (!regexps.length) continue;

    // Count each expression at most once: a match contributes 1 regardless of
    // how many capture groups the expression declares.
    let matchesCount = 0;
    for (const regexp of regexps) {
      try {
        if (new RegExp(regexp).test(code)) matchesCount += 1;
      } catch {
        // Sources that are not valid JavaScript regular expressions are
        // deliberately ignored and count as "no match".
      }
    }

    // Without a single match this language is not a candidate; otherwise the
    // tie-break below would elect it purely for having many expressions.
    if (matchesCount === 0) continue;

    const successRate = matchesCount / regexps.length;
    if (
      successRate > highestRate ||
      (successRate === highestRate && regexps.length > highestTotal)
    ) {
      languageCandidate = language;
      highestRate = successRate;
      highestTotal = regexps.length;
    }
  }
  return languageCandidate;
}
27
85
28
86
/**
@@ -39,32 +97,36 @@ export const transformerTrimCode: ShikiTransformer = {
39
97
* in code blocks. Useful for handling code snippets with "diff" language and converting them
40
98
* to a detected programming language.
41
99
*/
42
export function transformerLanguageDetection(
  languages: Record<string, string[]>
): ShikiTransformer {
  return {
    preprocess(code, options) {
      // Only snippets tagged as "diff" need their real language detected.
      if (options.lang !== "diff") return;

      // Drop the leading diff marker of every line before running detection.
      const strippedLines = code.split("\n").map(line => line.replace(/^[+-]/, ""));
      const detected = detectLanguage(strippedLines.join("\n"), languages);

      if (detected) {
        // Switch the language, but hand back the original code untouched.
        options.lang = detected;
        return code;
      }

      // Detection failed: report it (browser only) and leave the options as they are.
      if (browser) {
        posthog.captureException(new Error("Failed to determine diff language"), {
          code
        });
      }
      return;
    },
    pre(node) {
      // Expose the language on the <pre> element, expanding the short aliases.
      const lowered = this.options.lang.toLowerCase();
      const displayName = lowered.replace(/^js$/, "javascript").replace(/^ts$/, "typescript");
      node.properties["data-language"] = displayName;
    }
  };
}
68
130
69
131
/**
70
132
* Replicate the behavior of Shiki's `transformerNotationDiff`,
0 commit comments