11import { browser } from "$app/environment" ;
22import posthog from "posthog-js" ;
3- import type { ShikiTransformer , SpecialLanguage } from "shiki" ;
3+ import type { LanguageRegistration , ShikiTransformer } from "shiki" ;
4+
/**
 * Returns the regular-expression source for a grammar rule field.
 *
 * A `RegExp` contributes its `source` (its `toString()` form, `/src/flags`,
 * is not a valid pattern source); anything else is converted with `String`.
 */
function patternSource(value: unknown): string {
	return value instanceof RegExp ? value.source : String(value);
}

/** Appends the `match`, `begin` and `end` expressions of `rule`, when present, to `regexps`. */
function collectRuleRegexps(
	rule: { match?: unknown; begin?: unknown; end?: unknown },
	regexps: string[]
): void {
	if (rule.match) regexps.push(patternSource(rule.match));
	if (rule.begin) regexps.push(patternSource(rule.begin));
	if (rule.end) regexps.push(patternSource(rule.end));
}

/**
 * Pre-load the languages by returning regular expressions from language
 * registrations.
 *
 * For each registration the top-level `patterns` are walked in order:
 * - a pattern without `include` contributes its `match`, `begin` and `end`;
 * - a pattern whose `include` starts with `#` is looked up in the
 *   registration's `repository`; the rule found contributes its `match`,
 *   `begin` and `end`, and its nested `patterns` are queued for the same
 *   treatment. Each distinct `include` value is processed once per registration;
 * - any other `include` form is skipped, because it cannot be resolved from
 *   the registration's own repository.
 *
 * The registrations passed in are never modified.
 *
 * @param languages a set of languages and their associated registrations.
 * @returns a set of languages and their associated regular expressions to test code against.
 */
export function loadLanguages(
	languages: Record<string, LanguageRegistration[]>
): Record<string, string[]> {
	return Object.fromEntries(
		Object.entries(languages).map(([language, registrations]) => {
			const regexps: string[] = [];
			for (const registration of registrations) {
				// Walk a private copy: patterns pulled in from the repository are
				// appended while iterating, and must not leak into the caller's grammar.
				const queue = [...registration.patterns];
				const visitedIncludes = new Set<string>();
				for (const pattern of queue) {
					if (!pattern.include) {
						// Custom pattern
						collectRuleRegexps(pattern, regexps);
						continue;
					}
					// Pattern with #include
					if (visitedIncludes.has(pattern.include)) continue;
					visitedIncludes.add(pattern.include);
					// Only "#name" refers to an entry of this registration's repository.
					if (!pattern.include.startsWith("#")) continue;
					const repoValue = registration.repository?.[pattern.include.slice(1)];
					if (!repoValue) continue;
					collectRuleRegexps(repoValue, regexps);
					if (repoValue.patterns) queue.push(...repoValue.patterns);
				}
			}
			return [language, regexps];
		})
	);
}
445
/**
 * Detects the programming or markup language based on the given code snippet.
 *
 * Every language is scored by the fraction of its regexps that match `code`
 * at least once. The language with the highest score wins; on equal scores
 * the language with more regexps is preferred. Languages with no regexps, or
 * with no matching regexp at all, are never selected.
 *
 * @param code the code snippet to analyze and detect the language from.
 * @param languages the pre-loaded languages and their associated regexps.
 * @returns The detected language as a string, or undefined if no language
 * could be determined.
 */
export function detectLanguage(
	code: string,
	languages: Record<string, string[]>
): string | undefined {
	let languageCandidate: string | undefined;
	let highestRate = 0;
	let highestTotal = 0;

	for (const [language, regexps] of Object.entries(languages)) {
		if (!regexps.length) continue;

		// Count each regexp at most once, so that the score is a true rate and
		// is not inflated by the number of capture groups in a pattern.
		let matchesCount = 0;
		for (const regexp of regexps) {
			try {
				if (new RegExp(regexp).test(code)) matchesCount++;
			} catch {
				// The pattern is not a valid JavaScript regular expression:
				// deliberately treat it as "does not match".
			}
		}

		// A language that matched nothing must not win through the tie-break below.
		if (matchesCount === 0) continue;

		const successRate = matchesCount / regexps.length;
		if (
			successRate > highestRate ||
			(successRate === highestRate && regexps.length > highestTotal)
		) {
			languageCandidate = language;
			highestRate = successRate;
			highestTotal = regexps.length;
		}
	}
	return languageCandidate;
}
2785
2886/**
@@ -39,32 +97,36 @@ export const transformerTrimCode: ShikiTransformer = {
3997 * in code blocks. Useful for handling code snippets with "diff" language and converting them
4098 * to a detected programming language.
4199 */
export function transformerLanguageDetection(
	languages: Record<string, string[]>
): ShikiTransformer {
	return {
		preprocess(code, options) {
			// Only blocks tagged as "diff" need their language detected.
			if (options.lang !== "diff") return;

			// Strip one leading "+" or "-" from every line before running detection.
			const strippedLines = code.split("\n").map(line => line.replace(/^[+-]/, ""));
			const language = detectLanguage(strippedLines.join("\n"), languages);

			if (language) {
				// Switch the block to the detected language; the code itself is
				// handed back as received, markers included.
				options.lang = language;
				return code;
			}

			// Detection failed: report it (browser only) and leave the block untouched.
			if (browser) {
				posthog.captureException(new Error("Failed to determine diff language"), {
					code
				});
			}
			return;
		},
		pre(node) {
			// Expose the language on the <pre> element, expanding the short js/ts names.
			const lowerCased = this.options.lang.toLowerCase();
			const displayName = lowerCased
				.replace(/^js$/, "javascript")
				.replace(/^ts$/, "typescript");
			node.properties["data-language"] = displayName;
		}
	};
}
68130
69131/**
70132 * Replicate the behavior of Shiki's `transformerNotationDiff`,
0 commit comments