Skip to content

Commit dbfe348

Browse files
feat(details): make syntax highlighting leverage Shiki
It is now MUCH more reliable, though marginally slower; the reliability/speed trade-off is good enough
1 parent f567f7c commit dbfe348

File tree

2 files changed

+117
-39
lines changed

2 files changed

+117
-39
lines changed

src/routes/[pid=pid]/[org]/[repo]/[id=number]/PageRenderer.svelte

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,24 @@
1111
import githubLight from "@shikijs/themes/github-light-default";
1212
import { createHighlighterCoreSync } from "shiki";
1313
import { createJavaScriptRegexEngine } from "shiki/engine/javascript";
14+
import { loadLanguages } from "./syntax-highlighting";
1415
1516
const highlighter = createHighlighterCoreSync({
1617
langs: [svelte, typescript, javascript, html, css, json, shell, diff],
1718
themes: [githubLight, githubDark],
1819
engine: createJavaScriptRegexEngine()
1920
});
21+
22+
const loadedLanguages = loadLanguages({
23+
svelte,
24+
typescript,
25+
javascript,
26+
html,
27+
css,
28+
json,
29+
shell,
30+
diff
31+
});
2032
</script>
2133

2234
<script lang="ts">
@@ -72,7 +84,11 @@
7284
highlighter,
7385
{
7486
themes: { light: "github-light-default", dark: "github-dark-default" },
75-
transformers: [transformerTrimCode, transformerLanguageDetection, transformerDiffMarking]
87+
transformers: [
88+
transformerTrimCode,
89+
transformerLanguageDetection(loadedLanguages),
90+
transformerDiffMarking
91+
]
7692
} satisfies Parameters<typeof rehypeShikiFromHighlighter>[1]
7793
]
7894
};

src/routes/[pid=pid]/[org]/[repo]/[id=number]/syntax-highlighting.ts

Lines changed: 100 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,86 @@
11
import { browser } from "$app/environment";
22
import posthog from "posthog-js";
3-
import type { ShikiTransformer, SpecialLanguage } from "shiki";
3+
import type { LanguageRegistration, ShikiTransformer } from "shiki";
4+
5+
/**
 * Pre-load the languages by collecting regular expressions from language
 * registrations.
 *
 * For each registration, the top-level `patterns` are walked in order. A
 * pattern's own `match`, `begin` and `end` expressions are collected; a
 * pattern that `include`s a local repository rule (`#name`) contributes that
 * rule's expressions instead, and the rule's nested `patterns` are queued to
 * be walked as well. Each include is resolved at most once per registration.
 *
 * The registrations are never modified: the traversal works on a private copy
 * of the pattern list, so the same registration objects can safely be shared
 * with other consumers and passed to this function repeatedly.
 *
 * @param languages a set of languages and their associated registrations.
 * @returns a set of languages and their associated regular expressions to test code against.
 */
export function loadLanguages(
	languages: Record<string, LanguageRegistration[]>
): Record<string, string[]> {
	return Object.fromEntries(
		Object.entries(languages).map(([language, registrations]) => {
			const regexps: string[] = [];
			for (const registration of registrations) {
				// Copy the list: nested patterns get appended while iterating, and
				// appending to `registration.patterns` itself would alter the caller's object.
				const queue = [...(registration.patterns ?? [])];
				const visitedIncludes = new Set<string>();
				for (const pattern of queue) {
					let rule = pattern;
					if (pattern.include) {
						const include = pattern.include;
						// Only `#name` refers to this registration's repository; other
						// forms (e.g. `$self`, `source.xyz`) must not be looked up by a
						// truncated name.
						if (!include.startsWith("#") || visitedIncludes.has(include)) continue;
						visitedIncludes.add(include);
						const repoValue = registration.repository?.[include.slice(1)];
						if (!repoValue) continue;
						if (repoValue.patterns) queue.push(...repoValue.patterns);
						rule = repoValue;
					}
					if (rule.match) regexps.push(rule.match.toString());
					if (rule.begin) regexps.push(rule.begin.toString());
					if (rule.end) regexps.push(rule.end.toString());
				}
			}
			return [language, regexps];
		})
	);
}
445

546
/**
 * Detects the programming or markup language based on the given code snippet.
 *
 * Every language is scored by the fraction of its regular expressions that
 * match the snippet at least once. The language with the highest score wins;
 * on equal scores, the language with the larger number of regular expressions
 * is preferred. Languages without any matching expression are never selected.
 *
 * @param code the code snippet to analyze and detect the language from.
 * @param languages the pre-loaded languages and their associated regexps.
 * @returns The detected language as a string, or undefined if no language
 * could be determined.
 */
export function detectLanguage(
	code: string,
	languages: Record<string, string[]>
): string | undefined {
	let languageCandidate: string | undefined = undefined;
	let highestRate = 0;
	let highestTotal = 0;

	for (const [language, regexps] of Object.entries(languages)) {
		if (!regexps.length) continue;

		// Count each expression once: a non-global match's array length reflects
		// its capture groups, not how well the expression fits the code.
		let matchesCount = 0;
		for (const regexp of regexps) {
			try {
				if (new RegExp(regexp).test(code)) matchesCount++;
			} catch {
				// The source is not a valid JavaScript regular expression;
				// treat it as a non-match rather than failing the detection.
			}
		}

		// Without a single match there is no evidence for this language, so it
		// must not win through the tie-break below.
		if (matchesCount === 0) continue;

		const successRate = matchesCount / regexps.length;
		if (
			successRate > highestRate ||
			(successRate === highestRate && regexps.length > highestTotal)
		) {
			languageCandidate = language;
			highestRate = successRate;
			highestTotal = regexps.length;
		}
	}
	return languageCandidate;
}
2785

2886
/**
@@ -39,32 +97,36 @@ export const transformerTrimCode: ShikiTransformer = {
3997
* in code blocks. Useful for handling code snippets with "diff" language and converting them
4098
* to a detected programming language.
4199
*/
42-
export function transformerLanguageDetection(
	languages: Record<string, string[]>
): ShikiTransformer {
	const transformer: ShikiTransformer = {
		preprocess(code, options) {
			// Only blocks declared as "diff" need their language to be guessed.
			if (options.lang !== "diff") return;

			// Strip the leading +/- marker of every line before running detection.
			const strippedLines: string[] = [];
			for (const line of code.split("\n")) {
				strippedLines.push(line.replace(/^[+-]/, ""));
			}
			const guess = detectLanguage(strippedLines.join("\n"), languages);

			if (guess) {
				// Switch the block to the detected language; the code itself is
				// returned untouched, markers included.
				options.lang = guess;
				return code;
			}

			// Detection failed: report it when running in the browser and leave
			// the block as-is.
			if (browser) {
				posthog.captureException(new Error("Failed to determine diff language"), {
					code
				});
			}
			return;
		},
		pre(node) {
			// Expose the language on the element, expanding the short aliases.
			const lowered = this.options.lang.toLowerCase();
			const expanded = lowered.replace(/^js$/, "javascript").replace(/^ts$/, "typescript");
			node.properties["data-language"] = expanded;
		}
	};
	return transformer;
}
68130

69131
/**
70132
* Replicate the behavior of Shiki's `transformerNotationDiff`,

0 commit comments

Comments
 (0)