Skip to content

Commit bb2d4a8

Browse files
authored
Merge branch 'main' into feature/repo-keyboard-shortcuts
2 parents bd469f0 + 89bfddc commit bb2d4a8

File tree

10 files changed

+144
-137
lines changed

10 files changed

+144
-137
lines changed

custom/conf/app.example.ini

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2485,8 +2485,9 @@ LEVEL = Info
24852485
;[highlight.mapping]
24862486
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24872487
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2488-
;; Extension mapping to highlight class
2489-
;; e.g. .toml=ini
2488+
;; Extension mapping to highlight class, for example:
2489+
;; .toml = ini
2490+
;; .my-js = JavaScript
24902491

24912492
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24922493
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

modules/analyze/code_language.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
package analyze
55

66
import (
7-
"path/filepath"
7+
"path"
88

99
"github.com/go-enry/go-enry/v2"
1010
)
1111

1212
// GetCodeLanguage detects code language based on file name and content
13+
// It can be slow when the content is used for detection
1314
func GetCodeLanguage(filename string, content []byte) string {
1415
if language, ok := enry.GetLanguageByExtension(filename); ok {
1516
return language
@@ -23,5 +24,5 @@ func GetCodeLanguage(filename string, content []byte) string {
2324
return enry.OtherLanguage
2425
}
2526

26-
return enry.GetLanguage(filepath.Base(filename), content)
27+
return enry.GetLanguage(path.Base(filename), content)
2728
}

modules/highlight/highlight.go

Lines changed: 87 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"html/template"
1313
"io"
1414
"path"
15-
"path/filepath"
1615
"strings"
1716
"sync"
1817

@@ -25,35 +24,32 @@ import (
2524
"github.com/alecthomas/chroma/v2/formatters/html"
2625
"github.com/alecthomas/chroma/v2/lexers"
2726
"github.com/alecthomas/chroma/v2/styles"
28-
lru "github.com/hashicorp/golang-lru/v2"
27+
"github.com/go-enry/go-enry/v2"
2928
)
3029

3130
// don't index files larger than this many bytes for performance purposes
3231
const sizeLimit = 1024 * 1024
3332

34-
var (
35-
// For custom user mapping
36-
highlightMapping = map[string]string{}
37-
38-
once sync.Once
39-
40-
cache *lru.TwoQueueCache[string, any]
33+
type globalVarsType struct {
34+
highlightMapping map[string]string
35+
githubStyles *chroma.Style
36+
}
4137

42-
githubStyles = styles.Get("github")
38+
var (
39+
globalVarsMu sync.Mutex
40+
globalVarsPtr *globalVarsType
4341
)
4442

45-
// NewContext loads custom highlight map from local config
46-
func NewContext() {
47-
once.Do(func() {
48-
highlightMapping = setting.GetHighlightMapping()
49-
50-
// The size 512 is simply a conservative rule of thumb
51-
c, err := lru.New2Q[string, any](512)
52-
if err != nil {
53-
panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
54-
}
55-
cache = c
56-
})
43+
func globalVars() *globalVarsType {
44+
// in the future, the globalVars might need to be re-initialized when settings change, so don't use sync.Once here
45+
globalVarsMu.Lock()
46+
defer globalVarsMu.Unlock()
47+
if globalVarsPtr == nil {
48+
globalVarsPtr = &globalVarsType{}
49+
globalVarsPtr.githubStyles = styles.Get("github")
50+
globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
51+
}
52+
return globalVarsPtr
5753
}
5854

5955
// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
@@ -88,59 +84,85 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
8884
}
8985
}
9086

91-
// Code returns an HTML version of code string with chroma syntax highlighting classes and the matched lexer name
92-
func Code(fileName, language, code string) (output template.HTML, lexerName string) {
93-
NewContext()
94-
95-
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
96-
// preserve literal newline in blame view
97-
if code == "" || code == "\n" {
98-
return "\n", ""
87+
func getChromaLexerByLanguage(fileName, lang string) chroma.Lexer {
88+
lang, _, _ = strings.Cut(lang, "?") // maybe, the value from gitattributes might contain `?` parameters?
89+
ext := path.Ext(fileName)
90+
// the "lang" might come from enry, it has different naming for some languages
91+
switch lang {
92+
case "F#":
93+
lang = "FSharp"
94+
case "Pascal":
95+
lang = "ObjectPascal"
96+
case "C":
97+
if ext == ".C" || ext == ".H" {
98+
lang = "C++"
99+
}
99100
}
101+
// lexers.Get is slow if the language name can't be matched directly: it does extra "Match" call to iterate all lexers
102+
return lexers.Get(lang)
103+
}
100104

101-
if len(code) > sizeLimit {
102-
return template.HTML(template.HTMLEscapeString(code)), ""
105+
// GetChromaLexerWithFallback returns a chroma lexer by given file name, language and code content. All parameters can be optional.
106+
// When code content is provided, it will be slow if no lexer is found by file name or language.
107+
// If no lexer is found, it will return the fallback lexer.
108+
func GetChromaLexerWithFallback(fileName, lang string, code []byte) (lexer chroma.Lexer) {
109+
if lang != "" {
110+
lexer = getChromaLexerByLanguage(fileName, lang)
103111
}
104112

105-
var lexer chroma.Lexer
106-
107-
if len(language) > 0 {
108-
lexer = lexers.Get(language)
113+
if lexer == nil {
114+
fileExt := path.Ext(fileName)
115+
if val, ok := globalVars().highlightMapping[fileExt]; ok {
116+
lexer = getChromaLexerByLanguage(fileName, val) // use mapped value to find lexer
117+
}
118+
}
109119

120+
if lexer == nil {
121+
// when using "code" to detect, analyze.GetCodeLanguage is slower, it iterates many rules to detect language from content
122+
// this is the old logic: use enry to detect language, and use chroma to render, but their naming is different for some languages
123+
enryLanguage := analyze.GetCodeLanguage(fileName, code)
124+
lexer = getChromaLexerByLanguage(fileName, enryLanguage)
110125
if lexer == nil {
111-
// Attempt stripping off the '?'
112-
if before, _, ok := strings.Cut(language, "?"); ok {
113-
lexer = lexers.Get(before)
126+
if enryLanguage != enry.OtherLanguage {
127+
log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", enryLanguage, fileName)
114128
}
129+
lexer = lexers.Match(fileName) // lexers.Match will search by its basename and extname
115130
}
116131
}
117132

118-
if lexer == nil {
119-
if val, ok := highlightMapping[path.Ext(fileName)]; ok {
120-
// use mapped value to find lexer
121-
lexer = lexers.Get(val)
122-
}
133+
return util.IfZero(lexer, lexers.Fallback)
134+
}
135+
136+
func renderCode(fileName, language, code string, slowGuess bool) (output template.HTML, lexerName string) {
137+
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
138+
// preserve literal newline in blame view
139+
if code == "" || code == "\n" {
140+
return "\n", ""
123141
}
124142

125-
if lexer == nil {
126-
if l, ok := cache.Get(fileName); ok {
127-
lexer = l.(chroma.Lexer)
128-
}
143+
if len(code) > sizeLimit {
144+
return template.HTML(template.HTMLEscapeString(code)), ""
129145
}
130146

131-
if lexer == nil {
132-
lexer = lexers.Match(fileName)
133-
if lexer == nil {
134-
lexer = lexers.Fallback
135-
}
136-
cache.Add(fileName, lexer)
147+
var codeForGuessLexer []byte
148+
if slowGuess {
149+
// it is slower to guess lexer by code content, so only do it when necessary
150+
codeForGuessLexer = util.UnsafeStringToBytes(code)
137151
}
152+
lexer := GetChromaLexerWithFallback(fileName, language, codeForGuessLexer)
153+
return RenderCodeByLexer(lexer, code), formatLexerName(lexer.Config().Name)
154+
}
138155

139-
return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
156+
func RenderCodeFast(fileName, language, code string) (output template.HTML, lexerName string) {
157+
return renderCode(fileName, language, code, false)
140158
}
141159

142-
// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
143-
func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
160+
func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexerName string) {
161+
return renderCode(fileName, language, code, true)
162+
}
163+
164+
// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes
165+
func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
144166
formatter := html.New(html.WithClasses(true),
145167
html.WithLineNumbers(false),
146168
html.PreventSurroundingPre(true),
@@ -155,7 +177,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
155177
return template.HTML(template.HTMLEscapeString(code))
156178
}
157179
// style not used for live site but need to pass something
158-
err = formatter.Format(htmlw, githubStyles, iterator)
180+
err = formatter.Format(htmlw, globalVars().githubStyles, iterator)
159181
if err != nil {
160182
log.Error("Can't format code: %v", err)
161183
return template.HTML(template.HTMLEscapeString(code))
@@ -167,44 +189,18 @@ func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
167189
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
168190
}
169191

170-
// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
171-
func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
172-
NewContext()
173-
192+
// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
193+
func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, string, error) {
174194
if len(code) > sizeLimit {
175-
return PlainText(code), "", nil
195+
return RenderPlainText(code), "", nil
176196
}
177197

178198
formatter := html.New(html.WithClasses(true),
179199
html.WithLineNumbers(false),
180200
html.PreventSurroundingPre(true),
181201
)
182202

183-
var lexer chroma.Lexer
184-
185-
// provided language overrides everything
186-
if language != "" {
187-
lexer = lexers.Get(language)
188-
}
189-
190-
if lexer == nil {
191-
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
192-
lexer = lexers.Get(val)
193-
}
194-
}
195-
196-
if lexer == nil {
197-
guessLanguage := analyze.GetCodeLanguage(fileName, code)
198-
199-
lexer = lexers.Get(guessLanguage)
200-
if lexer == nil {
201-
lexer = lexers.Match(fileName)
202-
if lexer == nil {
203-
lexer = lexers.Fallback
204-
}
205-
}
206-
}
207-
203+
lexer := GetChromaLexerWithFallback(fileName, language, code)
208204
lexerName := formatLexerName(lexer.Config().Name)
209205

210206
iterator, err := lexer.Tokenise(nil, string(code))
@@ -218,7 +214,7 @@ func File(fileName, language string, code []byte) ([]template.HTML, string, erro
218214
lines := make([]template.HTML, 0, len(tokensLines))
219215
for _, tokens := range tokensLines {
220216
iterator = chroma.Literator(tokens...)
221-
err = formatter.Format(htmlBuf, githubStyles, iterator)
217+
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
222218
if err != nil {
223219
return nil, "", fmt.Errorf("can't format code: %w", err)
224220
}
@@ -229,8 +225,8 @@ func File(fileName, language string, code []byte) ([]template.HTML, string, erro
229225
return lines, lexerName, nil
230226
}
231227

232-
// PlainText returns non-highlighted HTML for code
233-
func PlainText(code []byte) []template.HTML {
228+
// RenderPlainText returns non-highlighted HTML for code
229+
func RenderPlainText(code []byte) []template.HTML {
234230
r := bufio.NewReader(bytes.NewReader(code))
235231
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
236232
for {

modules/highlight/highlight_test.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ c=2
112112

113113
for _, tt := range tests {
114114
t.Run(tt.name, func(t *testing.T) {
115-
out, lexerName, err := File(tt.name, "", []byte(tt.code))
115+
out, lexerName, err := RenderFullFile(tt.name, "", []byte(tt.code))
116116
assert.NoError(t, err)
117117
assert.Equal(t, tt.want, out)
118118
assert.Equal(t, tt.lexerName, lexerName)
@@ -176,7 +176,7 @@ c=2`),
176176

177177
for _, tt := range tests {
178178
t.Run(tt.name, func(t *testing.T) {
179-
out := PlainText([]byte(tt.code))
179+
out := RenderPlainText([]byte(tt.code))
180180
assert.Equal(t, tt.want, out)
181181
})
182182
}
@@ -199,3 +199,36 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
199199
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
200200
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
201201
}
202+
203+
func TestGetChromaLexer(t *testing.T) {
204+
globalVars().highlightMapping[".my-html"] = "HTML"
205+
t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
206+
207+
cases := []struct {
208+
fileName string
209+
language string
210+
content string
211+
expected string
212+
}{
213+
{"test.py", "", "", "Python"},
214+
215+
{"any-file", "javascript", "", "JavaScript"},
216+
{"any-file", "", "/* vim: set filetype=python */", "Python"},
217+
{"any-file", "", "", "fallback"},
218+
219+
{"test.fs", "", "", "Forth"},
220+
{"test.fs", "F#", "", "FSharp"},
221+
{"test.fs", "", "let x = 1", "FSharp"},
222+
223+
{"test.c", "", "", "C"},
224+
{"test.C", "", "", "C++"},
225+
{"OLD-CODE.PAS", "", "", "ObjectPascal"},
226+
{"test.my-html", "", "", "HTML"},
227+
}
228+
for _, c := range cases {
229+
lexer := GetChromaLexerWithFallback(c.fileName, c.language, []byte(c.content))
230+
if assert.NotNil(t, lexer, "case: %+v", c) {
231+
assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
232+
}
233+
}
234+
}

modules/indexer/code/search.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
7272

7373
func HighlightSearchResultCode(filename, language string, lineNums []int, code string) []*ResultLine {
7474
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
75-
hl, _ := highlight.Code(filename, language, code)
75+
hl, _ := highlight.RenderCodeFast(filename, language, code)
7676
highlightedLines := strings.Split(string(hl), "\n")
7777

78-
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
78+
// The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n`
7979
lines := make([]*ResultLine, min(len(highlightedLines), len(lineNums)))
8080
for i := range lines {
8181
lines[i] = &ResultLine{

0 commit comments

Comments
 (0)