Skip to content

Commit 687a9fc

Browse files
committed
feat: add concurrency again
Reverted the markdown parsing changes for supporting plain/unformatted URLs, as they were breaking parsing in other cases and leading to fewer results. Will have another crack at it.
1 parent ce302f9 commit 687a9fc

File tree

6 files changed

+195
-204
lines changed

6 files changed

+195
-204
lines changed

internal/search/search.go

Lines changed: 159 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,6 @@ func Search(query string, sourceName string, s settings.Settings) ([]Result, err
234234
}
235235
}
236236

237-
var allResults []Result
238-
var mu sync.Mutex
239-
240237
// Initialize goldmark with GitHub Flavored Markdown
241238
md := goldmark.New(
242239
goldmark.WithExtensions(extension.GFM),
@@ -245,151 +242,191 @@ func Search(query string, sourceName string, s settings.Settings) ([]Result, err
245242
),
246243
)
247244

248-
// Search in each enabled source
245+
// Create a channel for collecting results
246+
resultChan := make(chan []Result, len(enabledSources))
247+
var wg sync.WaitGroup
248+
249+
// Determine number of workers (ensure at least 1)
250+
numWorkers := s.SearchConcurrency
251+
if numWorkers < 1 {
252+
numWorkers = 1
253+
log.Info("Using default concurency", "concurency", numWorkers)
254+
} else {
255+
log.Info("Using configured concurency", "concurency", numWorkers)
256+
}
257+
258+
// Create a semaphore to limit concurrent goroutines
259+
semaphore := make(chan struct{}, numWorkers)
260+
261+
// Process each source
249262
for _, source := range enabledSources {
250-
sourcePath := source.Path
251-
log.Info("Searching in source", "name", source.Name, "path", sourcePath)
263+
wg.Add(1)
264+
semaphore <- struct{}{} // Acquire semaphore
252265

253-
// Walk through all markdown files in the source
254-
err := filepath.Walk(sourcePath, func(path string, info os.FileInfo, err error) error {
255-
if err != nil {
256-
log.Error("Error accessing path", "path", path, "error", err)
257-
return nil // Skip this file but continue walking
258-
}
266+
go func(src sources.Source) {
267+
defer wg.Done()
268+
defer func() { <-semaphore }() // Release semaphore
259269

260-
// Skip directories and non-markdown files
261-
if info.IsDir() || !strings.HasSuffix(path, ".md") {
262-
return nil
263-
}
270+
sourcePath := src.Path
271+
log.Info("Searching in source", "name", src.Name, "path", sourcePath)
264272

265-
// Skip non-content files
266-
if !isContentFile(path) {
267-
log.Debug("Skipping non-content file", "path", path)
268-
return nil
269-
}
273+
var sourceResults []Result
274+
var sourceMu sync.Mutex
270275

271-
log.Debug("Processing markdown file", "path", path)
272-
content, err := os.ReadFile(path)
273-
if err != nil {
274-
log.Error("Error reading file", "path", path, "error", err)
275-
return nil
276-
}
276+
// Walk through all markdown files in the source
277+
err := filepath.Walk(sourcePath, func(path string, info os.FileInfo, err error) error {
278+
if err != nil {
279+
log.Error("Error accessing path", "path", path, "error", err)
280+
return nil // Skip this file but continue walking
281+
}
277282

278-
// Parse the markdown content
279-
doc := md.Parser().Parse(text.NewReader(content))
280-
281-
// Track headings by level
282-
headings := make(map[int]string)
283-
var currentLevel int
284-
var currentContext string
285-
var contextNode ast.Node
286-
var insideHeading bool
287-
288-
// Walk through the AST
289-
ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
290-
if !entering {
291-
if n == contextNode {
292-
currentContext = ""
293-
contextNode = nil
294-
}
295-
if _, ok := n.(*ast.Heading); ok {
296-
insideHeading = false
297-
}
298-
return ast.WalkContinue, nil
283+
// Skip directories and non-markdown files
284+
if info.IsDir() || !strings.HasSuffix(path, ".md") {
285+
return nil
299286
}
300287

301-
switch v := n.(type) {
302-
case *ast.Heading:
303-
headingText := getNodeText(v, content)
304-
cleanHeading := common.CleanCategory(headingText)
305-
currentLevel = v.Level
306-
headings[currentLevel] = cleanHeading
307-
currentContext = headingText
308-
contextNode = v
309-
insideHeading = true
310-
311-
case *ast.Paragraph, *ast.ListItem:
312-
currentContext = getNodeText(v, content)
313-
contextNode = v
314-
315-
case *ast.Link:
316-
// Get the link destination
317-
destination := v.Destination
318-
if len(destination) == 0 {
319-
return ast.WalkContinue, nil
320-
}
321-
url := string(destination)
288+
// Skip non-content files
289+
if !isContentFile(path) {
290+
log.Debug("Skipping non-content file", "path", path)
291+
return nil
292+
}
322293

323-
// Get link text
324-
linkText := getNodeText(v, content)
325-
if linkText == "" {
326-
linkText = url
327-
}
294+
log.Debug("Processing markdown file", "path", path)
295+
content, err := os.ReadFile(path)
296+
if err != nil {
297+
log.Error("Error reading file", "path", path, "error", err)
298+
return nil
299+
}
328300

329-
// Skip single-character link texts
330-
if len(strings.TrimSpace(linkText)) <= 1 {
301+
// Parse the markdown content
302+
doc := md.Parser().Parse(text.NewReader(content))
303+
304+
// Track headings by level
305+
headings := make(map[int]string)
306+
var currentLevel int
307+
var currentContext string
308+
var contextNode ast.Node
309+
var insideHeading bool
310+
311+
// Walk through the AST
312+
ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
313+
if !entering {
314+
if n == contextNode {
315+
currentContext = ""
316+
contextNode = nil
317+
}
318+
if _, ok := n.(*ast.Heading); ok {
319+
insideHeading = false
320+
}
331321
return ast.WalkContinue, nil
332322
}
333323

334-
// Use the current context as description
335-
var description string
336-
if currentContext != "" {
337-
description = common.CleanDescription(currentContext)
338-
} else {
339-
description = linkText
340-
}
324+
switch v := n.(type) {
325+
case *ast.Heading:
326+
headingText := getNodeText(v, content)
327+
cleanHeading := common.CleanCategory(headingText)
328+
currentLevel = v.Level
329+
headings[currentLevel] = cleanHeading
330+
currentContext = headingText
331+
contextNode = v
332+
insideHeading = true
333+
334+
case *ast.Paragraph, *ast.ListItem:
335+
currentContext = getNodeText(v, content)
336+
contextNode = v
337+
338+
case *ast.Link:
339+
// Get the link destination
340+
destination := v.Destination
341+
if len(destination) == 0 {
342+
return ast.WalkContinue, nil
343+
}
344+
url := string(destination)
341345

342-
// Search in both description and link text
343-
matches := fuzzy.Find(query, []string{description, linkText})
344-
if len(matches) > 0 && matches[0].Score >= s.MinFuzzyScore {
345-
if isLocalURL(url) {
346+
// Get link text
347+
linkText := getNodeText(v, content)
348+
if linkText == "" {
349+
linkText = url
350+
}
351+
352+
// Skip single-character link texts
353+
if len(strings.TrimSpace(linkText)) <= 1 {
346354
return ast.WalkContinue, nil
347355
}
348356

349-
// Find the nearest parent heading
350-
category := "n/a"
351-
// If we're inside a heading, look for the parent heading
352-
if insideHeading {
353-
for level := currentLevel - 1; level >= 1; level-- {
354-
if parent, ok := headings[level]; ok {
355-
category = common.CleanCategory(parent)
356-
break
357-
}
358-
}
357+
// Use the current context as description
358+
var description string
359+
if currentContext != "" {
360+
description = common.CleanDescription(currentContext)
359361
} else {
360-
// Otherwise, look for the nearest heading
361-
for level := currentLevel; level >= 1; level-- {
362-
if parent, ok := headings[level]; ok {
363-
category = common.CleanCategory(parent)
364-
break
362+
description = linkText
363+
}
364+
365+
// Search in both description and link text
366+
matches := fuzzy.Find(query, []string{description, linkText})
367+
if len(matches) > 0 && matches[0].Score >= s.MinFuzzyScore {
368+
if isLocalURL(url) {
369+
return ast.WalkContinue, nil
370+
}
371+
372+
// Find the nearest parent heading
373+
category := "n/a"
374+
// If we're inside a heading, look for the parent heading
375+
if insideHeading {
376+
for level := currentLevel - 1; level >= 1; level-- {
377+
if parent, ok := headings[level]; ok {
378+
category = common.CleanCategory(parent)
379+
break
380+
}
381+
}
382+
} else {
383+
// Otherwise, look for the nearest heading
384+
for level := currentLevel; level >= 1; level-- {
385+
if parent, ok := headings[level]; ok {
386+
category = common.CleanCategory(parent)
387+
break
388+
}
365389
}
366390
}
367-
}
368391

369-
mu.Lock()
370-
allResults = append(allResults, Result{
371-
URL: url,
372-
Name: linkText,
373-
Description: description,
374-
Line: description,
375-
Score: matches[0].Score,
376-
Category: category,
377-
Source: source.Name,
378-
})
379-
mu.Unlock()
392+
sourceMu.Lock()
393+
sourceResults = append(sourceResults, Result{
394+
URL: url,
395+
Name: linkText,
396+
Description: description,
397+
Line: description,
398+
Score: matches[0].Score,
399+
Category: category,
400+
Source: src.Name,
401+
})
402+
sourceMu.Unlock()
403+
}
380404
}
381-
}
382405

383-
return ast.WalkContinue, nil
406+
return ast.WalkContinue, nil
407+
})
408+
409+
return nil
384410
})
385411

386-
return nil
387-
})
412+
if err != nil {
413+
log.Error("Error walking source", "source", src.Name, "error", err)
414+
}
415+
416+
resultChan <- sourceResults
417+
}(source)
418+
}
419+
420+
// Wait for all goroutines to complete
421+
go func() {
422+
wg.Wait()
423+
close(resultChan)
424+
}()
388425

389-
if err != nil {
390-
log.Error("Error walking source", "source", source.Name, "error", err)
391-
continue
392-
}
426+
// Collect results from all sources
427+
var allResults []Result
428+
for results := range resultChan {
429+
allResults = append(allResults, results...)
393430
}
394431

395432
// Sort results by score

internal/search/search_test.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,6 @@ func TestSearch(t *testing.T) {
6868
query: "advanced web",
6969
expected: []string{"https://advancedweb.hu/"},
7070
},
71-
{
72-
name: "find plain URL under triple heading",
73-
query: "bad company",
74-
expected: []string{"https://badcompany.com/"},
75-
},
7671
{
7772
name: "find multiple links under different triple headings",
7873
query: "company",

internal/settings/settings.go

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,19 @@ import (
1616

1717
// Settings represents the user settings
1818
type Settings struct {
19-
MinQueryLength int `json:"minQueryLength"`
20-
MaxQueryLength int `json:"maxQueryLength"`
21-
SearchDelay int `json:"searchDelay"`
22-
ShowScores bool `json:"showScores"`
23-
ResultsPerPage int `json:"resultsPerPage"`
24-
CacheDir string `json:"cache_dir"`
25-
AutoUpdate bool `json:"auto_update"`
26-
TruncateTitles bool `json:"truncateTitles"`
27-
MaxTitleLength int `json:"maxTitleLength"`
28-
CustomHeader string `json:"customHeader"`
29-
MinFuzzyScore int `json:"minFuzzyScore"`
30-
Sources []sources.Source `json:"sources"`
19+
MinQueryLength int `json:"minQueryLength"`
20+
MaxQueryLength int `json:"maxQueryLength"`
21+
SearchDelay int `json:"searchDelay"`
22+
ShowScores bool `json:"showScores"`
23+
ResultsPerPage int `json:"resultsPerPage"`
24+
CacheDir string `json:"cache_dir"`
25+
AutoUpdate bool `json:"auto_update"`
26+
TruncateTitles bool `json:"truncateTitles"`
27+
MaxTitleLength int `json:"maxTitleLength"`
28+
CustomHeader string `json:"customHeader"`
29+
MinFuzzyScore int `json:"minFuzzyScore"`
30+
SearchConcurrency int `json:"searchConcurrency"`
31+
Sources []sources.Source `json:"sources"`
3132
}
3233

3334
// DefaultSettings returns the default settings
@@ -44,18 +45,19 @@ func DefaultSettings() Settings {
4445
}
4546

4647
return Settings{
47-
MinQueryLength: 2,
48-
MaxQueryLength: 1000,
49-
SearchDelay: 300,
50-
ShowScores: true,
51-
ResultsPerPage: 10,
52-
CacheDir: filepath.Join(homeDir, ".local", "cache", "freectl"),
53-
AutoUpdate: true,
54-
TruncateTitles: true,
55-
MaxTitleLength: 100,
56-
CustomHeader: "find cool stuff",
57-
MinFuzzyScore: 0, // Default minimum score
58-
Sources: []sources.Source{},
48+
MinQueryLength: 2,
49+
MaxQueryLength: 1000,
50+
SearchDelay: 300,
51+
ShowScores: true,
52+
ResultsPerPage: 10,
53+
CacheDir: filepath.Join(homeDir, ".local", "cache", "freectl"),
54+
AutoUpdate: true,
55+
TruncateTitles: true,
56+
MaxTitleLength: 100,
57+
CustomHeader: "find cool stuff",
58+
MinFuzzyScore: 0, // Default minimum score
59+
SearchConcurrency: 1, // Default to 1 for sequential processing
60+
Sources: []sources.Source{},
5961
}
6062
}
6163

0 commit comments

Comments
 (0)