diff --git a/cmd/dcs-web/search/query-rewrite.go b/cmd/dcs-web/search/query-rewrite.go index ab2d236..367cb18 100644 --- a/cmd/dcs-web/search/query-rewrite.go +++ b/cmd/dcs-web/search/query-rewrite.go @@ -8,8 +8,10 @@ import ( ) var ( - start = regexp.MustCompile(`(?i)^\s*(-?(?:filetype|package|pkg|path|file)):(\S+)\s+`) - end = regexp.MustCompile(`(?i)\s+(-?(?:filetype|package|pkg|path|file)):(\S+)\s*$`) + start = regexp.MustCompile(`(?i)^\s*(-?(?:filetype|package|pkg|path|file|filename)):(\S+)\s+`) + end = regexp.MustCompile(`(?i)\s+(-?(?:filetype|package|pkg|path|file|filename)):(\S+)\s*$`) + // solo matches when the entire query is just a single keyword:value (no other search terms) + solo = regexp.MustCompile(`(?i)^\s*(-?(?:filetype|package|pkg|path|file|filename)):(\S+)\s*$`) ) func rewriteFilters(query url.Values, filtersRe *regexp.Regexp) url.Values { @@ -37,6 +39,9 @@ func rewriteFilters(query url.Values, filtersRe *regexp.Regexp) url.Values { if filtersRe == start { qstr = strings.TrimPrefix(qstr, matches[0]) + } else if filtersRe == solo { + // For solo matches, the entire query string is consumed + qstr = "" } else { qstr = strings.TrimSuffix(qstr, matches[0]) } @@ -53,6 +58,8 @@ func RewriteQuery(u url.URL) url.URL { // query is a copy which we will modify using Set() and use in the result query := rewriteFilters(u.Query(), start) query = rewriteFilters(query, end) + // Handle queries that are only a single keyword (no content search term) + query = rewriteFilters(query, solo) u.RawQuery = query.Encode() return u diff --git a/cmd/dcs-web/search/query-rewrite_test.go b/cmd/dcs-web/search/query-rewrite_test.go index 4d3a44f..94bb650 100644 --- a/cmd/dcs-web/search/query-rewrite_test.go +++ b/cmd/dcs-web/search/query-rewrite_test.go @@ -161,4 +161,27 @@ func TestRewriteQuery(t *testing.T) { if seen != 2 { t.Fatalf("Expected two elements in the hash of the -package keyword, saw %d", seen) } + + // Verify that filename: keyword is recognized + rewritten = 
rewrite(t, "/search?q=filename:CMakeLists.txt") + if filename := rewritten.Query().Get("filename"); filename != "CMakeLists.txt" { + t.Fatalf("Expected filename %q, got %q", "CMakeLists.txt", filename) + } + + // Verify that filename: keyword with regex is parsed correctly + // Note: + is URL-encoded as %2B since + in URLs means space + rewritten = rewrite(t, "/search?q=filename:debian/.%2B\\.doc-base.*") + if filename := rewritten.Query().Get("filename"); filename != "debian/.+\\.doc-base.*" { + t.Fatalf("Expected filename %q, got %q", "debian/.+\\.doc-base.*", filename) + } + + // Verify that filename: keyword can be combined with content search + rewritten = rewrite(t, "/search?q=searchterm+filename:Makefile") + querystr = rewritten.Query().Get("q") + if querystr != "searchterm" { + t.Fatalf("Expected search query %q, got %q", "searchterm", querystr) + } + if filename := rewritten.Query().Get("filename"); filename != "Makefile" { + t.Fatalf("Expected filename %q, got %q", "Makefile", filename) + } } diff --git a/internal/index/read.go b/internal/index/read.go index fc3eda5..3843b39 100644 --- a/internal/index/read.go +++ b/internal/index/read.go @@ -81,6 +81,27 @@ func (dr *DocidReader) Lookup(docid uint32) (string, error) { return dr.last.fn, nil } +// ForEachFilename iterates over all filenames in the index and calls the +// callback function for each one. If the callback returns an error, iteration +// stops and the error is returned. The docid is passed along with each filename. 
+func (dr *DocidReader) ForEachFilename(fn func(docid uint32, filename string) error) error {
+	// Only the bytes before indexOffset are scanned; each '\n' ends one entry.
+	names := dr.f.Data[:dr.indexOffset]
+	var next uint32 // docid handed to fn for the upcoming entry
+	begin := 0      // offset where the upcoming entry starts
+	for end, b := range names {
+		if b != '\n' {
+			continue
+		}
+		if err := fn(next, string(names[begin:end])); err != nil {
+			return err
+		}
+		next++
+		begin = end + 1
+	}
+	return nil
+}
+
 type reusableBuffer struct {
 	u []uint32
 }
diff --git a/internal/sourcebackend/sourcebackend.go b/internal/sourcebackend/sourcebackend.go
index 4d703ca..74559b0 100644
--- a/internal/sourcebackend/sourcebackend.go
+++ b/internal/sourcebackend/sourcebackend.go
@@ -146,6 +146,51 @@ type Server struct {
 	UsePositionalIndex bool
 }
 
+// FilenameMatch is one result of a filename search.
+type FilenameMatch struct {
+	Path        string  // Full path including package
+	PackageName string  // Source package name
+	Ranking     float32 // Ranking score for ordering results
+}
+
+// SearchFilenames reports every indexed file whose path matches pattern, which
+// is compiled as a regular expression. File contents are never searched.
+func (s *Server) SearchFilenames(pattern string) ([]FilenameMatch, error) {
+	// Validate the pattern before locking so that a malformed or slow-to-compile
+	// pattern never holds up other users of s.mu.
+	filenameRegexp, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, fmt.Errorf("invalid filename pattern: %v", err)
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	var matches []FilenameMatch
+	err = s.Index.DocidMap.ForEachFilename(func(docid uint32, filename string) error {
+		// NOTE(review): three-argument MatchString is presumably the project's
+		// own regexp API, not the standard library's; -1 is read as "no match".
+		if filenameRegexp.MatchString(filename, true, true) == -1 {
+			return nil
+		}
+		// The package name is the first path component (whole name if no "/").
+		pkgName := filename
+		if idx := strings.Index(filename, "/"); idx != -1 {
+			pkgName = filename[:idx]
+		}
+		matches = append(matches, FilenameMatch{
+			Path:        filename,
+			PackageName: pkgName,
+			Ranking:     1.0, // Default ranking, could be enhanced based on match position
+		})
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return matches, nil
+}
+
 // Serves a single file for displaying it in /show
 func (s *Server) File(ctx context.Context, in *sourcebackendpb.FileRequest) (*sourcebackendpb.FileReply, error) {
 	log.Printf("requested filename *%s*\n", in.Path)