Skip to content

Commit cc8b502

Browse files
committed
feat(api): expose endpoint to get raw file
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 43426c0 commit cc8b502

File tree

4 files changed

+72
-1
lines changed

4 files changed

+72
-1
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,14 @@ curl -X GET $BASE_URL/collections/myCollection/entries/file.txt
255255

256256
Returns `collection`, `entry`, `chunks` (array of `id`, `content`, `metadata`), and `count`.
257257

258+
- **Get Entry Raw File**:
259+
260+
```sh
261+
curl -X GET $BASE_URL/collections/myCollection/entries/file.pdf/raw
262+
```
263+
264+
Returns the original uploaded binary file with the appropriate Content-Type header.
265+
258266
- **Search Collection**:
259267

260268
```sh

pkg/client/client.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,24 @@ func (c *Client) GetEntryContent(collection, entry string) ([]EntryChunk, error)
143143
return result.Data.Chunks, nil
144144
}
145145

146+
// GetEntryRawFile returns the original uploaded binary file as a ReadCloser.
147+
// The caller is responsible for closing the returned ReadCloser.
148+
func (c *Client) GetEntryRawFile(collection, entry string) (io.ReadCloser, error) {
149+
apiURL := fmt.Sprintf("%s/api/collections/%s/entries/%s/raw", c.BaseURL, collection, url.PathEscape(entry))
150+
151+
resp, err := http.Get(apiURL)
152+
if err != nil {
153+
return nil, fmt.Errorf("requesting raw file: %w", err)
154+
}
155+
156+
if resp.StatusCode != http.StatusOK {
157+
resp.Body.Close()
158+
return nil, fmt.Errorf("raw file download failed (status %d)", resp.StatusCode)
159+
}
160+
161+
return resp.Body, nil
162+
}
163+
146164
// DeleteEntry deletes an entry in a collection and returns the entries left
147165
func (c *Client) DeleteEntry(collection, entry string) ([]string, error) {
148166
url := fmt.Sprintf("%s/api/collections/%s/entry/delete", c.BaseURL, collection)

rag/persistency.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,22 @@ func (db *PersistentKB) GetEntryContent(entry string) ([]types.Result, error) {
235235
return results, nil
236236
}
237237

238+
// GetEntryFilePath returns the filesystem path of the stored file for the given entry.
239+
func (db *PersistentKB) GetEntryFilePath(entry string) (string, error) {
240+
db.Lock()
241+
defer db.Unlock()
242+
243+
entry = filepath.Base(entry)
244+
if _, ok := db.index[entry]; !ok {
245+
return "", fmt.Errorf("entry not found: %s", entry)
246+
}
247+
fpath := filepath.Join(db.assetDir, entry)
248+
if _, err := os.Stat(fpath); err != nil {
249+
return "", fmt.Errorf("entry file not found: %s", entry)
250+
}
251+
return fpath, nil
252+
}
253+
238254
// GetEntryFileContent returns the full content of the stored file (same text that was chunked, without overlap)
239255
// and the number of chunks it occupies. This avoids returning overlapping chunk content.
240256
func (db *PersistentKB) GetEntryFileContent(entry string) (content string, chunkCount int, err error) {
@@ -282,7 +298,11 @@ func (db *PersistentKB) storeFile(entry string, metadata map[string]string) erro
282298
beforeCount := db.Engine.Count()
283299
results, err := db.store(metadata, fileName)
284300
if err != nil {
285-
return fmt.Errorf("failed to store file: %w", err)
301+
// File is already copied to assetDir. Index it with no chunks so it
302+
// still appears in ListDocuments and can be served via GetEntryFilePath.
303+
xlog.Warn("Chunking failed, storing file without chunks", "entry", entry, "error", err)
304+
db.index[fileName] = nil
305+
return db.save()
286306
}
287307
afterCount := db.Engine.Count()
288308
xlog.Info("Stored file", "entry", entry, "fileName", fileName, "results_count", len(results), "count_before", beforeCount, "count_after", afterCount, "added_count", afterCount-beforeCount)

routes.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ func registerAPIRoutes(e *echo.Echo, openAIClient *openai.Client, maxChunkingSiz
127127
e.GET("/api/collections", listCollections)
128128
e.GET("/api/collections/:name/entries", listFiles(collections))
129129
e.GET("/api/collections/:name/entries/:entry", getEntryContent(collections))
130+
e.GET("/api/collections/:name/entries/:entry/raw", getEntryRawFile(collections))
130131
e.POST("/api/collections/:name/search", search(collections))
131132
e.POST("/api/collections/:name/reset", reset(collections))
132133
e.DELETE("/api/collections/:name/entry/delete", deleteEntryFromCollection(collections))
@@ -309,6 +310,30 @@ func getEntryContent(collections collectionList) func(c echo.Context) error {
309310
}
310311
}
311312

313+
// getEntryRawFile returns the original uploaded binary file.
314+
func getEntryRawFile(collections collectionList) func(c echo.Context) error {
315+
return func(c echo.Context) error {
316+
name := c.Param("name")
317+
collection, exists := collections[name]
318+
if !exists {
319+
return c.JSON(http.StatusNotFound, errorResponse(ErrCodeNotFound, "Collection not found", fmt.Sprintf("Collection '%s' does not exist", name)))
320+
}
321+
322+
entryParam := c.Param("entry")
323+
entry, err := url.PathUnescape(entryParam)
324+
if err != nil {
325+
entry = entryParam
326+
}
327+
328+
fpath, err := collection.GetEntryFilePath(entry)
329+
if err != nil {
330+
return c.JSON(http.StatusNotFound, errorResponse(ErrCodeNotFound, "Entry not found", fmt.Sprintf("Entry '%s' does not exist in collection '%s'", entry, name)))
331+
}
332+
333+
return c.File(fpath)
334+
}
335+
}
336+
312337
// uploadFile handles uploading files to a collection
313338
func uploadFile(collections collectionList, fileAssets string) func(c echo.Context) error {
314339
return func(c echo.Context) error {

0 commit comments

Comments
 (0)