Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions docs/media-urls.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Asset URL Handling

This document explains how SmooSense handles asset URLs (images, videos, audio) in data tables.

## Overview

SmooSense supports multiple URL formats for media assets:
- **Absolute URLs**: `http://example.com/image.jpg`, `https://cdn.example.com/video.mp4`
- **Cloud Storage URLs**: `s3://bucket/file.wav`
- **Relative Paths**: `./images/photo.jpg`, `./audio/sound.wav`
- **Absolute Paths**: `/path/to/file.mp3`
- **Home Paths**: `~/Documents/image.png`

## URL Processing Pipeline

### 1. Load data by running a query
`executeQueryAsListOfDict()` @ [useRowData.ts](../smoosense-gui/src/lib/hooks/useRowData.ts)

### 2. Process all cells and resolve media URLs
`fetchProcessedRowDataFunction()` @ [processedRowDataSlice.ts](../smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts)

For **every cell in every row**:
- Check if value needs resolution using `needToResolveMediaUrl()` @ [mediaUrlUtils.ts](../smoosense-gui/src/lib/utils/mediaUrlUtils.ts)
- Resolve it using `resolveAssetUrl()` @ [mediaUrlUtils.ts](../smoosense-gui/src/lib/utils/mediaUrlUtils.ts)


#### Relative Path (local tablePath)
- **Input Example**: `./images/photo.jpg` with tablePath `/data/file.csv`
- **Resolution**: Resolve relative to local `tablePath` directory, proxy through backend
- **Final Output**: `{baseUrl}/api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false` (the `path` value is URL-encoded)

#### Relative Path (S3 tablePath)
- **Input Example**: `./images/photo.jpg` with tablePath `s3://bucket/data/file.csv`
- **Resolution**: Resolve relative to S3 `tablePath` directory, proxy through S3 proxy
- **Final Output**: `{baseUrl}/api/s3-proxy?url=s3%3A%2F%2Fbucket%2Fdata%2Fimages%2Fphoto.jpg`

#### Relative Path (HTTP/HTTPS tablePath)
- **Input Example**: `./images/photo.jpg` with tablePath `https://example.com/data/file.csv`
- **Resolution**: Resolve relative to HTTP/HTTPS `tablePath` directory
- **Final Output**: `https://example.com/data/images/photo.jpg`

#### Absolute Path
- **Input Example**: `/home/user/image.png`
- **Resolution**: Proxy through backend API
- **Final Output**: `{baseUrl}/api/get-file?path=%2Fhome%2Fuser%2Fimage.png&redirect=false` (the `path` value is URL-encoded)

#### Home Path
- **Input Example**: `~/Documents/file.wav`
- **Resolution**: Proxy through backend API
- **Final Output**: `{baseUrl}/api/get-file?path=%7E%2FDocuments%2Ffile.wav&redirect=false` (the `path` value is URL-encoded)

#### S3 URL (with media extension)
- **Input Example**: `s3://bucket/file.wav`
- **Resolution**: Proxy through backend S3 proxy (only for media files: images, videos, audio)
- **Final Output**: `{baseUrl}/api/s3-proxy?url=s3%3A%2F%2Fbucket%2Ffile.wav`

#### HTTP/HTTPS URL
- **Input Example**: `https://cdn.example.com/image.jpg`
- **Resolution**: No modification
- **Final Output**: `https://cdn.example.com/image.jpg`

### 3. Return processed data
[useProcessedRowData.ts](../smoosense-gui/src/lib/hooks/useProcessedRowData.ts)
- Returns the processed data with all media URLs resolved


## Backend Integration

### File Serving Endpoint
`/api/get-file` — handled by `get_file()` @ [fs.py](../smoosense-py/smoosense/handlers/fs.py)
- Serves local files (relative paths, absolute paths, home paths)
- Accepts `path` parameter with file location

### S3 Proxy Endpoint
`/api/s3-proxy` — handled by `s3_proxy()` @ [fs.py](../smoosense-py/smoosense/handlers/fs.py)
- Proxies S3 URLs through backend
- Accepts `url` parameter with full S3 URL (e.g., `s3://bucket/path/to/file`)

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { configureStore } from '@reduxjs/toolkit'
import processedRowDataReducer, { fetchProcessedRowData, tablePathToUrl, type ProcessedRowDataState } from '../processedRowDataSlice'
import processedRowDataReducer, { fetchProcessedRowData, type ProcessedRowDataState } from '../processedRowDataSlice'
import { resolveAssetUrl } from '@/lib/utils/mediaUrlUtils'

// Mock the dependencies
jest.mock('@/lib/utils/urlUtils', () => ({
Expand Down Expand Up @@ -38,132 +39,146 @@ describe('processedRowDataSlice', () => {
})

it('should handle fetch with empty data', async () => {
const result = await store.dispatch(fetchProcessedRowData({ rawData: [], urlColumns: [] }))
const result = await store.dispatch(fetchProcessedRowData({ rawData: [] }))

// Should not dispatch due to shouldWait condition
// eslint-disable-next-line @typescript-eslint/no-explicit-any
expect((result as any).meta.requestStatus).toBe('fulfilled')
})

it('should handle fetch with valid data', async () => {
const testData = [{ id: 1, name: 'test' }]
const urlColumns = ['image_url']

const result = await store.dispatch(fetchProcessedRowData({ rawData: testData, urlColumns }))


const result = await store.dispatch(fetchProcessedRowData({ rawData: testData }))

// eslint-disable-next-line @typescript-eslint/no-explicit-any
expect((result as any).meta.requestStatus).toBe('fulfilled')
// eslint-disable-next-line @typescript-eslint/no-explicit-any
expect((result as any).payload).toEqual(testData)

const state = (store.getState() as TestStore).processedRowData
expect(state.data).toEqual(testData)
expect(state.loading).toBe(false)
expect(state.error).toBeNull()
})
})

describe('tablePathToUrl', () => {
describe('resolveAssetUrl', () => {
const baseUrl = 'http://localhost:8001'
const tablePath = '/data/folder/file.parquet'

describe('relative URLs starting with ./', () => {
it('should resolve relative URL with tablePath', () => {
const url = './images/photo.jpg'
const expected = './api/get-file?path=%2Fdata%2Ffolder%2Fimages%2Fphoto.jpg&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fimages%2Fphoto.jpg&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should resolve relative URL with nested path', () => {
const url = './subfolder/nested/file.txt'
const expected = './api/get-file?path=%2Fdata%2Ffolder%2Fsubfolder%2Fnested%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fsubfolder%2Fnested%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should resolve relative URL with parent directory', () => {
const url = './../other/file.txt'
const expected = './api/get-file?path=%2Fdata%2Ffolder%2F..%2Fother%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Ffolder%2F..%2Fother%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})
})

describe('absolute file paths starting with /', () => {
it('should convert absolute path to API URL with baseUrl', () => {
const url = '/data/images/photo.jpg'
const expected = './api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should handle paths with spaces', () => {
const url = '/data/path with spaces/file.txt'
const expected = './api/get-file?path=%2Fdata%2Fpath+with+spaces%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Fpath+with+spaces%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should handle paths with special characters', () => {
const url = '/data/file@#$%.txt'
const expected = './api/get-file?path=%2Fdata%2Ffile%40%23%24%25.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Fdata%2Ffile%40%23%24%25.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})
})

describe('home directory paths starting with ~/', () => {
it('should convert home path to API URL with baseUrl', () => {
const url = '~/Documents/file.txt'
const expected = './api/get-file?path=%7E%2FDocuments%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%7E%2FDocuments%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should handle home path with nested folders', () => {
const url = '~/folder/subfolder/file.txt'
const expected = './api/get-file?path=%7E%2Ffolder%2Fsubfolder%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%7E%2Ffolder%2Fsubfolder%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})
})

describe('S3 URLs', () => {
it('should proxy S3 URL through s3-proxy endpoint', () => {
const url = 's3://bucket/folder/file.txt'
const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should proxy S3 URL with special characters', () => {
const url = 's3://my-bucket/path with spaces/file@#.jpg'
const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should proxy S3 URL with nested paths', () => {
const url = 's3://bucket/folder/subfolder/image.png'
const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})
})

describe('absolute URLs', () => {
it('should return HTTP URL unchanged', () => {
const url = 'http://example.com/image.jpg'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url)
})

it('should return HTTPS URL unchanged', () => {
const url = 'https://example.com/image.jpg'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
})

it('should return S3 URL unchanged', () => {
const url = 's3://bucket/folder/file.txt'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url)
})

it('should return other protocol URLs unchanged', () => {
const url = 'ftp://server/file.txt'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url)
})

it('should return data URLs unchanged', () => {
const url = ''
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url)
})
})

describe('edge cases', () => {
it('should handle root path /', () => {
const url = '/'
const expected = './api/get-file?path=%2F&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2F&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should handle single file in root', () => {
const url = '/file.txt'
const expected = './api/get-file?path=%2Ffile.txt&redirect=false'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
const expected = 'api/get-file?path=%2Ffile.txt&redirect=false'
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected)
})

it('should handle URL with query parameters (absolute URL)', () => {
const url = 'https://example.com/image.jpg?size=large'
expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url)
expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url)
})
})
})
Original file line number Diff line number Diff line change
@@ -1,51 +1,16 @@
import {type BaseAsyncDataState, createAsyncDataSlice} from '@/lib/utils/createAsyncDataSlice'
import {API_PREFIX} from '@/lib/utils/urlUtils'
import {pathDirname, pathJoin} from '@/lib/utils/pathUtils'
import {cloneDeep, isNil} from 'lodash'
import {needToResolveMediaUrl, resolveAssetUrl} from '@/lib/utils/mediaUrlUtils'
import {isNil, mapValues} from 'lodash'

export type ProcessedRowDataState = BaseAsyncDataState<Record<string, unknown>[]>

interface FetchProcessedRowDataParams {
rawData: Record<string, unknown>[]
urlColumns: string[]
}

/**
 * Turn a cell value that references a file into a URL that can be fetched.
 *
 * - Values beginning with `./` are first joined onto the directory of `tablePath`.
 * - If the (possibly joined) value begins with `/` or `~/`, it is wrapped in a
 *   `get-file` API URL under `baseUrl`, with the path passed as a URL-encoded
 *   `path` query parameter and `redirect=false`.
 * - Anything else (for example `http://`, `https://`, `s3://`) is returned as-is.
 *
 * @param url - The URL or file path taken from the table
 * @param tablePath - Path of the current table; only its directory is used, and only for `./` values
 * @param baseUrl - Prefix placed in front of the generated API URL
 * @returns The fetchable URL, or the input value when no conversion applies
 */
export const tablePathToUrl = (url: string, tablePath: string, baseUrl: string): string => {
// Anchor './' values at the directory containing the table; leave everything else alone
const resolved = url.startsWith('./')
? pathJoin(pathDirname(tablePath), url.substring(2))
: url

// Only filesystem-style paths are routed through the backend
const isLocalPath = resolved.startsWith('/') || resolved.startsWith('~/')
if (!isLocalPath) {
return resolved
}

// URLSearchParams takes care of percent-encoding the path
const query = new URLSearchParams({
path: resolved,
redirect: 'false'
})
const apiUrl = `${API_PREFIX}/get-file?${query.toString()}`

return baseUrl + '/' + apiUrl
}

// Processed row data fetch function
const fetchProcessedRowDataFunction = async (
{ rawData, urlColumns }: FetchProcessedRowDataParams,
{ rawData }: FetchProcessedRowDataParams,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
dispatch: any,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand All @@ -55,34 +20,26 @@ const fetchProcessedRowDataFunction = async (
return []
}

let processedRowData = [...rawData]

// 1. Convert relative URLs to absolute URLs using baseUrl and tablePath
if (urlColumns && urlColumns.length > 0 && getState) {
// Process all cells: resolve media URLs
if (getState) {
const state = getState()
const tablePath = state.ui?.tablePath
const baseUrl = state.ui?.baseUrl

// Only process if both tablePath and baseUrl are available
if (tablePath && baseUrl) {
// Clone the data to avoid mutation
processedRowData = cloneDeep(processedRowData)

// Convert relative URLs for each URL column
processedRowData.forEach((row) => {
urlColumns.forEach((col) => {
const value = row[col]
if (value && typeof value === 'string') {
// Use tablePathToUrl to convert relative URLs to absolute URLs
row[col] = tablePathToUrl(value, tablePath, baseUrl)
}
})
})
// Use pure functional map to transform data without mutation
return rawData.map((row) =>
mapValues(row, (value) =>
needToResolveMediaUrl(value)
? resolveAssetUrl(value as string, tablePath, baseUrl)
: value
)
)
}
}


return processedRowData
return rawData
}

// Should wait condition - check if rawData is provided
Expand Down
Loading