diff --git a/docs/media-urls.md b/docs/media-urls.md new file mode 100644 index 0000000..e11d392 --- /dev/null +++ b/docs/media-urls.md @@ -0,0 +1,79 @@ +# Asset URL Handling + +This document explains how SmooSense handles asset URLs (images, videos, audio) in data tables. + +## Overview + +SmooSense supports multiple URL formats for media assets: +- **Absolute URLs**: `http://example.com/image.jpg`, `https://cdn.example.com/video.mp4` +- **Cloud Storage URLs**: `s3://bucket/file.wav` +- **Relative Paths**: `./images/photo.jpg`, `./audio/sound.wav` +- **Absolute and Home Paths**: `/path/to/file.mp3`, `~/home/user/image.png` + +## URL Processing Pipeline + +### 1. Load data by running a query +`executeQueryAsListOfDict()` @ [useRowData.ts](../smoosense-gui/src/lib/hooks/useRowData.ts) + +### 2. Process all cells and resolve media URLs +`fetchProcessedRowDataFunction()` @ [processedRowDataSlice.ts](../smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts) + +For **every cell in every row**: +- Check if the value needs resolution (a string that starts with `./`, `/`, `~/`, or `s3://` and has a media file extension) using `needToResolveMediaUrl()` @ [mediaUrlUtils.ts](../smoosense-gui/src/lib/utils/mediaUrlUtils.ts) +- Resolve it using `resolveAssetUrl()` @ [mediaUrlUtils.ts](../smoosense-gui/src/lib/utils/mediaUrlUtils.ts) + + +#### Relative Path (local tablePath) +- **Input Example**: `./images/photo.jpg` with tablePath `/data/file.csv` +- **Resolution**: Resolve relative to local `tablePath` directory, proxy through backend +- **Final Output**: `{baseUrl}/api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false` + +#### Relative Path (S3 tablePath) +- **Input Example**: `./images/photo.jpg` with tablePath `s3://bucket/data/file.csv` +- **Resolution**: Resolve relative to S3 `tablePath` directory, proxy through S3 proxy +- **Final Output**: `{baseUrl}/api/s3-proxy?url=s3%3A%2F%2Fbucket%2Fdata%2Fimages%2Fphoto.jpg` + +#### Relative Path (HTTP/HTTPS tablePath) +- **Input Example**: `./images/photo.jpg` with tablePath 
`https://example.com/data/file.csv` +- **Resolution**: Resolve relative to HTTP/HTTPS `tablePath` directory +- **Final Output**: `https://example.com/data/images/photo.jpg` + +#### Absolute Path +- **Input Example**: `/home/user/image.png` +- **Resolution**: Proxy through backend API +- **Final Output**: `{baseUrl}/api/get-file?path=%2Fhome%2Fuser%2Fimage.png&redirect=false` + +#### Home Path +- **Input Example**: `~/Documents/file.wav` +- **Resolution**: Proxy through backend API +- **Final Output**: `{baseUrl}/api/get-file?path=%7E%2FDocuments%2Ffile.wav&redirect=false` + +#### S3 URL (with media extension) +- **Input Example**: `s3://bucket/file.wav` +- **Resolution**: Proxy through backend S3 proxy (only for media files: images, videos, audio) +- **Final Output**: `{baseUrl}/api/s3-proxy?url=s3%3A%2F%2Fbucket%2Ffile.wav` + +#### HTTP/HTTPS URL +- **Input Example**: `https://cdn.example.com/image.jpg` +- **Resolution**: No modification +- **Final Output**: `https://cdn.example.com/image.jpg` + +### 3. 
Return processed data +[useProcessedRowData.ts](../smoosense-gui/src/lib/hooks/useProcessedRowData.ts) +- Returns the processed data with all media URLs resolved + + +## Backend Integration + +### File Serving Endpoint +`/api/get-file` +`get_file()` @ [fs.py](../smoosense-py/smoosense/handlers/fs.py) +- Serves local files (relative paths, absolute paths, home paths) +- Accepts `path` parameter with file location + +### S3 Proxy Endpoint +`/api/s3-proxy` +`s3_proxy()` @ [fs.py](../smoosense-py/smoosense/handlers/fs.py) +- Proxies S3 URLs through backend +- Accepts `url` parameter with full S3 URL (e.g., `s3://bucket/path/to/file`) + diff --git a/smoosense-gui/src/lib/features/processedRowData/__tests__/processedRowDataSlice.test.ts b/smoosense-gui/src/lib/features/processedRowData/__tests__/processedRowDataSlice.test.ts index da3f562..bacbc45 100644 --- a/smoosense-gui/src/lib/features/processedRowData/__tests__/processedRowDataSlice.test.ts +++ b/smoosense-gui/src/lib/features/processedRowData/__tests__/processedRowDataSlice.test.ts @@ -1,5 +1,6 @@ import { configureStore } from '@reduxjs/toolkit' -import processedRowDataReducer, { fetchProcessedRowData, tablePathToUrl, type ProcessedRowDataState } from '../processedRowDataSlice' +import processedRowDataReducer, { fetchProcessedRowData, type ProcessedRowDataState } from '../processedRowDataSlice' +import { resolveAssetUrl } from '@/lib/utils/mediaUrlUtils' // Mock the dependencies jest.mock('@/lib/utils/urlUtils', () => ({ @@ -38,8 +39,8 @@ describe('processedRowDataSlice', () => { }) it('should handle fetch with empty data', async () => { - const result = await store.dispatch(fetchProcessedRowData({ rawData: [], urlColumns: [] })) - + const result = await store.dispatch(fetchProcessedRowData({ rawData: [] })) + // Should not dispatch due to shouldWait condition // eslint-disable-next-line @typescript-eslint/no-explicit-any expect((result as any).meta.requestStatus).toBe('fulfilled') @@ -47,15 +48,14 @@ 
describe('processedRowDataSlice', () => { it('should handle fetch with valid data', async () => { const testData = [{ id: 1, name: 'test' }] - const urlColumns = ['image_url'] - - const result = await store.dispatch(fetchProcessedRowData({ rawData: testData, urlColumns })) - + + const result = await store.dispatch(fetchProcessedRowData({ rawData: testData })) + // eslint-disable-next-line @typescript-eslint/no-explicit-any expect((result as any).meta.requestStatus).toBe('fulfilled') // eslint-disable-next-line @typescript-eslint/no-explicit-any expect((result as any).payload).toEqual(testData) - + const state = (store.getState() as TestStore).processedRowData expect(state.data).toEqual(testData) expect(state.loading).toBe(false) @@ -63,107 +63,122 @@ describe('processedRowDataSlice', () => { }) }) -describe('tablePathToUrl', () => { +describe('resolveAssetUrl', () => { const baseUrl = 'http://localhost:8001' const tablePath = '/data/folder/file.parquet' describe('relative URLs starting with ./', () => { it('should resolve relative URL with tablePath', () => { const url = './images/photo.jpg' - const expected = './api/get-file?path=%2Fdata%2Ffolder%2Fimages%2Fphoto.jpg&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fimages%2Fphoto.jpg&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should resolve relative URL with nested path', () => { const url = './subfolder/nested/file.txt' - const expected = './api/get-file?path=%2Fdata%2Ffolder%2Fsubfolder%2Fnested%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fsubfolder%2Fnested%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should resolve relative URL with parent directory', () => { 
const url = './../other/file.txt' - const expected = './api/get-file?path=%2Fdata%2Ffolder%2F..%2Fother%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Ffolder%2F..%2Fother%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) }) describe('absolute file paths starting with /', () => { it('should convert absolute path to API URL with baseUrl', () => { const url = '/data/images/photo.jpg' - const expected = './api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should handle paths with spaces', () => { const url = '/data/path with spaces/file.txt' - const expected = './api/get-file?path=%2Fdata%2Fpath+with+spaces%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Fpath+with+spaces%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should handle paths with special characters', () => { const url = '/data/file@#$%.txt' - const expected = './api/get-file?path=%2Fdata%2Ffile%40%23%24%25.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Fdata%2Ffile%40%23%24%25.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) }) describe('home directory paths starting with ~/', () => { it('should convert home path to API URL with baseUrl', () => { const url = '~/Documents/file.txt' - const expected = 
'./api/get-file?path=%7E%2FDocuments%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%7E%2FDocuments%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should handle home path with nested folders', () => { const url = '~/folder/subfolder/file.txt' - const expected = './api/get-file?path=%7E%2Ffolder%2Fsubfolder%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%7E%2Ffolder%2Fsubfolder%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + }) + }) + + describe('S3 URLs', () => { + it('should proxy S3 URL through s3-proxy endpoint', () => { + const url = 's3://bucket/folder/file.txt' + const expected = 'api/s3-proxy?url=' + encodeURIComponent(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + }) + + it('should proxy S3 URL with special characters', () => { + const url = 's3://my-bucket/path with spaces/file@#.jpg' + const expected = 'api/s3-proxy?url=' + encodeURIComponent(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + }) + + it('should proxy S3 URL with nested paths', () => { + const url = 's3://bucket/folder/subfolder/image.png' + const expected = 'api/s3-proxy?url=' + encodeURIComponent(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) }) describe('absolute URLs', () => { it('should return HTTP URL unchanged', () => { const url = 'http://example.com/image.jpg' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url) }) it('should return HTTPS URL unchanged', () => { const url = 'https://example.com/image.jpg' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) - }) - 
- it('should return S3 URL unchanged', () => { - const url = 's3://bucket/folder/file.txt' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url) }) it('should return other protocol URLs unchanged', () => { const url = 'ftp://server/file.txt' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url) }) it('should return data URLs unchanged', () => { const url = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url) }) }) describe('edge cases', () => { it('should handle root path /', () => { const url = '/' - const expected = './api/get-file?path=%2F&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2F&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should handle single file in root', () => { const url = '/file.txt' - const expected = './api/get-file?path=%2Ffile.txt&redirect=false' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) + const expected = 'api/get-file?path=%2Ffile.txt&redirect=false' + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(baseUrl + '/' + expected) }) it('should handle URL with query parameters (absolute URL)', () => { const url = 'https://example.com/image.jpg?size=large' - expect(tablePathToUrl(url, tablePath, baseUrl)).toBe(url) + expect(resolveAssetUrl(url, tablePath, baseUrl)).toBe(url) }) }) }) \ No newline at end of file diff --git a/smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts b/smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts index feb79fa..2849f58 100644 --- 
a/smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts +++ b/smoosense-gui/src/lib/features/processedRowData/processedRowDataSlice.ts @@ -1,51 +1,16 @@ import {type BaseAsyncDataState, createAsyncDataSlice} from '@/lib/utils/createAsyncDataSlice' -import {API_PREFIX} from '@/lib/utils/urlUtils' -import {pathDirname, pathJoin} from '@/lib/utils/pathUtils' -import {cloneDeep, isNil} from 'lodash' +import {needToResolveMediaUrl, resolveAssetUrl} from '@/lib/utils/mediaUrlUtils' +import {isNil, mapValues} from 'lodash' export type ProcessedRowDataState = BaseAsyncDataState[]> interface FetchProcessedRowDataParams { rawData: Record[] - urlColumns: string[] -} - -/** - * Convert a table path to a URL, handling relative paths and baseUrl - * @param url - The URL or table path to convert - * @param tablePath - The current table path (for resolving relative paths like ./) - * @param baseUrl - The base URL to prepend (for absolute paths like / or ~/) - * @returns The converted URL - */ -export const tablePathToUrl = (url: string, tablePath: string, baseUrl: string): string => { - let fullUrl = url - - // If URL starts with "./", resolve it relative to tablePath - if (url.startsWith('./')) { - const dirPath = pathDirname(tablePath) - const relativePath = url.substring(2) // Remove './' prefix - fullUrl = pathJoin(dirPath, relativePath) - } - - // If URL starts with '/' or '~/', convert to API endpoint - if (fullUrl.startsWith('/') || fullUrl.startsWith('~/')) { - const params = new URLSearchParams({ - path: fullUrl, - redirect: 'false' - }) - const relativeUrl = `${API_PREFIX}/get-file?${params.toString()}` - - // Prepend baseUrl - return baseUrl + '/' + relativeUrl - } - - // Return unchanged for absolute URLs (http://, https://, s3://, etc.) 
- return fullUrl } // Processed row data fetch function const fetchProcessedRowDataFunction = async ( - { rawData, urlColumns }: FetchProcessedRowDataParams, + { rawData }: FetchProcessedRowDataParams, // eslint-disable-next-line @typescript-eslint/no-explicit-any dispatch: any, // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -55,34 +20,26 @@ const fetchProcessedRowDataFunction = async ( return [] } - let processedRowData = [...rawData] - - // 1. Convert relative URLs to absolute URLs using baseUrl and tablePath - if (urlColumns && urlColumns.length > 0 && getState) { + // Process all cells: resolve media URLs + if (getState) { const state = getState() const tablePath = state.ui?.tablePath const baseUrl = state.ui?.baseUrl // Only process if both tablePath and baseUrl are available if (tablePath && baseUrl) { - // Clone the data to avoid mutation - processedRowData = cloneDeep(processedRowData) - - // Convert relative URLs for each URL column - processedRowData.forEach((row) => { - urlColumns.forEach((col) => { - const value = row[col] - if (value && typeof value === 'string') { - // Use tablePathToUrl to convert relative URLs to absolute URLs - row[col] = tablePathToUrl(value, tablePath, baseUrl) - } - }) - }) + // Use pure functional map to transform data without mutation + return rawData.map((row) => + mapValues(row, (value) => + needToResolveMediaUrl(value) + ? 
resolveAssetUrl(value as string, tablePath, baseUrl) + : value + ) + ) } } - - return processedRowData + return rawData } // Should wait condition - check if rawData is provided diff --git a/smoosense-gui/src/lib/hooks/__tests__/useProcessedRowData.test.tsx b/smoosense-gui/src/lib/hooks/__tests__/useProcessedRowData.test.tsx index 8b6070e..91b753e 100644 --- a/smoosense-gui/src/lib/hooks/__tests__/useProcessedRowData.test.tsx +++ b/smoosense-gui/src/lib/hooks/__tests__/useProcessedRowData.test.tsx @@ -4,15 +4,12 @@ import { Provider } from 'react-redux' import { configureStore } from '@reduxjs/toolkit' import { useProcessedRowData } from '../useProcessedRowData' import { useRowData } from '../useRowData' -import { useImageAndVideoColumns } from '../useRenderType' // Mock the dependencies jest.mock('../useRowData') -jest.mock('../useRenderType') jest.mock('../useAsyncData') const mockUseRowData = useRowData as jest.MockedFunction -const mockUseImageAndVideoColumns = useImageAndVideoColumns as jest.MockedFunction // Mock useAsyncData jest.mock('../useAsyncData', () => ({ @@ -49,9 +46,6 @@ describe('useProcessedRowData', () => { setNeedRefresh: jest.fn() }) - // Mock useImageAndVideoColumns to return empty array - mockUseImageAndVideoColumns.mockReturnValue([]) - // Mock useAsyncData to return empty array when given empty rawData mockUseAsyncData.mockReturnValue({ data: [], @@ -76,9 +70,6 @@ describe('useProcessedRowData', () => { setNeedRefresh: jest.fn() }) - // Mock useImageAndVideoColumns - mockUseImageAndVideoColumns.mockReturnValue(['image_col']) - // Mock useAsyncData mockUseAsyncData.mockReturnValue({ data: [], @@ -95,7 +86,7 @@ describe('useProcessedRowData', () => { fetchAction: expect.any(Function), setNeedRefreshAction: expect.any(Function), buildParams: expect.any(Function), - dependencies: [[], ['image_col']] + dependencies: [[]] }) // Verify buildParams returns correct value for empty array @@ -104,8 +95,7 @@ describe('useProcessedRowData', () => { 
const params = buildParams() expect(params).toEqual({ - rawData: [], - urlColumns: ['image_col'] + rawData: [] }) }) @@ -118,9 +108,6 @@ describe('useProcessedRowData', () => { setNeedRefresh: jest.fn() }) - // Mock useImageAndVideoColumns - mockUseImageAndVideoColumns.mockReturnValue([]) - // Mock useAsyncData to return loading state mockUseAsyncData.mockReturnValue({ data: [], @@ -145,8 +132,6 @@ describe('useProcessedRowData', () => { setNeedRefresh: jest.fn() }) - mockUseImageAndVideoColumns.mockReturnValue([]) - // Mock useAsyncData to return null data mockUseAsyncData.mockReturnValue({ data: null, @@ -170,8 +155,6 @@ describe('useProcessedRowData', () => { setNeedRefresh: jest.fn() }) - mockUseImageAndVideoColumns.mockReturnValue(['image_col']) - // Mock useAsyncData to simulate processing empty array and returning empty array mockUseAsyncData.mockReturnValue({ data: [], // Processed empty array should still be empty array @@ -188,8 +171,7 @@ describe('useProcessedRowData', () => { const params = buildParams() expect(params).toEqual({ - rawData: [], - urlColumns: ['image_col'] + rawData: [] }) // Final result should be empty array diff --git a/smoosense-gui/src/lib/hooks/useProcessedRowData.ts b/smoosense-gui/src/lib/hooks/useProcessedRowData.ts index 7e8450d..7fc9e0d 100644 --- a/smoosense-gui/src/lib/hooks/useProcessedRowData.ts +++ b/smoosense-gui/src/lib/hooks/useProcessedRowData.ts @@ -1,6 +1,5 @@ import { useRowData } from './useRowData' import { useAsyncData } from './useAsyncData' -import { useImageAndVideoColumns } from './useRenderType' import { fetchProcessedRowData, setNeedRefresh as setNeedRefreshAction } from '@/lib/features/processedRowData/processedRowDataSlice' interface UseProcessedRowDataResult { @@ -13,9 +12,6 @@ interface UseProcessedRowDataResult { export function useProcessedRowData(): UseProcessedRowDataResult { // Get raw row data first const { data: rawData, loading: rawDataLoading, error: rawDataError } = useRowData() - - // Get URL 
columns that need proxying - const urlColumns = useImageAndVideoColumns() // Use the async data pattern for processed row data const { data: processedData, loading: processedLoading, error: processedError, setNeedRefresh } = useAsyncData({ @@ -23,9 +19,9 @@ export function useProcessedRowData(): UseProcessedRowDataResult { fetchAction: fetchProcessedRowData, setNeedRefreshAction: setNeedRefreshAction, buildParams: () => { - return { rawData, urlColumns } + return { rawData } }, - dependencies: [rawData, urlColumns] + dependencies: [rawData] }) return { diff --git a/smoosense-gui/src/lib/hooks/useRenderType.ts b/smoosense-gui/src/lib/hooks/useRenderType.ts index 7c33d2a..c846f3f 100644 --- a/smoosense-gui/src/lib/hooks/useRenderType.ts +++ b/smoosense-gui/src/lib/hooks/useRenderType.ts @@ -164,11 +164,16 @@ export function useVideoColumns(): string[] { } -export function useImageAndVideoColumns(): string[] { +/** + * Hook to get all columns with media URLs (Image, Video, Audio) + * @returns Array of column names that contain media URLs + */ +export function useMediaColumns(): string[] { const renderTypes = useRenderType() return [ ...getColumnsByRenderType(renderTypes, RenderType.ImageUrl), - ...getColumnsByRenderType(renderTypes, RenderType.VideoUrl) + ...getColumnsByRenderType(renderTypes, RenderType.VideoUrl), + ...getColumnsByRenderType(renderTypes, RenderType.AudioUrl) ] } diff --git a/smoosense-gui/src/lib/utils/__tests__/mediaUrlUtils.test.ts b/smoosense-gui/src/lib/utils/__tests__/mediaUrlUtils.test.ts new file mode 100644 index 0000000..286efe5 --- /dev/null +++ b/smoosense-gui/src/lib/utils/__tests__/mediaUrlUtils.test.ts @@ -0,0 +1,332 @@ +import { needToResolveMediaUrl, resolveAssetUrl } from '../mediaUrlUtils' + +describe('needToResolveMediaUrl', () => { + describe('non-string values', () => { + it('should return false for numbers', () => { + expect(needToResolveMediaUrl(123)).toBe(false) + expect(needToResolveMediaUrl(0)).toBe(false) + 
expect(needToResolveMediaUrl(-1)).toBe(false) + }) + + it('should return false for null and undefined', () => { + expect(needToResolveMediaUrl(null)).toBe(false) + expect(needToResolveMediaUrl(undefined)).toBe(false) + }) + + it('should return false for objects', () => { + expect(needToResolveMediaUrl({})).toBe(false) + expect(needToResolveMediaUrl({ url: './image.jpg' })).toBe(false) + }) + + it('should return false for arrays', () => { + expect(needToResolveMediaUrl([])).toBe(false) + expect(needToResolveMediaUrl(['./image.jpg'])).toBe(false) + }) + + it('should return false for booleans', () => { + expect(needToResolveMediaUrl(true)).toBe(false) + expect(needToResolveMediaUrl(false)).toBe(false) + }) + }) + + describe('strings without path prefixes', () => { + it('should return false for HTTP URLs', () => { + expect(needToResolveMediaUrl('http://example.com/image.jpg')).toBe(false) + expect(needToResolveMediaUrl('https://example.com/video.mp4')).toBe(false) + }) + + it('should return false for plain filenames without path prefix', () => { + expect(needToResolveMediaUrl('image.jpg')).toBe(false) + expect(needToResolveMediaUrl('video.mp4')).toBe(false) + }) + + it('should return false for empty strings', () => { + expect(needToResolveMediaUrl('')).toBe(false) + }) + }) + + describe('S3 URLs', () => { + it('should return true for S3 URLs with media extensions', () => { + expect(needToResolveMediaUrl('s3://bucket/audio.mp3')).toBe(true) + expect(needToResolveMediaUrl('s3://my-bucket/images/photo.jpg')).toBe(true) + expect(needToResolveMediaUrl('s3://bucket/video.mp4')).toBe(true) + expect(needToResolveMediaUrl('s3://bucket/music.wav')).toBe(true) + }) + + it('should return false for S3 URLs without media extensions', () => { + expect(needToResolveMediaUrl('s3://bucket/file.txt')).toBe(false) + expect(needToResolveMediaUrl('s3://bucket/data.csv')).toBe(false) + expect(needToResolveMediaUrl('s3://bucket/document.pdf')).toBe(false) + }) + }) + + describe('strings with 
path prefixes but non-media extensions', () => { + it('should return false for relative paths with non-media extensions', () => { + expect(needToResolveMediaUrl('./file.txt')).toBe(false) + expect(needToResolveMediaUrl('./document.pdf')).toBe(false) + expect(needToResolveMediaUrl('./data.json')).toBe(false) + }) + + it('should return false for absolute paths with non-media extensions', () => { + expect(needToResolveMediaUrl('/home/user/file.txt')).toBe(false) + expect(needToResolveMediaUrl('/data/document.csv')).toBe(false) + }) + + it('should return false for home paths with non-media extensions', () => { + expect(needToResolveMediaUrl('~/Documents/file.txt')).toBe(false) + expect(needToResolveMediaUrl('~/data.xlsx')).toBe(false) + }) + }) + + describe('valid media URLs with relative paths', () => { + it('should return true for relative image paths', () => { + expect(needToResolveMediaUrl('./images/photo.jpg')).toBe(true) + expect(needToResolveMediaUrl('./image.png')).toBe(true) + expect(needToResolveMediaUrl('./pic.gif')).toBe(true) + expect(needToResolveMediaUrl('./logo.svg')).toBe(true) + expect(needToResolveMediaUrl('./photo.webp')).toBe(true) + }) + + it('should return true for relative video paths', () => { + expect(needToResolveMediaUrl('./videos/clip.mp4')).toBe(true) + expect(needToResolveMediaUrl('./video.webm')).toBe(true) + expect(needToResolveMediaUrl('./movie.mov')).toBe(true) + expect(needToResolveMediaUrl('./recording.avi')).toBe(true) + }) + + it('should return true for relative audio paths', () => { + expect(needToResolveMediaUrl('./audio/sound.mp3')).toBe(true) + expect(needToResolveMediaUrl('./music.wav')).toBe(true) + expect(needToResolveMediaUrl('./voice.ogg')).toBe(true) + expect(needToResolveMediaUrl('./song.m4a')).toBe(true) + }) + }) + + describe('valid media URLs with absolute paths', () => { + it('should return true for absolute image paths', () => { + expect(needToResolveMediaUrl('/home/user/images/photo.jpg')).toBe(true) + 
expect(needToResolveMediaUrl('/data/image.png')).toBe(true)
+      expect(needToResolveMediaUrl('/tmp/screenshot.jpeg')).toBe(true)
+    })
+
+    it('should return true for absolute video paths', () => {
+      expect(needToResolveMediaUrl('/videos/clip.mp4')).toBe(true)
+      expect(needToResolveMediaUrl('/home/user/movie.webm')).toBe(true)
+    })
+
+    it('should return true for absolute audio paths', () => {
+      expect(needToResolveMediaUrl('/audio/sound.mp3')).toBe(true)
+      expect(needToResolveMediaUrl('/music/track.wav')).toBe(true)
+    })
+  })
+
+  describe('valid media URLs with home paths', () => {
+    it('should return true for home image paths', () => {
+      expect(needToResolveMediaUrl('~/Pictures/photo.jpg')).toBe(true)
+      expect(needToResolveMediaUrl('~/image.png')).toBe(true)
+    })
+
+    it('should return true for home video paths', () => {
+      expect(needToResolveMediaUrl('~/Videos/clip.mp4')).toBe(true)
+      expect(needToResolveMediaUrl('~/movie.webm')).toBe(true)
+    })
+
+    it('should return true for home audio paths', () => {
+      expect(needToResolveMediaUrl('~/Music/song.mp3')).toBe(true)
+      expect(needToResolveMediaUrl('~/audio.wav')).toBe(true)
+    })
+  })
+
+  describe('edge cases', () => {
+    it('should handle paths with multiple dots', () => {
+      expect(needToResolveMediaUrl('./path/to/../image.jpg')).toBe(true)
+    })
+
+    it('should be case-insensitive for extensions', () => { // upper-case extensions must still count as media
+      expect(needToResolveMediaUrl('./image.JPG')).toBe(true)
+      expect(needToResolveMediaUrl('./image.PNG')).toBe(true)
+    })
+
+    it('should handle paths with spaces', () => {
+      expect(needToResolveMediaUrl('./path with spaces/image.jpg')).toBe(true)
+      expect(needToResolveMediaUrl('/home/user/my photos/pic.png')).toBe(true)
+    })
+
+    it('should handle paths with special characters', () => {
+      expect(needToResolveMediaUrl('./images/photo@2x.jpg')).toBe(true)
+      expect(needToResolveMediaUrl('/data/file_#1.png')).toBe(true)
+    })
+  })
+})
+
+describe('resolveAssetUrl', () => {
+  const baseUrl = 'http://localhost:8001'
+  const localTablePath = '/data/folder/file.parquet'
+  const s3TablePath = 's3://my-bucket/data/folder/file.parquet'
+  const httpsTablePath = 'https://example.com/data/folder/file.parquet'
+
+  describe('relative URLs starting with ./ - local tablePath', () => {
+    it('should resolve relative URL with local tablePath', () => {
+      const url = './images/photo.jpg'
+      const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fimages%2Fphoto.jpg&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should resolve relative URL with nested path', () => {
+      const url = './subfolder/nested/file.txt'
+      const expected = 'api/get-file?path=%2Fdata%2Ffolder%2Fsubfolder%2Fnested%2Ffile.txt&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should resolve relative URL with parent directory', () => {
+      const url = './../other/file.txt'
+      const expected = 'api/get-file?path=%2Fdata%2Ffolder%2F..%2Fother%2Ffile.txt&redirect=false' // '..' is expected to stay in a local path
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+  })
+
+  describe('relative URLs starting with ./ - S3 tablePath', () => {
+    it('should resolve relative URL with S3 tablePath', () => {
+      const url = './images/photo.jpg'
+      const expectedUrl = 's3://my-bucket/data/folder/images/photo.jpg'
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(expectedUrl)
+      expect(resolveAssetUrl(url, s3TablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should resolve relative URL with nested path (S3)', () => {
+      const url = './subfolder/nested/file.txt'
+      const expectedUrl = 's3://my-bucket/data/folder/subfolder/nested/file.txt'
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(expectedUrl)
+      expect(resolveAssetUrl(url, s3TablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should resolve relative URL with parent directory (S3)', () => {
+      const url = './../other/file.txt'
+      const expectedUrl = 's3://my-bucket/data/other/file.txt' // '..' is expected to be collapsed for S3 paths
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(expectedUrl)
+      expect(resolveAssetUrl(url, s3TablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+  })
+
+  describe('relative URLs starting with ./ - HTTPS tablePath', () => {
+    it('should resolve relative URL with HTTPS tablePath', () => {
+      const url = './images/photo.jpg'
+      const expectedUrl = 'https://example.com/data/folder/images/photo.jpg'
+      expect(resolveAssetUrl(url, httpsTablePath, baseUrl)).toBe(expectedUrl)
+    })
+
+    it('should resolve relative URL with nested path (HTTPS)', () => {
+      const url = './subfolder/nested/file.txt'
+      const expectedUrl = 'https://example.com/data/folder/subfolder/nested/file.txt'
+      expect(resolveAssetUrl(url, httpsTablePath, baseUrl)).toBe(expectedUrl)
+    })
+
+    it('should resolve relative URL with parent directory (HTTPS)', () => {
+      const url = './../other/file.txt'
+      const expectedUrl = 'https://example.com/data/other/file.txt'
+      expect(resolveAssetUrl(url, httpsTablePath, baseUrl)).toBe(expectedUrl)
+    })
+
+    it('should resolve relative URL with HTTP tablePath', () => {
+      const httpTablePath = 'http://example.com/data/folder/file.parquet'
+      const url = './images/photo.jpg'
+      const expectedUrl = 'http://example.com/data/folder/images/photo.jpg'
+      expect(resolveAssetUrl(url, httpTablePath, baseUrl)).toBe(expectedUrl)
+    })
+  })
+
+  describe('absolute file paths starting with /', () => {
+    it('should convert absolute path to API URL with baseUrl', () => {
+      const url = '/data/images/photo.jpg'
+      const expected = 'api/get-file?path=%2Fdata%2Fimages%2Fphoto.jpg&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should handle paths with spaces', () => {
+      const url = '/data/path with spaces/file.txt'
+      const expected = 'api/get-file?path=%2Fdata%2Fpath+with+spaces%2Ffile.txt&redirect=false' // spaces are expected as '+' in the query string
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should handle paths with special characters', () => {
+      const url = '/data/file@#$%.txt'
+      const expected = 'api/get-file?path=%2Fdata%2Ffile%40%23%24%25.txt&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+  })
+
+  describe('home directory paths starting with ~/', () => {
+    it('should convert home path to API URL with baseUrl', () => {
+      const url = '~/Documents/file.txt'
+      const expected = 'api/get-file?path=%7E%2FDocuments%2Ffile.txt&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should handle home path with nested folders', () => {
+      const url = '~/folder/subfolder/file.txt'
+      const expected = 'api/get-file?path=%7E%2Ffolder%2Fsubfolder%2Ffile.txt&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+  })
+
+  describe('S3 URLs', () => {
+    it('should proxy S3 URL through s3-proxy endpoint', () => {
+      const url = 's3://bucket/folder/file.txt'
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should proxy S3 URL with special characters', () => {
+      const url = 's3://my-bucket/path with spaces/file@#.jpg'
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should proxy S3 URL with nested paths', () => {
+      const url = 's3://bucket/folder/subfolder/image.png'
+      const expected = 'api/s3-proxy?url=' + encodeURIComponent(url)
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+  })
+
+  describe('absolute URLs', () => {
+    it('should return HTTP URL unchanged', () => {
+      const url = 'http://example.com/image.jpg'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(url)
+    })
+
+    it('should return HTTPS URL unchanged', () => {
+      const url = 'https://example.com/image.jpg'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(url)
+    })
+
+    it('should return other protocol URLs unchanged', () => {
+      const url = 'ftp://server/file.txt'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(url)
+    })
+
+    it('should return data URLs unchanged', () => {
+      const url = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=='
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(url)
+    })
+  })
+
+  describe('edge cases', () => {
+    it('should handle root path /', () => {
+      const url = '/'
+      const expected = 'api/get-file?path=%2F&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should handle single file in root', () => {
+      const url = '/file.txt'
+      const expected = 'api/get-file?path=%2Ffile.txt&redirect=false'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(baseUrl + '/' + expected)
+    })
+
+    it('should handle URL with query parameters (absolute URL)', () => {
+      const url = 'https://example.com/image.jpg?size=large'
+      expect(resolveAssetUrl(url, localTablePath, baseUrl)).toBe(url)
+    })
+  })
+})
diff --git a/smoosense-gui/src/lib/utils/agGridCellRenderers.tsx b/smoosense-gui/src/lib/utils/agGridCellRenderers.tsx
index 607868f..bd95e81 100644
--- a/smoosense-gui/src/lib/utils/agGridCellRenderers.tsx
+++ b/smoosense-gui/src/lib/utils/agGridCellRenderers.tsx
@@ -4,6 +4,7 @@ import { ColDef, ICellRendererParams } from 'ag-grid-community'
 import type { BaseColDef } from '@/lib/features/colDefs/agSlice'
 import _, { isNil } from 'lodash'
 import { getFileType, FileType } from './fileTypes'
+import { isUrl } from './urlUtils'
 import ImageCellRenderer from './cellRenderers/ImageCellRenderer'
 import VideoCellRenderer from './cellRenderers/VideoCellRenderer'
 import AudioCellRenderer from
'./cellRenderers/AudioCellRenderer'
@@ -37,17 +38,6 @@ export enum RenderType {
 
 // Helper functions for string analysis
 
-function isUrl(str: string): boolean {
-  return str.startsWith('http://') ||
-         str.startsWith('https://') ||
-         str.startsWith('iframe+http://') ||
-         str.startsWith('iframe+https://') ||
-         str.startsWith('s3://') ||
-         str.startsWith('ftp://') ||
-         str.startsWith('file://') ||
-         str.startsWith('./')
-}
-
 function inferUrlType(str: string): RenderType {
   // Check for iframe+ prefix first
   if (str.startsWith('iframe+http://') || str.startsWith('iframe+https://')) {
diff --git a/smoosense-gui/src/lib/utils/mediaUrlUtils.ts b/smoosense-gui/src/lib/utils/mediaUrlUtils.ts
new file mode 100644
index 0000000..57e7370
--- /dev/null
+++ b/smoosense-gui/src/lib/utils/mediaUrlUtils.ts
@@ -0,0 +1,134 @@
+import {API_PREFIX} from './urlUtils'
+import {pathDirname, pathJoin} from './pathUtils'
+import {getFileType, FileType} from './fileTypes'
+
+/**
+ * Decide whether a cell value is a media reference that has to be rewritten
+ * by `resolveAssetUrl`.
+ *
+ * Returns true only when all of the following hold:
+ * - the value is a string
+ * - AND it starts with ./, /, ~/, or s3://
+ * - AND `getFileType` classifies it as an image, video, or audio file
+ *
+ * @param value - Raw cell value of any type
+ * @returns true if the value needs to be resolved, false otherwise
+ */
+export const needToResolveMediaUrl = (value: unknown): boolean => {
+  // Must be a string; any other cell type can never be a media path
+  if (typeof value !== 'string') {
+    return false
+  }
+
+  // Must start with a relative, absolute, home, or S3 prefix
+  if (
+    !value.startsWith('./') &&
+    !value.startsWith('/') &&
+    !value.startsWith('~/') &&
+    !value.startsWith('s3://')
+  ) {
+    return false
+  }
+
+  // Must be a media file
+  const fileType = getFileType(value)
+  return fileType === FileType.Image || fileType === FileType.Video || fileType === FileType.Audio
+}
+
+/**
+ * Resolve a `./`-prefixed path against the directory containing the table.
+ *
+ * - S3 tablePath: `.`, `..` and empty segments are collapsed; `..` cannot climb above the bucket
+ * - HTTP/HTTPS tablePath: resolved with the URL API against the table's directory
+ * - Anything else: treated as a local path and joined to the table's directory with `pathJoin`
+ *
+ * @param tablePath - Location of the table file (local path, s3:// URL, or http(s):// URL)
+ * @param relativePath - Path to resolve; the first two characters are assumed to be `./`
+ * @returns The resolved path or URL
+ */
+function resolveRelativePath(tablePath: string, relativePath: string): string {
+  const cleanRelative = relativePath.substring(2) // Remove './' prefix
+
+  // Case 1: S3 tablePath (e.g., s3://bucket/folder/file.parquet)
+  if (tablePath.startsWith('s3://')) {
+    // Drop the file name to get the table's directory
+    const lastSlashIndex = tablePath.lastIndexOf('/')
+    const dirPath = tablePath.substring(0, lastSlashIndex)
+
+    // Keep the bucket out of `parts` so that '..' can never remove it
+    const parts = dirPath.substring(5).split('/') // Remove 's3://' and split
+    const bucket = parts.shift() // First part is bucket name
+    const relativeParts = cleanRelative.split('/')
+
+    for (const part of relativeParts) {
+      if (part === '..') {
+        parts.pop()
+      } else if (part !== '.' && part !== '') {
+        parts.push(part)
+      }
+    }
+
+    return `s3://${bucket}/${parts.join('/')}`
+  }
+
+  // Case 2: HTTP/HTTPS tablePath (e.g., https://example.com/data/file.parquet)
+  if (tablePath.startsWith('http://') || tablePath.startsWith('https://')) {
+    try {
+      const tableUrl = new URL(tablePath)
+      // Get directory by removing filename
+      const pathParts = tableUrl.pathname.split('/')
+      pathParts.pop() // Remove filename
+      const dirPath = pathParts.join('/')
+
+      // The trailing '/' makes the URL API treat dirPath as a directory
+      return new URL(cleanRelative, tableUrl.origin + dirPath + '/').href
+    } catch {
+      // If URL parsing fails, fall back to returning original tablePath
+      // NOTE(review): this hands back the table's own URL rather than an asset URL - confirm intended
+      return tablePath
+    }
+  }
+
+  // Case 3: Local file path (e.g., /data/folder/file.parquet)
+  const dirPath = pathDirname(tablePath)
+  return pathJoin(dirPath, cleanRelative)
+}
+
+/**
+ * Resolve asset URLs to full URLs, handling relative paths, absolute paths, S3 URLs, and remote URLs
+ *
+ * - `./...` is first resolved against the directory of tablePath
+ * - `/...` and `~/...` become a `get-file` endpoint URL under baseUrl
+ * - `s3://...` becomes an `s3-proxy` endpoint URL under baseUrl
+ * - everything else (http, https, data, ftp, ...) is returned unchanged
+ *
+ * @param url - The URL or file path to resolve
+ * @param tablePath - The current table path (for resolving relative paths like ./)
+ * @param baseUrl - The base URL to prepend to API endpoints; trailing slashes are ignored
+ * @returns The resolved full URL
+ */
+export const resolveAssetUrl = (url: string, tablePath: string, baseUrl: string): string => {
+  // If URL starts with "./", resolve it relative to tablePath
+  const fullUrl = url.startsWith('./') ? resolveRelativePath(tablePath, url) : url
+
+  let relativeUrl: string
+  if (fullUrl.startsWith('/') || fullUrl.startsWith('~/')) {
+    // Local files are proxied through the backend; URLSearchParams does the escaping
+    const params = new URLSearchParams({
+      path: fullUrl,
+      redirect: 'false'
+    })
+    relativeUrl = `${API_PREFIX}/get-file?${params.toString()}`
+  } else if (fullUrl.startsWith('s3://')) {
+    relativeUrl = `${API_PREFIX}/s3-proxy?url=${encodeURIComponent(fullUrl)}`
+  } else {
+    // Nothing to proxy
+    return fullUrl
+  }
+
+  // Remove leading ./ if present (presumably API_PREFIX may start with './' - verify in urlUtils)
+  const cleanUrl = relativeUrl.startsWith('./') ? relativeUrl.substring(2) : relativeUrl
+
+  // Strip trailing slashes so that "http://host/" does not produce "http://host//api/..."
+  return baseUrl.replace(/\/+$/, '') + '/' + cleanUrl
+}
diff --git a/smoosense-gui/src/lib/utils/urlUtils.ts b/smoosense-gui/src/lib/utils/urlUtils.ts
index 6269990..7717b56 100644
--- a/smoosense-gui/src/lib/utils/urlUtils.ts
+++ b/smoosense-gui/src/lib/utils/urlUtils.ts
@@ -12,6 +12,20 @@ export const getScheme = (url: string): string => {
   }
 }
 
+/**
+ * Check if a string is a URL (http, https, iframe+http(s), s3, ftp, file) or a './' relative path
+ */
+export function isUrl(str: string): boolean {
+  return str.startsWith('http://') ||
+         str.startsWith('https://') ||
+         str.startsWith('iframe+http://') ||
+         str.startsWith('iframe+https://') ||
+         str.startsWith('s3://') ||
+         str.startsWith('ftp://') ||
+         str.startsWith('file://') ||
+         str.startsWith('./')
+}
+
 export const needProxy = (url: string): boolean => {
   const scheme = getScheme(url);
   return !['http', 'https', ''].includes(scheme);