diff --git a/src/components/File/File.test.tsx b/src/components/File/File.test.tsx index 1c681494..bcfcb2c7 100644 --- a/src/components/File/File.test.tsx +++ b/src/components/File/File.test.tsx @@ -44,9 +44,16 @@ describe('File Component', () => { const source = getHttpSource(url) assert(source?.kind === 'file') - const { getByText } = await act(() => render()) + const { getAllByRole } = await act(() => render( + + + + )) - expect(getByText(url)).toBeDefined() + const links = getAllByRole('link') + expect(links[0]?.getAttribute('href')).toBe('/') + expect(links[1]?.getAttribute('href')).toBe('/files?key=https://example.com/') + expect(links[2]?.getAttribute('href')).toBe('/files?key=https://example.com/test.txt') }) it('renders correct breadcrumbs for nested folders', async () => { diff --git a/src/lib/sources/httpSource.ts b/src/lib/sources/httpSource.ts index a80d9bb3..132a0d67 100644 --- a/src/lib/sources/httpSource.ts +++ b/src/lib/sources/httpSource.ts @@ -1,14 +1,127 @@ -import { FileSource } from './types.js' +import { DirSource, FileMetadata, FileSource, SourcePart } from './types.js' import { getFileName } from './utils.js' -export function getHttpSource(sourceId: string, options?: {requestInit?: RequestInit}): FileSource | undefined { +function s3list(bucket: string, prefix: string) { + const url = `https://${bucket}.s3.amazonaws.com/?list-type=2&prefix=${prefix}&delimiter=/` + return fetch(url) + .then(res => { + if (!res.ok) throw new Error(`${res.status} ${res.statusText}`) + return res.text() + }) + .then(text => { + const results = [] + + // Parse regular objects (files and explicit directories) + const contentsRegex = /(.*?)<\/Contents>/gs + const contentsMatches = text.match(contentsRegex) ?? [] + + for (const match of contentsMatches) { + const keyMatch = /(.*?)<\/Key>/.exec(match) + const lastModifiedMatch = /(.*?)<\/LastModified>/.exec(match) + const sizeMatch = /(.*?)<\/Size>/.exec(match) + const eTagMatch = /"(.*?)"<\/ETag>/.exec(match) ?? /"(.*?)"<\/ETag>/.exec(match) + + if (!keyMatch || !lastModifiedMatch) continue + + const key = keyMatch[1] + const lastModified = lastModifiedMatch[1] + const size = sizeMatch ? parseInt(sizeMatch[1] ?? '', 10) : undefined + const eTag = eTagMatch ? eTagMatch[1] : undefined + + results.push({ key, lastModified, size, eTag }) + } + + // Parse CommonPrefixes (virtual directories) + const prefixRegex = /(.*?)<\/CommonPrefixes>/gs + const prefixMatches = text.match(prefixRegex) ?? [] + + for (const match of prefixMatches) { + const prefixMatch = /(.*?)<\/Prefix>/.exec(match) + if (!prefixMatch) continue + + const key = prefixMatch[1] + results.push({ + key, + lastModified: new Date().toISOString(), // No lastModified for CommonPrefixes + size: 0, + isCommonPrefix: true, + }) + } + + return results + }) +} + +function getSourceParts(sourceId: string): SourcePart[] { + const [protocol, rest] = sourceId.split('://', 2) + const parts = rest + ? [`${protocol}://${rest.split('/', 1)[0]}`, ...rest.split('/').slice(1)] + : sourceId.split('/') + const sourceParts = [ + ...parts.map((part, depth) => { + const slashSuffix = depth === parts.length - 1 ? '' : '/' + return { + text: part + slashSuffix, + sourceId: parts.slice(0, depth + 1).join('/') + slashSuffix, + } + }), + ] + if (sourceParts[sourceParts.length - 1]?.text === '') { + sourceParts.pop() + } + return sourceParts +} + +export function getHttpSource(sourceId: string, options?: {requestInit?: RequestInit}): FileSource | DirSource | undefined { if (!URL.canParse(sourceId)) { return undefined } + + const sourceParts = getSourceParts(sourceId) + + if (sourceId.endsWith('/')) { + const url = new URL(sourceId) + const bucket = url.hostname.split('.')[0] + const prefix = url.pathname.slice(1) + + if (!bucket) { + return undefined + } + + return { + kind: 'directory', + sourceId, + sourceParts, + prefix, + listFiles: () => s3list(bucket, prefix).then(items => + items + .filter(item => item.key !== undefined) + .map(item => { + if (!item.key) { + throw new Error('Key is undefined') + } + const isDirectory = item.key.endsWith('/') + const itemSourceId = `https://${bucket}.s3.amazonaws.com/${item.key}` + // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing + let name = item.key.split('/').pop() || item.key + if (name && isDirectory) { + name = name.replace(prefix, '') + } + return { + name, + lastModified: item.lastModified, + sourceId: itemSourceId, + kind: isDirectory ? 'directory' : 'file', + } as FileMetadata + }) + ), + } as DirSource + } + return { kind: 'file', sourceId, - sourceParts: [{ text: sourceId, sourceId }], + sourceParts, fileName: getFileName(sourceId), resolveUrl: sourceId, requestInit: options?.requestInit,