diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 1354316..079822d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,6 +1,11 @@ { "permissions": { - "allow": ["Bash(pnpm run test:*)", "Bash(pnpm run lint:*)"], + "allow": [ + "Bash(pnpm run test:*)", + "Bash(pnpm run lint:*)", + "Bash(pnpm run lint:biome)", + "Bash(pnpm run lint:types)" + ], "deny": [], "ask": [] } diff --git a/.knip.jsonc b/.knip.jsonc index 3db5271..71df071 100644 --- a/.knip.jsonc +++ b/.knip.jsonc @@ -1,9 +1,11 @@ { "$schema": "https://unpkg.com/knip@5/schema-jsonc.json", - "ignore": ["example/src/index.express.ts", "example/src/index.fastify.ts", "scripts/loadEntities.ts"], + "ignore": ["src/index.dev.ts", "scripts/loadEntities.ts"], "ignoreDependencies": [ // Used in prisma.schema, not directly in code "prisma-json-types-generator", + // Used in generated prisma code + "@prisma/client", // Needed by @semantic-release based on our config "conventional-changelog-conventionalcommits" ] diff --git a/CLAUDE.md b/CLAUDE.md index 135aaa8..7acfbb5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -73,8 +73,8 @@ pnpm run test:coverage # With coverage report ### Coverage Requirements -- Minimum 95% for lines, functions, branches, statements -- Excluded: generated code, dist, example, config files +- Minimum 100% for lines, functions, branches, statements +- Excluded: generated code, dist, example, config files, dev server - CI pipeline fails if coverage drops below threshold - View reports: `open coverage/index.html` @@ -148,8 +148,70 @@ src/ - `GET /entity/:id` - Retrieve single entity - `GET /entities` - List/filter entities with pagination +- `GET /file/:id` - Download or retrieve file content - `POST /search` - OpenSearch queries with faceting +### Entity Types + +The API supports multiple entity types following the PCDM (Portland Common Data Model) and schema.org standards: + +#### Entity Types + +- **Collection** 
(`http://pcdm.org/models#Collection`) - Top-level groupings of objects and files + - No `memberOf` or `rootCollection` (these are null) + - Can contain Objects and Files as children + +- **Object** (`http://pcdm.org/models#Object`) - Items within collections + - Has `memberOf` pointing to parent Collection + - Has `rootCollection` pointing to top-level Collection + - Can contain Files as children + +- **File** (`http://schema.org/MediaObject`) - Individual files (audio, video, documents, etc.) + - Has `memberOf` pointing to parent Object or Collection + - Has `rootCollection` pointing to top-level Collection + - Stores file metadata in `rocrate` JSON (encodingFormat, contentSize, etc.) + +#### Schema.org Entity Types + +Supporting entity types for metadata: + +- **Person** (`http://schema.org/Person`) - Contributors, researchers, speakers +- **Language** (`http://schema.org/Language`) - Language information +- **Place** (`http://schema.org/Place`) - Geographical locations +- **Organization** (`http://schema.org/Organization`) - Organisations + +#### Entity Hierarchy + +The typical hierarchy follows this pattern: + +``` +Collection (memberOf: null) +├── Object (memberOf: Collection) +│ └── File (memberOf: Object) +└── File (memberOf: Collection) +``` + +#### Filtering by Entity Type + +All routes support filtering by entity type: + +```bash +# Get all File entities +GET /entities?entityType=http://schema.org/MediaObject + +# Get Files belonging to a specific Object +GET /entities?memberOf=http://example.com/object/1&entityType=http://schema.org/MediaObject + +# Search for Files +POST /search +{ + "query": "audio", + "filters": { + "entityType": ["http://schema.org/MediaObject"] + } +} +``` + ### Error Handling - Use consistent error response format @@ -175,6 +237,7 @@ The API provides a flexible transformer system for customising entity responses. 
### Overview The transformer system enables: + - **Access control**: Control visibility of metadata and content based on licenses - **Data enrichment**: Add computed fields or fetch related data - **Response customisation**: Adapt the API response to specific client needs @@ -329,6 +392,7 @@ import { AllPublicAccessTransformer } from 'arocapi'; ### Applied Routes Transformers are applied to all entity routes: + - `GET /entity/:id` - Single entity - `GET /entities` - Entity list (each entity transformed) - `POST /search` - Search results (entities + search metadata) @@ -520,6 +584,981 @@ describe('Custom Transformer Tests', () => { }); ``` +## File Transformers + +The API provides a separate transformer system for file responses, similar to entity transformers but specifically designed for files. File transformers are applied in a three-stage pipeline: + +1. **Base file transformer** - Converts database File records to standard format +2. **File access transformer** - Adds access control information (content only) +3. **File transformers** - Optional additional transformations + +### Overview + +The file transformer system enables: + +- **Content access control**: Control visibility of file content based on licenses +- **Data enrichment**: Add computed fields or fetch related data +- **Response customisation**: Adapt file responses to specific client needs +- **Async operations**: Fetch additional data or perform authorisation checks + +**Key Difference from Entity Transformers**: File metadata (filename, size, mediaType, etc.) is always accessible. Only content access is controlled via the `access.content` field. + +### Transformation Pipeline + +Every file response flows through this pipeline: + +``` +Database File → baseFileTransformer → fileAccessTransformer → fileTransformers[] → Response +``` + +### Usage + +When mounting the application, you **must** provide both `accessTransformer` (for entities) and `fileAccessTransformer` (for files). 
This is a required security feature to ensure conscious decisions about access control. + +```typescript +import { Client } from '@opensearch-project/opensearch'; +import arocapi, { + AllPublicAccessTransformer, + AllPublicFileAccessTransformer, +} from 'arocapi'; +import fastify from 'fastify'; +import { PrismaClient } from './generated/prisma/client.js'; + +const server = fastify(); +const prisma = new PrismaClient(); +const opensearch = new Client({ node: process.env.OPENSEARCH_URL }); + +// For fully public datasets +await server.register(arocapi, { + prisma, + opensearch, + accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, // Required for files + fileHandler: { /* ... */ }, + roCrateHandler: { /* ... */ }, +}); + +// For restricted content, provide custom transformers +await server.register(arocapi, { + prisma, + opensearch, + accessTransformer: AllPublicAccessTransformer, + // Required: Controls access to file content + fileAccessTransformer: async (file, { request, fastify }) => { + const user = await authenticateUser(request); + const canAccessContent = await checkLicense(file.contentLicenseId, user); + + return { + ...file, + access: { + content: canAccessContent, + contentAuthorizationUrl: canAccessContent + ? undefined + : 'https://example.com/request-access', + }, + }; + }, + // Optional: Additional file transformations + fileTransformers: [ + // Add computed fields + async (file) => ({ + ...file, + displayFilename: file.filename.toUpperCase(), + sizeInKB: Math.round(file.size / 1024), + }), + ], + fileHandler: { /* ... */ }, + roCrateHandler: { /* ... */ }, +}); +``` + +### Transformer Types + +#### File Access Transformer + +Controls access to file content. Receives a `StandardFile` and must return an `AuthorisedFile`. + +**Note**: File metadata is always accessible. Only content access is controlled. 
+ +```typescript +type FileAccessTransformer = ( + file: StandardFile, + context: TransformerContext, +) => Promise<AuthorisedFile> | AuthorisedFile; + +type StandardFile = { + id: string; + filename: string; + mediaType: string; + size: number; + memberOf: string; + rootCollection: string; + contentLicenseId: string; +}; + +type AuthorisedFile = StandardFile & { + access: { + content: boolean; + contentAuthorizationUrl?: string; + }; +}; +``` + +#### File Transformers + +Optional transformations applied after access control. Each transformer receives the output of the previous one. + +```typescript +type FileTransformer<TInput, TOutput> = ( + file: TInput, + context: TransformerContext, +) => Promise<TOutput> | TOutput; +``` + +#### Transformer Context + +All file transformers receive the same context object as entity transformers: + +```typescript +type TransformerContext = { + request: FastifyRequest; // Access request headers, params, etc. + fastify: FastifyInstance; // Access prisma, opensearch, etc. +}; +``` + +### AllPublicFileAccessTransformer + +The `AllPublicFileAccessTransformer` is provided for fully public datasets. It grants full access to file content: + +```typescript +import { AllPublicFileAccessTransformer } from 'arocapi'; + +// Returns: +{ + ...file, + access: { + content: true, + }, +} +``` + +**Security Note**: The `fileAccessTransformer` parameter is **required**. You must explicitly choose `AllPublicFileAccessTransformer` for public data or implement a custom transformer for restricted content. This prevents accidental exposure of restricted files. + +### Applied Routes + +File transformers are applied to the files listing route: + +- `GET /files` - File list (each file transformed) + +**Note**: The `/file/:id` endpoint (for downloading file content) uses the `fileHandler` system, not file transformers. Access control for downloads should be implemented in your `fileHandler`. 
+ +### Examples + +#### Content Access Control with License Checking + +```typescript +fileAccessTransformer: async (file, { request, fastify }) => { + const user = await getUserFromRequest(request); + + // Check if user has access to content license + const hasContentAccess = await checkUserLicense( + user, + file.contentLicenseId, + fastify.prisma, + ); + + return { + ...file, + access: { + content: hasContentAccess, + contentAuthorizationUrl: hasContentAccess + ? undefined + : `https://rems.example.com/apply?license=${file.contentLicenseId}`, + }, + }; +} +``` + +#### Add Computed Display Fields + +```typescript +fileTransformers: [ + (file) => ({ + ...file, + displayFilename: file.filename.toUpperCase(), + sizeInKB: Math.round(file.size / 1024), + sizeInMB: Math.round(file.size / 1024 / 1024), + extension: file.filename.split('.').pop(), + }), +] +``` + +#### Fetch Parent Entity Information + +```typescript +fileTransformers: [ + async (file, { fastify }) => { + const parent = await fastify.prisma.entity.findFirst({ + where: { rocrateId: file.memberOf }, + }); + + return { + ...file, + parentEntity: parent ? 
{ + id: parent.rocrateId, + name: parent.name, + type: parent.entityType, + } : null, + }; + }, +] +``` + +#### Request-Specific Data + +```typescript +fileTransformers: [ + (file, { request }) => ({ + ...file, + downloadUrl: `${request.protocol}://${request.hostname}/file/${encodeURIComponent(file.id)}`, + requestedAt: new Date().toISOString(), + }), +] +``` + +### Testing File Transformers + +Test custom file transformers by passing them to your test Fastify instance: + +```typescript +import { describe, it, expect, beforeEach } from 'vitest'; +import { fastify, fastifyBefore } from './test/helpers/fastify.js'; +import filesRoute from './routes/files.js'; +import { AllPublicFileAccessTransformer } from './transformers/default.js'; + +describe('Custom File Transformer Tests', () => { + beforeEach(async () => { + await fastifyBefore(); + }); + + it('should apply custom file access transformer', async () => { + const customFileAccessTransformer = (file) => ({ + ...file, + access: { + content: false, + contentAuthorizationUrl: 'https://example.com/request-access', + }, + }); + + await fastify.register(filesRoute, { + fileAccessTransformer: customFileAccessTransformer, + }); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + const body = JSON.parse(response.body); + expect(body.files[0].access.content).toBe(false); + }); + + it('should apply custom file transformers', async () => { + const customTransformer = (file) => ({ + ...file, + tested: true, + uppercaseFilename: file.filename.toUpperCase(), + }); + + await fastify.register(filesRoute, { + fileAccessTransformer: AllPublicFileAccessTransformer, + fileTransformers: [customTransformer], + }); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + const body = JSON.parse(response.body); + expect(body.files[0].tested).toBe(true); + expect(body.files[0].uppercaseFilename).toBe('FILE1.WAV'); + }); +}); +``` + +## File Handler System + +The API 
provides two separate handler systems for serving different types of content: + +1. **File Handler** - Serves file content for File records (`/file/:id`) +2. **RO-Crate Handler** - Serves RO-Crate metadata for any entity (`/entity/:id/rocrate`) + +### Database Schema + +#### File Table + +The API uses a dedicated `File` table to store file metadata: + +```prisma +model File { + id Int @id @default(autoincrement()) + fileId String @db.VarChar(2048) // URI identifier + filename String @db.VarChar(255) + mediaType String @db.VarChar(127) // MIME type + size BigInt // File size in bytes + memberOf String @db.VarChar(2048) // Parent entity URI (required) + rootCollection String @db.VarChar(2048) // Top-level collection URI (required) + contentLicenseId String @db.VarChar(2048) // Content license (required) + meta Json? // Storage metadata + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt +} +``` + +### File Endpoints + +#### GET /files + +List files with pagination, filtering, and access control: + +```bash +GET /files?memberOf=http://example.com/collection/1&limit=10&offset=0 +``` + +Query parameters: + +- `memberOf` - Filter by parent entity URI +- `limit` - Max results (1-1000, default: 100) +- `offset` - Skip N results (default: 0) +- `sort` - Sort by: `id`, `filename`, `createdAt`, `updatedAt` +- `order` - Sort order: `asc`, `desc` + +Response: + +```json +{ + "total": 42, + "files": [ + { + "id": "http://example.com/file/recording.wav", + "filename": "recording.wav", + "mediaType": "audio/wav", + "size": 2048576, + "memberOf": "http://example.com/collection/1", + "rootCollection": "http://example.com/collection/1", + "contentLicenseId": "https://creativecommons.org/licenses/by/4.0/", + "access": { + "metadata": true, + "content": true + } + } + ] +} +``` + +**Note**: The `/files` response does not include `metadataLicenseId`. Files only return their own `contentLicenseId`. + +The `/files` endpoint: + +1. Queries the File table +2. 
Applies the `fileAccessTransformer` to add access control +3. Returns file metadata with content license and access information + +#### GET /file/:id + +Retrieve file content by file ID. + +### File Handler + +The file handler serves the actual file content. This handler is applied to the `/file/:id` endpoint. + +#### Overview + +The file handler system enables: + +- **File streaming**: Stream file content directly to the client +- **Redirects**: Redirect to external file storage (S3, CDN, etc.) +- **Range support**: HTTP range requests for media streaming +- **Custom storage backends**: Integrate with any storage system +- **Metadata support**: Store implementation-specific metadata in the `meta` JSON field + +#### File Handler Configuration + +When mounting the application, you **must** provide a `fileHandler`. This is a required parameter to ensure conscious decisions about file storage. + +```typescript +import { createReadStream } from 'node:fs'; +import { Readable } from 'node:stream'; +import { Client } from '@opensearch-project/opensearch'; +import arocapi, { AllPublicAccessTransformer } from 'arocapi'; +import fastify from 'fastify'; +import { PrismaClient } from './generated/prisma/client.js'; + +const server = fastify(); +const prisma = new PrismaClient(); +const opensearch = new Client({ node: process.env.OPENSEARCH_URL }); + +await server.register(arocapi, { + prisma, + opensearch, + accessTransformer: AllPublicAccessTransformer, + // Required: File handler for serving File content + fileHandler: { + get: async (file, { request, fastify }) => { + // Example: Stream from local filesystem + const filePath = `/data/files/${file.meta.storagePath}`; + const stream = createReadStream(filePath); + + return { + type: 'stream', + stream, + metadata: { + contentType: file.mediaType, + contentLength: file.size, + etag: `"${file.fileId}"`, + lastModified: file.updatedAt, + }, + }; + }, + head: async (file, { request, fastify }) => { + // Return metadata without 
streaming the file + return { + contentType: file.mediaType, + contentLength: file.size, + etag: `"${file.fileId}"`, + lastModified: file.updatedAt, + }; + }, + }, + // Required: RO-Crate handler for serving RO-Crate metadata + roCrateHandler: { + get: async (entity, { request, fastify }) => { + // Serve the RO-Crate metadata as JSON-LD + const rocrate = entity.meta.rocrate; + + return { + type: 'stream', + stream: Readable.from([JSON.stringify(rocrate, null, 2)]), + metadata: { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(JSON.stringify(rocrate, null, 2)), + }, + }; + }, + head: async (entity, { request, fastify }) => { + return { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(JSON.stringify(entity.meta.rocrate, null, 2)), + }; + }, + }, +}); +``` + +### File Handler Types + +The file handler is an object interface with required methods: + +```typescript +type FileHandler = { + get: GetFileHandler; // Required: retrieve file content + head: HeadFileHandler; // Required: retrieve file metadata +}; + +type GetFileHandler = ( + file: File, // File from database + context: { + request: FastifyRequest, // Access request headers, query params + fastify: FastifyInstance, // Access prisma, opensearch, etc. + }, +) => Promise<FileResult | false> | FileResult | false; + +type HeadFileHandler = ( + file: File, // File from database + context: FileHandlerContext, +) => Promise<FileMetadata | false> | FileMetadata | false; +``` + +**Note**: The file parameter is a record from the File table. The `/file/:id` endpoint queries the File table directly. 
+ +#### FileResult Types + +**Stream Response** - Serve file content directly: + +```typescript +{ + type: 'stream', + stream: Readable, // Node.js readable stream + metadata: { + contentType: string, // MIME type (e.g., 'audio/wav') + contentLength: number, // File size in bytes + etag?: string, // Optional cache validation + lastModified?: Date, // Optional last modified date + }, +} +``` + +**Redirect Response** - Redirect to external location: + +```typescript +{ + type: 'redirect', + url: string, // Redirect URL (e.g., S3 presigned URL) +} +``` + +### Query Parameters + +The `/file/:id` endpoint supports these query parameters: + +- `disposition` - 'inline' (default) or 'attachment' for download prompts +- `filename` - Custom filename for Content-Disposition header (defaults to file.filename) +- `noRedirect` - Boolean; if true with redirect response, returns JSON `{"location": "url"}` instead of 302 redirect + +### HTTP Range Support + +The endpoint automatically handles HTTP range requests for partial content: + +- Returns **206 Partial Content** for valid range requests +- Returns **416 Range Not Satisfiable** for invalid ranges +- Sets appropriate `Content-Range` headers + +**Note**: The current implementation is simplified. For production use with large media files, implement range support in your fileHandler using seekable streams or storage APIs that support byte ranges. + +### Entity Meta Field + +The `meta` JSON field in the Entity model stores implementation-specific metadata: + +```typescript +// Example: Store storage location +await prisma.entity.create({ + data: { + rocrateId: 'http://example.com/file/123', + name: 'audio.wav', + // ... 
other fields + meta: { + storageBucket: 's3://my-bucket', + storageKey: 'collections/col-01/audio.wav', + checksum: 'sha256:abc123...', + }, + }, +}); +``` + +### File Handler Examples + +#### Local Filesystem + +```typescript +import { createReadStream } from 'node:fs'; import { stat } from 'node:fs/promises'; + +fileHandler: { + get: async (file, { fastify }) => { + const filePath = `/data/${file.meta.storagePath}`; + const stats = await stat(filePath); + + return { + type: 'stream', + stream: createReadStream(filePath), + metadata: { + contentType: file.mediaType || 'application/octet-stream', + contentLength: stats.size, + lastModified: stats.mtime, + }, + }; + }, + head: async (file, { fastify }) => { + const filePath = `/data/${file.meta.storagePath}`; + const stats = await stat(filePath); + + return { + contentType: file.mediaType || 'application/octet-stream', + contentLength: stats.size, + lastModified: stats.mtime, + }; + }, +} +``` + +#### S3/Object Storage with Redirect + +```typescript +import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; +import { getSignedUrl } from '@aws-sdk/s3-request-presigner'; + +const s3 = new S3Client({ region: 'us-east-1' }); + +fileHandler: { + get: async (file) => { + const command = new GetObjectCommand({ + Bucket: file.meta.bucket, + Key: file.meta.s3Key, + }); + + // Generate presigned URL (expires in 1 hour) + const url = await getSignedUrl(s3, command, { expiresIn: 3600 }); + + return { + type: 'redirect', + url, + }; + }, + head: async (file) => { + // For redirects, you may want to fetch metadata or return cached values + return { + contentType: file.mediaType, + contentLength: file.size, + }; + }, +} +``` + +#### S3 with Streaming + +```typescript +import { S3Client, GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3'; + +const s3 = new S3Client({ region: 'us-east-1' }); + +fileHandler: { + get: async (file) => { + const bucket = file.meta.bucket; + const key = file.meta.s3Key; + + // Get file metadata + const 
head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key })); + + // Stream file + const { Body } = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key })); + + return { + type: 'stream', + stream: Body as Readable, + metadata: { + contentType: head.ContentType, + contentLength: head.ContentLength, + etag: head.ETag, + lastModified: head.LastModified, + }, + }; + }, + head: async (file) => { + const bucket = file.meta.bucket; + const key = file.meta.s3Key; + + const head = await s3.send(new HeadObjectCommand({ Bucket: bucket, Key: key })); + + return { + contentType: head.ContentType, + contentLength: head.ContentLength, + etag: head.ETag, + lastModified: head.LastModified, + }; + }, +} +``` + +#### OCFL Repository + +```typescript +import { OCFLRepository } from '@ocfl/ocfl'; + +const ocfl = new OCFLRepository('/path/to/repository'); + +fileHandler: { + get: async (file) => { + const objectId = file.meta.ocflObjectId; + const versionId = file.meta.ocflVersion || 'head'; + const object = await ocfl.getObject(objectId); + const ocflFile = object.getFile('./', versionId); + + return { + type: 'stream', + stream: ocflFile.getStream(), + metadata: { + contentType: ocflFile.mimeType, + contentLength: ocflFile.size, + etag: ocflFile.digest, + }, + }; + }, + head: async (file) => { + const objectId = file.meta.ocflObjectId; + const object = await ocfl.getObject(objectId); + const ocflFile = object.getFile('./'); + + return { + contentType: ocflFile.mimeType, + contentLength: ocflFile.size, + etag: ocflFile.digest, + }; + }, +} +``` + +#### With Authorization Checks + +```typescript +fileHandler: { + get: async (file, { request }) => { + // Check user authorization + const token = request.headers.authorization; + const user = await verifyToken(token); + + if (!user) { + throw new Error('Unauthorized'); + } + + // Check content license access + const hasAccess = await checkLicense(file.contentLicenseId, user.id); + if (!hasAccess) { + throw new 
Error('Forbidden: Insufficient license'); + } + + // Serve file from storage + const filePath = `/data/${file.meta.storagePath}`; + return { + type: 'stream', + stream: createReadStream(filePath), + metadata: { + contentType: file.mediaType, + contentLength: file.size + }, + }; + }, + head: async (file, { request }) => { + // Same authorization checks + const token = request.headers.authorization; + const user = await verifyToken(token); + + if (!user) { + throw new Error('Unauthorized'); + } + + const hasAccess = await checkLicense(file.contentLicenseId, user.id); + if (!hasAccess) { + throw new Error('Forbidden: Insufficient license'); + } + + return { + contentType: file.mediaType, + contentLength: file.size, + }; + }, +} +``` + +### RO-Crate Handler + +The RO-Crate handler serves RO-Crate metadata for any entity type (Collection, Object, or File). This handler is applied to the `/entity/:id/rocrate` endpoint. + +#### Overview + +The RO-Crate handler enables: + +- **JSON-LD streaming**: Stream RO-Crate metadata as application/ld+json +- **Redirects**: Redirect to stored RO-Crate files +- **File serving**: Serve RO-Crate files from disk with nginx X-Accel-Redirect support +- **Universal support**: Works with Collections, Objects, and Files + +#### RO-Crate Handler Types + +```typescript +type RoCrateHandler = { + get: GetRoCrateHandler; // Required: retrieve RO-Crate metadata + head: HeadRoCrateHandler; // Required: retrieve RO-Crate metadata headers +}; + +type GetRoCrateHandler = ( + entity: Entity, // Any entity type (Collection, Object, or File) + context: { + request: FastifyRequest, + fastify: FastifyInstance, + }, +) => Promise | FileResult | false; + +type HeadRoCrateHandler = ( + entity: Entity, + context: FileHandlerContext, +) => Promise | FileMetadata | false; +``` + +#### RO-Crate Handler Examples + +**Stream from Database**: + +```typescript +roCrateHandler: { + get: async (entity) => { + const rocrate = entity.rocrate; + const jsonString = 
JSON.stringify(rocrate, null, 2); + + return { + type: 'stream', + stream: Readable.from([jsonString]), + metadata: { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(jsonString), + etag: `"${entity.id}-rocrate"`, + lastModified: entity.updatedAt, + }, + }; + }, + head: async (entity) => { + const jsonString = JSON.stringify(entity.rocrate); + return { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(jsonString), + etag: `"${entity.id}-rocrate"`, + lastModified: entity.updatedAt, + }; + }, +} +``` + +**Serve from Filesystem**: + +```typescript +import { createReadStream, stat } from 'node:fs/promises'; + +roCrateHandler: { + get: async (entity) => { + const rocrateFile = `/data/rocrates/${entity.meta.rocrateFile}`; + const stats = await stat(rocrateFile); + + return { + type: 'file', + path: rocrateFile, + metadata: { + contentType: 'application/ld+json', + contentLength: stats.size, + lastModified: stats.mtime, + }, + }; + }, + head: async (entity) => { + const rocrateFile = `/data/rocrates/${entity.meta.rocrateFile}`; + const stats = await stat(rocrateFile); + + return { + contentType: 'application/ld+json', + contentLength: stats.size, + lastModified: stats.mtime, + }; + }, +} +``` + +**Redirect to S3**: + +```typescript +import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'; +import { getSignedUrl } from '@aws-sdk/s3-request-presigner'; + +const s3 = new S3Client({ region: 'us-east-1' }); + +roCrateHandler: { + get: async (entity) => { + const command = new GetObjectCommand({ + Bucket: entity.meta.bucket, + Key: `${entity.meta.prefix}/ro-crate-metadata.json`, + }); + + const url = await getSignedUrl(s3, command, { expiresIn: 3600 }); + + return { + type: 'redirect', + url, + }; + }, + head: async (entity) => { + // Return cached metadata + return { + contentType: 'application/ld+json', + contentLength: entity.meta.rocrateSize, + }; + }, +} +``` + +### Testing Handlers + +Test custom file and RO-Crate 
handlers by passing them to your test Fastify instance: + +```typescript +import { describe, it, expect, beforeEach } from 'vitest'; +import { Readable } from 'node:stream'; +import { fastify, fastifyBefore } from './test/helpers/fastify.js'; +import fileRoute from './routes/file.js'; +import crateRoute from './routes/crate.js'; + +describe('Custom Handler Tests', () => { + beforeEach(async () => { + await fastifyBefore(); + }); + + it('should stream files from custom storage', async () => { + const customFileHandler = { + get: async (entity) => ({ + type: 'stream', + stream: Readable.from(['test content']), + metadata: { + contentType: 'audio/wav', + contentLength: 12, + }, + }), + head: async (entity) => ({ + contentType: 'audio/wav', + contentLength: 12, + }), + }; + + await fastify.register(fileRoute, { + fileHandler: customFileHandler, + }); + + const response = await fastify.inject({ + method: 'GET', + url: '/entity/http://example.com/file.wav/file', + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('audio/wav'); + expect(response.body).toBe('test content'); + }); + + it('should serve RO-Crate metadata', async () => { + const customRoCrateHandler = { + get: async (entity) => ({ + type: 'stream', + stream: Readable.from([JSON.stringify(entity.rocrate)]), + metadata: { + contentType: 'application/ld+json', + contentLength: JSON.stringify(entity.rocrate).length, + }, + }), + head: async (entity) => ({ + contentType: 'application/ld+json', + contentLength: JSON.stringify(entity.rocrate).length, + }), + }; + + await fastify.register(crateRoute, { + roCrateHandler: customRoCrateHandler, + }); + + const response = await fastify.inject({ + method: 'GET', + url: '/entity/http://example.com/collection/rocrate', + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + }); +}); +``` + ## Database Management ### Prisma Operations @@ -597,4 +1636,3 @@ pnpm run 
dbconsole # Access database console --- This document should be updated as the project evolves and new patterns emerge. - diff --git a/README.md b/README.md index 321ef69..0f831bd 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ OPENSEARCH_URL="http://localhost:9200" Create your Fastify application with Typescript support: ```typescript -// src/app.ts +// src/index.ts import { Client } from '@opensearch-project/opensearch'; import arocapi, { AllPublicAccessTransformer } from 'arocapi'; import Fastify from 'fastify'; @@ -104,6 +104,35 @@ fastify.register(arocapi, { prisma, opensearch, accessTransformer: AllPublicAccessTransformer, + // Required: File handler for serving File entity content + fileHandler: { + get: async (file) => { + const fileUrl = `https://storage.example.com/${file.meta.storagePath}`; + return { type: 'redirect', url: fileUrl }; + }, + head: async (file) => ({ + contentType: file.mediaType, + contentLength: file.size, + }), + }, + // Required: RO-Crate handler for serving RO-Crate metadata + roCrateHandler: { + get: async (entity) => { + const jsonString = JSON.stringify(entity.rocrate, null, 2); + return { + type: 'stream', + stream: Readable.from([jsonString]), + metadata: { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(jsonString), + }, + }; + }, + head: async (entity) => ({ + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(JSON.stringify(entity.rocrate)), + }), + }, }); try { @@ -222,10 +251,10 @@ The arocapi provides the following endpoints: - `GET /entities` - List all entities with pagination and filtering - `GET /entity/:id` - Get a specific entity by ID -- `POST /entity` - Create a new entity -- `PUT /entity/:id` - Update an existing entity -- `DELETE /entity/:id` - Delete an entity -- `GET /search` - Search entities using OpenSearch +- `GET /entity/:id/rocrate` - Download RO-Crate metadata +- `GET /files` - List files with pagination and filtering +- `GET /file/:id` - Download or 
access file content +- `POST /search` - Search entities using OpenSearch ## Customising Entity Responses @@ -252,26 +281,22 @@ await server.register(arocapi, { **For restricted content**, implement a custom access transformer: ```typescript -await server.register(arocapi, { - prisma, - opensearch, - accessTransformer: async (entity, { request, fastify }) => { - // Custom logic to determine access - const user = await authenticateUser(request); - const canAccessContent = await checkLicense(entity.contentLicenseId, user); +const accessTransformer = async (entity, { request, fastify }) => { + // Custom logic to determine access + const user = await authenticateUser(request); + const canAccessContent = await checkLicense(entity.contentLicenseId, user); - return { - ...entity, - access: { - metadata: true, // Metadata always visible - content: canAccessContent, - contentAuthorisationUrl: canAccessContent - ? undefined - : 'https://rems.example.com/request-access', - }, - }; - }, -}); + return { + ...entity, + access: { + metadata: true, // Metadata always visible + content: canAccessContent, + contentAuthorisationUrl: canAccessContent + ? undefined + : 'https://rems.example.com/request-access', + }, + }; +}; ``` > [!WARNING] @@ -362,6 +387,166 @@ entityTransformers: [ ] ``` +## File Handler System + +The API provides two separate handler systems for serving different types of content: + +1. **File Handler** - Serves file content for Files (`/file/:id`) +2. **RO-Crate Handler** - Serves RO-Crate metadata for any entity (`/entity/:id/rocrate`) + +### File Handler (Required) + +The `fileHandler` parameter is **required**. It serves actual file content. + +### RO-Crate Handler (Required) + +The `roCrateHandler` parameter is **required**. It serves RO-Crate metadata as JSON-LD +for any entity type (Collection, Object, or File). 
+ +**File Handler Example** (S3 with redirect): + +```typescript +fileHandler: { + get: async (file) => { + const command = new GetObjectCommand({ + Bucket: file.meta.bucket, + Key: file.meta.s3Key, + }); + const url = await getSignedUrl(s3, command, { expiresIn: 3600 }); + + return { type: 'redirect', url }; + }, + head: async (file) => { + return { + contentType: file.mediaType, + contentLength: file.size, + }; + }, +} +``` + +**File Handler Example** (local filesystem): + +```typescript +fileHandler: { + get: async (file) => { + const filePath = `/data/files/${file.meta.storagePath}`; + const stats = await stat(filePath); + + return { + type: 'stream', + stream: createReadStream(filePath), + metadata: { + contentType: file.mediaType, + contentLength: stats.size, + lastModified: stats.mtime, + }, + }; + }, + head: async (file) => { + const filePath = `/data/files/${file.meta.storagePath}`; + const stats = await stat(filePath); + + return { + contentType: file.mediaType, + contentLength: stats.size, + lastModified: stats.mtime, + }; + }, +} +``` + +**RO-Crate Handler Example** (stream from database): + +```typescript +roCrateHandler: { + get: async (entity) => { + const jsonString = JSON.stringify(entity.rocrate, null, 2); + + return { + type: 'stream', + stream: Readable.from([jsonString]), + metadata: { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(jsonString), + }, + }; + }, + head: async (entity) => { + const jsonString = JSON.stringify(entity.rocrate); + return { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(jsonString), + }; + }, +} +``` + +### File Handler Response Types + +The file handler must return one of two response types: + +**Redirect Response** - Redirect to external file location: + +```typescript +{ type: 'redirect', url: 'https://storage.example.com/file.wav' } +``` + +**Stream Response** - Serve file content directly: + +```typescript +{ + type: 'stream', + stream: Readable, // Node.js 
readable stream + metadata: { + contentType: 'audio/wav', + contentLength: 1024, + etag?: '"abc123"', // Optional + lastModified?: new Date(), // Optional + }, +} +``` + +### Query Parameters + +The `/file/:id` endpoint supports these query parameters: + +- `disposition` - 'inline' (default) or 'attachment' for download prompts +- `filename` - Custom filename for Content-Disposition header (defaults to entity.name) +- `noRedirect` - If true with redirect response, returns JSON `{location: url}` instead of HTTP 302 + +### HTTP Range Support + +The endpoint automatically handles HTTP range requests for partial content, +useful for media streaming: + +- Returns **206 Partial Content** for valid range requests +- Returns **416 Range Not Satisfiable** for invalid ranges +- Sets appropriate `Content-Range` and `Accept-Ranges` headers + +### Entity Meta Field + +The `meta` JSON field in the Entity model stores implementation-specific +metadata for your file handler: + +```typescript +await prisma.entity.create({ + data: { + rocrateId: 'http://example.com/file/123', + name: 'audio.wav', + entityType: 'http://schema.org/MediaObject', + // ... other required fields + meta: { + bucket: 's3://my-bucket', + storagePath: 'collections/col-01', + checksum: 'sha256:abc123...', + }, + }, +}); +``` + +Your file handler can use this metadata to locate files in your storage system. 
+ ## Development Workflow ### Local Development Setup diff --git a/docker-compose.yaml b/docker-compose.yaml index 63fdca7..96c608c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,10 +1,11 @@ services: oni-ui: - image: ghcr.io/paradisec-archive/oni-ui:paradisec + #image: ghcr.io/language-research-technology/oni-ui:new-api + image: ghcr.io/paradisec-archive/oni-ui:new-api ports: - - 5173:5173 + - 3000:80 volumes: - - ./docker/oni-config.json:/app/config/configuration.json + - ./docker/oni-config.json:/configuration.json db: image: mysql:8 diff --git a/docker/oni-config.json b/docker/oni-config.json index 78c4ab7..979ae0f 100644 --- a/docker/oni-config.json +++ b/docker/oni-config.json @@ -1,13 +1,11 @@ { "api": { "rocrate": { - "endpoint": "http://localhost:3000", + "endpoint": "http://localhost:5173", "path": "" } }, "ui": { - "siteName": "AROCAPI Test", - "siteNameX": "AROCAPI", "title": "An RO Crate API Reference Implementation", "shortTitle": "AROCAPI", "splash": { @@ -17,13 +15,10 @@ "enabled": true, "launcher": "Acknowledgement of Country" }, - "showLogo": true, "navHeight": "80px", - "showNotebooks": false, "showEllipsis": true, "help": { "aboutText": "This is an Oni Data Portal publication. This portal provides access to a repository of language data, it is provided by the Language Data Commons of Australia [LDaCA](https://ldaca.edu.au).", - "helpUrl": "https://www.example.com", "citationText": "CITE AS: Oni Platform. A project with co-investment from Australian Research Data Commons, The University of Queensland, The University of Melbourne, The University of Sydney, Monash University, First Languages Australia and AARNet." 
}, "subHelpLinks": [ @@ -38,8 +33,10 @@ "href": "/terms", "title": "Terms of Service" }, - "email": { - "help": "help@example.com" + "privacy": { + "text": "Example privacy service", + "href": "/privacy", + "title": "Privacy" }, "footer": { "copyright": "Example Group", @@ -49,36 +46,29 @@ } }, "login": { - "enabled": true - }, - "authorizationProvider": { - "label": "REMS", - "url": "https://rems-uat.example.com/applications" - }, - "enrollment": { - "enforced": false, - "URL": "https://registry-test.ldaca.edu.au/registry/co_petitions/start/coef:9" + "enabled": false, + "manageTermsAndConditions": false }, "topNavItems": [ { - "route": "list?conformsTo=https%3A%2F%2Fw3id.org%2Fldac%2Fprofile%23Collection", - "display": "Collections" + "route": "list", + "display": "All" }, { - "route": "list?conformsTo=https%3A%2F%2Fw3id.org%2Fldac%2Fprofile%23Object", - "display": "Items" + "route": "list?entityType=http%3A%2F%2Fpcdm.org%2Fmodels%23Collection", + "display": "Collections" }, { - "route": "list?conformsTo=https%3A%2F%2Fw3id.org%2Fldac%2Fprofile%23Object,https%3A%2F%2Fw3id.org%2Fldac%2Fprofile%23Collection", - "display": "Both" + "route": "list?entityType=http%3A%2F%2Fpcdm.org%2Fmodels%23Object", + "display": "Items" }, { - "route": "search?f=%257B%2522%2540type%2522%253A%255B%2522RepositoryCollection%2522%255D%252C%2522_isTopLevel.%2540value%2522%253A%255B%2522true%2522%255D%257D", - "display": "Top Collections" + "route": "list?entityType=http%3A%2F%2Fpcdm.org%2Fmodels%23File", + "display": "Files" }, { - "route": "/search?f=%257B%2522%2540type%2522%253A%255B%2522SoftwareApplication%2522%255D%257D", - "display": "Notebooks" + "route": "/search", + "display": "Search" } ], "search": { @@ -87,10 +77,6 @@ "value": "relevance", "label": "Relevance" }, - { - "value": "id", - "label": "Id" - }, { "value": "name", "label": "Title" @@ -98,29 +84,11 @@ { "value": "createdAt", "label": "Created" - }, - { - "value": "updatedAt", - "label": "Updated" - } - ], - 
"searchSorting": { - "value": "relevance", - "label": "Relevance" - }, - "ordering": [ - { - "value": "asc", - "label": "Ascending" - }, - { - "value": "desc", - "label": "Descending" } ], - "defaultOrder": { - "value": "desc", - "label": "Descending" + "default": { + "sorting": "relevance", + "ordering": "desc" }, "searchDetails": [ { @@ -130,321 +98,184 @@ ] }, "main": { - "fields": [ + "byteFields": ["size", "contentSize"], + "expand": ["ldac:speaker", "author", "citation"] + }, + "collection": { + "meta": { + "mode": "filter", + "top": [ + { + "display": "Name", + "name": "name" + }, + { + "display": "Description", + "name": "description" + }, + { + "display": "Date Published", + "name": "datePublished" + } + ], + "hide": [] + } + }, + "object": { + "meta": { + "mode": "filter", + "top": [ + { + "display": "Name", + "name": "name" + }, + { + "display": "Description", + "name": "description" + }, + { + "display": "Date Published", + "name": "datePublished" + } + ], + "hide": [] + } + }, + "file": { + "meta": { + "hide": [] + } + }, + "googleForm": { + "takedown": "https://docs.google.com/forms/d/e/1FAIpQLSc3wWGYLgkEtba65mL46BUhMAPWiHt5EHrQ2rWiI4nJqPjpDg/viewform?usp=pp_url&entry.812577446=" + }, + "citeData": { + "help": { + "text": "See this help page for more detail." 
+ } + }, + "head": { + "title": "DC.publisher", + "meta": [ { - "display": "Language", - "name": "inLanguage" + "name": "DC.title", + "content": "name" }, { - "display": "Linguistic Genre", - "name": "linguisticGenre" + "name": "DC.description", + "content": "description" }, { - "display": "Comminication Mode", - "name": "communicationMode" + "name": "DC.creator", + "content": "creator" }, { - "display": "Annotation Type", - "name": "annotationType" + "name": "DC.creator", + "content": "author" }, { - "display": "File Formats", - "name": "encodingFormat" - } - ], - "byteFields": ["size", "contentSize"], - "expand": ["speaker", "author", "citation"] - }, - "collection": { - "name": { - "display": "Name", - "name": "name" - }, - "top": [ - { - "display": "Name", - "name": "name" + "name": "DC.contributor", + "content": "contributor" }, { - "display": "Description", - "name": "description" + "name": "DC.date", + "content": "datePublished" }, { - "display": "Date Published", - "name": "datePublished" - } - ], - "meta": { - "hide": [ - "name", - "description", - "datePublished", - "license", - "@type", - "containsTypes", - "crateId", - "isRoot", - "isTopLevel", - "root", - "hasPart", - "hasMember", - "memberOf", - "isSubLevel", - "memberOf", - "access", - "collectionStack", - "metadataIsPublic", - "metadataLicense", - "identifier", - "subCollection", - "mainCollection" - ], - "displayHasMember": false, - "displayHasPart": true - }, - "relationships": [ - { - "name": "notebook", - "display": "Notebooks", - "type": "SoftwareApplication" - } - ] - }, - "object": { - "name": { - "display": "Name", - "name": "name" - }, - "top": [ - { - "display": "Name", - "name": "name" + "name": "DC.language", + "content": "inLanguage" }, { - "display": "Description", - "name": "description" + "name": "DC.identifier", + "content": "@id" }, { - "display": "Date Published", - "name": "datePublished" - } - ], - "meta": { - "hide": [ - "memberOf", - "name", - "description", - "datePublished", 
- "identifier", - "license", - "@type", - "containsTypes", - "crateId", - "isRoot", - "isTopLevel", - "root", - "hasPart", - "hasMember", - "error", - "parent", - "access", - "indexableText", - "access", - "collectionStack", - "metadataIsPublic", - "metadataLicense", - "mainCollection", - "subCollection" - ] - } - }, - "file": { - "meta": { - "hide": [ - "name", - "@type", - "access", - "crateId", - "parent", - "memberOf", - "root", - "text", - "collectionStack", - "metadataIsPublic", - "metadataLicense", - "mainCollection", - "subCollection" - ] - } - }, - "notebook": { - "name": { - "display": "Name", - "name": "name" - }, - "top": [ + "name": "DC.type", + "content": "@type" + }, { - "display": "Name", - "name": "name" + "name": "DC.type", + "content": "materialType" }, { - "display": "Description", - "name": "description" + "name": "DC.license", + "content": "license" }, { - "display": "Date Published", - "name": "datePublished" + "name": "DC.date", + "content": "dateCreated" } - ], - "meta": { - "hide": [ - "name", - "description", - "@type", - "datePublished", - "gitName", - "gitRepo", - "url", - "binderLink", - "base64", - "access", - "metadataIsPublic", - "metadataLicense", - "mainCollection", - "subCollection" - ], - "displayHasMember": false, - "displayHasPart": false - } - }, - "analytics": { - "gaMeasurementId": "G-XXX" - }, - "googleForm": { - "takedown": "https://docs.google.com/forms/d/e/1FAIpQLSc3wWGYLgkEtba65mL46BUhMAPWiHt5EHrQ2rWiI4nJqPjpDg/viewform?usp=pp_url&entry.812577446=" - }, - "binderhubRegistry": { - "registryJson": "https://api.github.com/repos/Language-Research-Technology/BinderHub-registry/contents/BinderHub-registry.json" + ] }, + "aggregations": [ { "display": "Collector", - "order": 0, - "name": "collector_name", - "active": true, - "hide": false + "name": "collector_name" }, { "display": "Collection", - "order": 1, - "name": "memberOf", - "hide": false + "name": "memberOf" }, { "display": "Main Collections", - "order": 0, "name": 
"root", - "help": "A group of related objects such as a corpus, a sub-corpus, or items collected in a session with consultants.", - "hide": false + "help": "A group of related objects such as a corpus, a sub-corpus, or items collected in a session with consultants." }, { "display": "Collection", - "order": 1, "name": "mainCollection", - "help": "A group of related objects such as a corpus, a sub-corpus, or items collected in a session with consultants.", - "hide": true + "help": "A group of related objects such as a corpus, a sub-corpus, or items collected in a session with consultants." }, { "display": "Country", - "order": 2, "name": "countries", "help": "The countries associated with the entity." }, { "display": "License", - "order": 0, - "name": "licenseId", - "hide": true, - "icons": true + "name": "licenseId" }, { "display": "Access", - "order": 2, "name": "licenseName", "help": "The access conditions associated with a resource." }, { "display": "Record Type", - "order": 3, "name": "type", "help": "The type of object a record describes, i.e. a collection, object or file. For individual files, this field also gives information about the nature of the material, i.e. primary material, transcription, annotation etc." }, { "display": "Language", - "order": 4, "name": "languages", "help": "The language(s) of the content in this resource." }, { "display": "Communication Mode", - "order": 5, "name": "communicationMode", - "help": "The mode(s) (spoken, written, signed, etc.) used in the interaction represented by this resource.", - "icons": true + "help": "The mode(s) (spoken, written, signed, etc.) used in the interaction represented by this resource." }, { "display": "Linguistic Genre", - "order": 6, "name": "linguisticGenre", "help": "The linguistic classification of the genre of this resource." 
}, { "display": "File Format", - "order": 7, "name": "encodingFormat", - "help": "The media type of the resource.", - "icons": true + "help": "The media type of the resource." }, { "display": "Annotation Type", - "order": 8, "name": "annotationType" } ], "searchFields": { - "name": { - "label": "Name", - "checked": true - }, - "description": { - "label": "Description", - "checked": true - }, - "inLanguage": { - "label": "Language", - "checked": true - }, - "text": { - "label": "Text", - "checked": true - } - }, - "licenses": [ - { - "license": "#restricted", - "group": "restricted-users", - "access": "loginPlus", - "enrollment": { - "url": "http://localhost", - "label": "", - "class": "" - } - } - ], - "conformsTo": { - "collection": "https://w3id.org/ldac/profile#Collection", - "object": "https://w3id.org/ldac/profile#Object", - "notebook": "https://w3id.org/ldac/profile#Notebook" + "name": "Name", + "description": "Description", + "inLanguage": "Language", + "text": "Text" }, "mapConfig": { "boundingBox": { diff --git a/example/data/README.md b/example/data/README.md new file mode 100644 index 0000000..bd692f6 --- /dev/null +++ b/example/data/README.md @@ -0,0 +1,296 @@ +# Sample Language Documentation Data + +This directory contains realistic sample data for testing and development of the AROCAPI system. The data represents two language documentation collections with authentic RO-Crate metadata, audio files, and ELAN annotation files. + +## Contents + +The sample data consists of **2 collections** with **3 items each** (6 items total): + +### Collection 1: Nyeleni Language Documentation Archive + +A West African endangered language documentation project. + +- **Collection ID**: `http://example.com/collection/nyeleni-001` +- **Language**: Nyeleni (nya) +- **Location**: Mopti Region, Mali +- **License**: CC BY-NC-SA 4.0 (content), CC BY 4.0 (metadata) +- **Depositor**: Dr. Aminata Koné, University of Bamako + +**Items:** + +1. 
**Traditional Greetings** (`item-01-greeting`) + - Speaker: Elder Mamadou Diarra + - Duration: 3 seconds + - Includes formal greetings and health inquiries + +2. **The Tale of the Clever Hare** (`item-02-story`) + - Narrator: Fatoumata Traoré + - Duration: 4.5 seconds + - Traditional folktale with moral lessons + +3. **Kinship Terms Elicitation** (`item-03-vocabulary`) + - Consultant: Sekou Keita + - Duration: 4 seconds + - Structured vocabulary session on family terms + +### Collection 2: Coastal Dialects Archive + +A Pacific coastal language documentation project. + +- **Collection ID**: `http://example.com/collection/coastal-001` +- **Language**: Pacific Coastal Language (pac) +- **Location**: Northern Pacific Coast +- **License**: CC BY-NC-ND 4.0 (content), CC BY 4.0 (metadata) +- **Depositor**: Dr. Sarah Chen, Pacific Coast University + +**Items:** + +1. **The Great Storm - Personal Narrative** (`item-01-narrative`) + - Speaker: William Johnson + - Duration: 5 seconds + - Historical account from 1962 + +2. **Traditional Fishing Songs** (`item-02-songs`) + - Singer: Mary Thompson + - Duration: 3.5 seconds + - Songs performed before launching boats + +3. 
**Preparing the Net - Conversation** (`item-03-conversation`) + - Speakers: James Wilson & Robert Anderson + - Duration: 4.2 seconds + - Natural conversation about traditional net-making + +## File Structure + +``` +example/data/ +├── README.md # This file +├── seed.ts # Database seeding script +├── generate-wav.ts # Script to regenerate WAV files +│ +├── collection-01-nyeleni/ +│ ├── ro-crate-metadata.json # Collection-level metadata +│ ├── item-01-greeting/ +│ │ ├── ro-crate-metadata.json # Item metadata +│ │ ├── greeting.wav # Audio recording (3s, 440Hz) +│ │ └── greeting.eaf # ELAN annotations +│ ├── item-02-story/ +│ │ ├── ro-crate-metadata.json +│ │ ├── story.wav # Audio recording (4.5s, 523Hz) +│ │ └── story.eaf +│ └── item-03-vocabulary/ +│ ├── ro-crate-metadata.json +│ ├── vocabulary.wav # Audio recording (4s, 587Hz) +│ └── vocabulary.eaf +│ +└── collection-02-coastal/ + ├── ro-crate-metadata.json + ├── item-01-narrative/ + │ ├── ro-crate-metadata.json + │ ├── narrative.wav # Audio recording (5s, 659Hz) + │ └── narrative.eaf + ├── item-02-songs/ + │ ├── ro-crate-metadata.json + │ ├── songs.wav # Audio recording (3.5s, 698Hz) + │ └── songs.eaf + └── item-03-conversation/ + ├── ro-crate-metadata.json + ├── conversation.wav # Audio recording (4.2s, 784Hz) + └── conversation.eaf +``` + +## RO-Crate Metadata + +Each collection and item includes comprehensive RO-Crate metadata following the [RO-Crate 1.1 specification](https://www.researchobject.org/ro-crate/): + +### Collection-Level Metadata + +- Collection name and description +- Geographic location with coordinates +- Primary language with ISO 639-3 codes +- Depositor information +- Institutional affiliation +- License information (separate for content and metadata) +- Links to all items in the collection + +### Item-Level Metadata + +- Item name and description +- Recording date and genre +- Speaker/contributor information +- Language information +- File parts (audio + annotation) +- Relationship to parent 
collection +- License information + +## Audio Files (WAV) + +All WAV files are real audio files with valid WAV format headers: + +- **Format**: 16-bit PCM, mono, 44.1kHz +- **Content**: Sine wave tones at different frequencies +- **Durations**: 3-5 seconds each +- **Size**: ~50-150KB each +- **Features**: Fade in/out to prevent clicks + +Each item has a unique tone frequency for easy identification during testing. + +## ELAN Annotation Files (.eaf) + +ELAN files follow the [ELAN Annotation Format (EAF) 3.0](https://www.mpi.nl/tools/elan/) specification: + +### Tier Structure + +1. **Transcription tier**: Time-aligned transcription in the source language +2. **Translation tier**: Free English translation (symbolic association) +3. **Additional tiers**: Notes, glosses, or discourse structure (varies by item) + +### Features + +- Time-aligned annotations with millisecond precision +- Multiple participants (for conversation) +- Linguistic type definitions +- Language metadata (ISO 639-3) +- Realistic linguistic content with proper orthography + +## Usage + +### Loading Sample Data + +To seed your local database with this sample data: + +```bash +# From the example directory +pnpm run seed +``` + +This will: +1. Clear existing entities from the database +2. Load both collections and all items +3. Create OpenSearch index with proper mappings +4. 
Index all entities for search + +### Regenerating WAV Files + +If you need to regenerate the audio files: + +```bash +# From the example directory +pnpm run generate-wav +``` + +### Testing with Sample Data + +The sample data can be used to test: + +- **Entity retrieval**: `GET /entity/{rocrateId}` +- **Collection browsing**: `GET /entities?memberOf={collectionId}` +- **Search functionality**: `POST /search` with various queries +- **Access control**: Different licenses between collections +- **Language filtering**: ISO 639-3 language codes +- **Geographic search**: GeoJSON coordinates in metadata + +### Example API Queries + +```bash +# Get a collection +GET /entity/http%3A%2F%2Fexample.com%2Fcollection%2Fnyeleni-001 + +# Get all items in a collection +GET /entities?memberOf=http://example.com/collection/nyeleni-001 + +# Search for greetings +POST /search +{ + "query": { + "match": { + "description": "greeting" + } + } +} +``` + +## Data Characteristics + +### Realistic Features + +- **Authentic metadata structure**: Based on real language documentation projects +- **Proper licensing**: Different CC licenses between collections +- **Geographic diversity**: West Africa and Pacific Coast locations +- **Genre variety**: Greetings, stories, elicitation, narratives, songs, conversation +- **Multi-speaker content**: Includes items with multiple contributors +- **Cultural context**: Annotations include cultural and linguistic notes + +### Simplifications + +- **Audio content**: Simple sine wave tones instead of actual speech +- **Language codes**: Fictional language codes (nya, pac) for demonstration +- **Entity IDs**: Simple HTTP URIs instead of production identifiers +- **File sizes**: Smaller than production files for faster testing + +## License + +This sample data is provided for testing and development purposes only. 
+ +- **Metadata**: CC BY 4.0 +- **Collection 1 content**: CC BY-NC-SA 4.0 (fictional license for testing) +- **Collection 2 content**: CC BY-NC-ND 4.0 (fictional license for testing) + +All speaker names, locations, and linguistic content are fictional and created solely for demonstration purposes. + +## Maintenance + +### Updating Metadata + +To modify the RO-Crate metadata: + +1. Edit the appropriate `ro-crate-metadata.json` file +2. Re-run the seed script to update the database +3. Ensure the JSON-LD is valid RO-Crate 1.1 format + +### Adding New Items + +To add new items to a collection: + +1. Create a new directory under the collection +2. Add `ro-crate-metadata.json`, WAV, and EAF files +3. Update the collection's `ro-crate-metadata.json` to include the new item +4. Update `seed.ts` to process the new item +5. Run `pnpm run seed` + +### Regenerating Files + +- **WAV files**: Modify `generate-wav.ts` and run `pnpm run generate-wav` +- **ELAN files**: Manually edit the XML or use ELAN software + +## Technical Details + +### Database Schema + +Entities are stored in the `Entity` table with: + +- `rocrateId`: Unique identifier (VARCHAR 2048) +- `name`: Entity name (VARCHAR 1024) +- `description`: Text description +- `entityType`: Schema.org or PCDM type +- `memberOf`: Parent collection reference +- `rootCollection`: Top-level collection reference +- `metadataLicenseId`: License for metadata +- `contentLicenseId`: License for content +- `rocrate`: Full RO-Crate metadata (JSON) + +### OpenSearch Mappings + +The search index includes: + +- Keyword fields: `rocrateId`, `entityType`, `memberOf`, `inLanguage` +- Text fields: `name`, `description` +- Structured fields: `location` (geo_point) + +## References + +- [RO-Crate Specification](https://www.researchobject.org/ro-crate/) +- [ELAN Annotation Format](https://www.mpi.nl/tools/elan/) +- [Portland Common Data Model (PCDM)](https://pcdm.org/) +- [Schema.org](https://schema.org/) +- [Creative Commons 
Licenses](https://creativecommons.org/licenses/) diff --git a/example/data/collection-01-nyeleni/item-01-greeting/greeting.eaf b/example/data/collection-01-nyeleni/item-01-greeting/greeting.eaf new file mode 100644 index 0000000..1fad7d6 --- /dev/null +++ b/example/data/collection-01-nyeleni/item-01-greeting/greeting.eaf @@ -0,0 +1,71 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:c5d3f8a1-2b4e-4c9d-8f1a-3e5b7c9d2f4e + 8 +
+ + + + + + + + + + + + I ni sɔgɔma + + + + + I ka kɛnɛ wa? + + + + + Ala ka sa + + + + + + + Good morning + + + + + Are you well? + + + + + Thank God + + + + + + + Standard morning greeting + + + + + Health inquiry - important social protocol + + + + + + + + + + + + +
diff --git a/example/data/collection-01-nyeleni/item-01-greeting/greeting.wav b/example/data/collection-01-nyeleni/item-01-greeting/greeting.wav new file mode 100644 index 0000000..b3a63d9 Binary files /dev/null and b/example/data/collection-01-nyeleni/item-01-greeting/greeting.wav differ diff --git a/example/data/collection-01-nyeleni/item-01-greeting/ro-crate-metadata.json b/example/data/collection-01-nyeleni/item-01-greeting/ro-crate-metadata.json new file mode 100644 index 0000000..19c2ee9 --- /dev/null +++ b/example/data/collection-01-nyeleni/item-01-greeting/ro-crate-metadata.json @@ -0,0 +1,98 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "Traditional Greetings - Elder Mamadou", + "description": "Recording of traditional Nyeleni greetings performed by Elder Mamadou Diarra. 
The recording demonstrates formal greetings used in community gatherings and includes responses from multiple participants.", + "identifier": "http://example.com/item/nyeleni-greeting-001", + "dateCreated": "2024-01-10", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "memberOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/nya" + }, + "recordingDate": "2024-01-10", + "contributor": [ + { + "@id": "#speaker-mamadou" + }, + { + "@id": "#researcher-aminata" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/nyeleni-greeting-001/file/greeting.wav" + }, + { + "@id": "http://example.com/item/nyeleni-greeting-001/file/greeting.eaf" + } + ] + }, + { + "@id": "http://example.com/item/nyeleni-greeting-001/file/greeting.wav", + "@type": ["File", "MediaObject"], + "filename": "greeting.wav", + "encodingFormat": "audio/wav", + "contentSize": "88200", + "duration": "PT3S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "http://example.com/item/nyeleni-greeting-001/file/greeting.eaf", + "@type": ["File", "MediaObject"], + "filename": "greeting.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "#speaker-mamadou", + "@type": "Person", + "name": "Mamadou Diarra", + "description": "Village elder and fluent Nyeleni speaker, born 1945", + "gender": "male", + "birthDate": "1945" + }, + { + "@id": "#researcher-aminata", + "@type": "Person", + "name": "Dr. 
Aminata Koné", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/nya", + "@type": "Language", + "name": "Nyeleni", + "identifier": "nya" + } + ] +} diff --git a/example/data/collection-01-nyeleni/item-02-story/ro-crate-metadata.json b/example/data/collection-01-nyeleni/item-02-story/ro-crate-metadata.json new file mode 100644 index 0000000..c6a888b --- /dev/null +++ b/example/data/collection-01-nyeleni/item-02-story/ro-crate-metadata.json @@ -0,0 +1,99 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "The Tale of the Clever Hare", + "description": "Traditional folktale narrated by Fatoumata Traoré. This story is part of the oral tradition passed down through generations and teaches lessons about wisdom and community cooperation.", + "identifier": "http://example.com/item/nyeleni-story-001", + "dateCreated": "2024-01-12", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "memberOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/nya" + }, + "recordingDate": "2024-01-12", + "genre": "Traditional folktale", + "contributor": [ + { + "@id": "#speaker-fatoumata" + }, + { + "@id": "#researcher-aminata" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/nyeleni-story-001/file/story.wav" + }, + { + "@id": "http://example.com/item/nyeleni-story-001/file/story.eaf" + } + ] + }, + { + "@id": 
"http://example.com/item/nyeleni-story-001/file/story.wav", + "@type": ["File", "MediaObject"], + "filename": "story.wav", + "encodingFormat": "audio/wav", + "contentSize": "132300", + "duration": "PT4.5S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "http://example.com/item/nyeleni-story-001/file/story.eaf", + "@type": ["File", "MediaObject"], + "filename": "story.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "#speaker-fatoumata", + "@type": "Person", + "name": "Fatoumata Traoré", + "description": "Storyteller and community historian, born 1958", + "gender": "female", + "birthDate": "1958" + }, + { + "@id": "#researcher-aminata", + "@type": "Person", + "name": "Dr. Aminata Koné", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/nya", + "@type": "Language", + "name": "Nyeleni", + "identifier": "nya" + } + ] +} diff --git a/example/data/collection-01-nyeleni/item-02-story/story.eaf b/example/data/collection-01-nyeleni/item-02-story/story.eaf new file mode 100644 index 0000000..1b2fc22 --- /dev/null +++ b/example/data/collection-01-nyeleni/item-02-story/story.eaf @@ -0,0 +1,76 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:a7e2d9f4-6c8b-4d1e-9a2f-5b7c8d4e3f2a + 12 +
+ + + + + + + + + + + + Sɔgɔ dɔ tun bɛ yen + + + + + A tun ka hakili sɔrɔ kosɛbɛ + + + + + A ye denbaya kɛlɛ dɔɔni dɔɔni + + + + + + + There was once a hare + + + + + He was very clever + + + + + He helped the children one by one + + + + + + + Opening formula - traditional story beginning + + + + + Character introduction + + + + + Main narrative action - moral theme introduced + + + + + + + + + + + + +
diff --git a/example/data/collection-01-nyeleni/item-02-story/story.wav b/example/data/collection-01-nyeleni/item-02-story/story.wav new file mode 100644 index 0000000..45de3be Binary files /dev/null and b/example/data/collection-01-nyeleni/item-02-story/story.wav differ diff --git a/example/data/collection-01-nyeleni/item-03-vocabulary/ro-crate-metadata.json b/example/data/collection-01-nyeleni/item-03-vocabulary/ro-crate-metadata.json new file mode 100644 index 0000000..1846228 --- /dev/null +++ b/example/data/collection-01-nyeleni/item-03-vocabulary/ro-crate-metadata.json @@ -0,0 +1,100 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "Kinship Terms Elicitation", + "description": "Structured vocabulary elicitation session focusing on kinship terminology in Nyeleni. 
Speaker Sekou Keita provides terms for family relationships and explains cultural context.", + "identifier": "http://example.com/item/nyeleni-vocabulary-001", + "dateCreated": "2024-01-14", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "memberOf": { + "@id": "http://example.com/collection/nyeleni-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/nya" + }, + "recordingDate": "2024-01-14", + "genre": "Elicitation session", + "keywords": ["vocabulary", "kinship", "elicitation"], + "contributor": [ + { + "@id": "#speaker-sekou" + }, + { + "@id": "#researcher-aminata" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/nyeleni-vocabulary-001/file/vocabulary.wav" + }, + { + "@id": "http://example.com/item/nyeleni-vocabulary-001/file/vocabulary.eaf" + } + ] + }, + { + "@id": "http://example.com/item/nyeleni-vocabulary-001/file/vocabulary.wav", + "@type": ["File", "MediaObject"], + "filename": "vocabulary.wav", + "encodingFormat": "audio/wav", + "contentSize": "117600", + "duration": "PT4S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "http://example.com/item/nyeleni-vocabulary-001/file/vocabulary.eaf", + "@type": ["File", "MediaObject"], + "filename": "vocabulary.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + } + }, + { + "@id": "#speaker-sekou", + "@type": "Person", + "name": "Sekou Keita", + "description": "Language consultant and teacher, born 1972", + "gender": "male", + "birthDate": "1972" + }, + { + "@id": "#researcher-aminata", + "@type": "Person", + "name": "Dr. 
Aminata Koné", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/nya", + "@type": "Language", + "name": "Nyeleni", + "identifier": "nya" + } + ] +} diff --git a/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.eaf b/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.eaf new file mode 100644 index 0000000..bd98090 --- /dev/null +++ b/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.eaf @@ -0,0 +1,106 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:f9d4e2b7-8c1a-4f6e-9d3b-2a5c7e8f4d1b + 15 +
+ + + + + + + + + + + + + + fa + + + + + ba + + + + + denmuso + + + + + fadenmuso + + + + + + + father + + + + + mother + + + + + daughter + + + + + niece (father's daughter) + + + + + + + father + + + + + mother + + + + + child-female + + + + + father-child-female + + + + + + + Compound: den (child) + muso (female) + + + + + Three-part compound showing productive morphology + + + + + + + + + + + + + +
diff --git a/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.wav b/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.wav new file mode 100644 index 0000000..ec72533 Binary files /dev/null and b/example/data/collection-01-nyeleni/item-03-vocabulary/vocabulary.wav differ diff --git a/example/data/collection-01-nyeleni/ro-crate-metadata.json b/example/data/collection-01-nyeleni/ro-crate-metadata.json new file mode 100644 index 0000000..46447e2 --- /dev/null +++ b/example/data/collection-01-nyeleni/ro-crate-metadata.json @@ -0,0 +1,83 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Collection"], + "name": "Nyeleni Language Documentation Archive", + "description": "A collection of audio recordings documenting the Nyeleni language, an endangered language spoken in West Africa. 
This archive contains traditional greetings, stories, and vocabulary elicitation sessions recorded with fluent speakers.", + "identifier": "http://example.com/collection/nyeleni-001", + "datePublished": "2024-01-15", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/nya" + }, + "spatialCoverage": { + "@id": "#place-mali" + }, + "creator": { + "@id": "#person-depositor" + }, + "hasPart": [ + { + "@id": "http://example.com/item/nyeleni-greeting-001" + }, + { + "@id": "http://example.com/item/nyeleni-story-001" + }, + { + "@id": "http://example.com/item/nyeleni-vocabulary-001" + } + ] + }, + { + "@id": "http://iso639-3.sil.org/code/nya", + "@type": "Language", + "name": "Nyeleni", + "alternateName": "nya", + "identifier": "nya" + }, + { + "@id": "#place-mali", + "@type": "Place", + "name": "Mopti Region, Mali", + "geo": { + "@type": "GeoCoordinates", + "latitude": 14.4919, + "longitude": -3.9969 + } + }, + { + "@id": "#person-depositor", + "@type": "Person", + "name": "Dr. Aminata Koné", + "email": "a.kone@example.org", + "affiliation": { + "@id": "#org-university" + } + }, + { + "@id": "#org-university", + "@type": "Organization", + "name": "University of Bamako, Department of Linguistics" + } + ] +} diff --git a/example/data/collection-02-coastal/item-01-narrative/narrative.eaf b/example/data/collection-02-coastal/item-01-narrative/narrative.eaf new file mode 100644 index 0000000..e8b5a39 --- /dev/null +++ b/example/data/collection-02-coastal/item-01-narrative/narrative.eaf @@ -0,0 +1,76 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:b8e3f9c2-7d4a-4e1b-8f2c-6a9d5e7b3c8f + 10 +
+ + + + + + + + + + + + xʷəl̕əm̕ qʷəl q̓ʷəy̓qi qax̌ + + + + + ʔəy̕ k̓ʷəl qʷəy̓qəy̓ + + + + + nəxʷ č̓ič̓əm̕aθ qʷəl tə syəθəb + + + + + + + In 1962 there was a very big storm + + + + + The waves were enormous + + + + + All our people helped each other + + + + + + + Temporal orientation - setting the scene + + + + + Intensification - emphasizing severity + + + + + Community response - cultural value of cooperation + + + + + + + + + + + + +
diff --git a/example/data/collection-02-coastal/item-01-narrative/narrative.wav b/example/data/collection-02-coastal/item-01-narrative/narrative.wav new file mode 100644 index 0000000..34978ae Binary files /dev/null and b/example/data/collection-02-coastal/item-01-narrative/narrative.wav differ diff --git a/example/data/collection-02-coastal/item-01-narrative/ro-crate-metadata.json b/example/data/collection-02-coastal/item-01-narrative/ro-crate-metadata.json new file mode 100644 index 0000000..32da0c6 --- /dev/null +++ b/example/data/collection-02-coastal/item-01-narrative/ro-crate-metadata.json @@ -0,0 +1,99 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "The Great Storm - Personal Narrative", + "description": "Elder William's firsthand account of the historic storm of 1962 and its impact on the coastal community. 
This narrative provides valuable insight into traditional weather knowledge and community resilience.", + "identifier": "http://example.com/item/coastal-narrative-001", + "dateCreated": "2024-01-20", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "memberOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/pac" + }, + "recordingDate": "2024-01-20", + "genre": "Personal narrative", + "contributor": [ + { + "@id": "#speaker-william" + }, + { + "@id": "#researcher-sarah" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/coastal-narrative-001/file/narrative.wav" + }, + { + "@id": "http://example.com/item/coastal-narrative-001/file/narrative.eaf" + } + ] + }, + { + "@id": "http://example.com/item/coastal-narrative-001/file/narrative.wav", + "@type": ["File", "MediaObject"], + "filename": "narrative.wav", + "encodingFormat": "audio/wav", + "contentSize": "147000", + "duration": "PT5S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "http://example.com/item/coastal-narrative-001/file/narrative.eaf", + "@type": ["File", "MediaObject"], + "filename": "narrative.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "#speaker-william", + "@type": "Person", + "name": "William Johnson", + "description": "Community elder and tradition keeper, born 1938", + "gender": "male", + "birthDate": "1938" + }, + { + "@id": "#researcher-sarah", + "@type": "Person", + "name": "Dr. 
Sarah Chen", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/pac", + "@type": "Language", + "name": "Pacific Coastal Language", + "identifier": "pac" + } + ] +} diff --git a/example/data/collection-02-coastal/item-02-songs/ro-crate-metadata.json b/example/data/collection-02-coastal/item-02-songs/ro-crate-metadata.json new file mode 100644 index 0000000..97c73a0 --- /dev/null +++ b/example/data/collection-02-coastal/item-02-songs/ro-crate-metadata.json @@ -0,0 +1,100 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "Traditional Fishing Songs", + "description": "Collection of three traditional songs sung during fishing preparations and boat launches. Performed by Mary Thompson, these songs have been passed down through her family for five generations.", + "identifier": "http://example.com/item/coastal-songs-001", + "dateCreated": "2024-01-22", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "memberOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/pac" + }, + "recordingDate": "2024-01-22", + "genre": "Traditional song", + "keywords": ["fishing", "songs", "traditional music"], + "contributor": [ + { + "@id": "#speaker-mary" + }, + { + "@id": "#researcher-sarah" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/coastal-songs-001/file/songs.wav" + }, + { + "@id": 
"http://example.com/item/coastal-songs-001/file/songs.eaf" + } + ] + }, + { + "@id": "http://example.com/item/coastal-songs-001/file/songs.wav", + "@type": ["File", "MediaObject"], + "filename": "songs.wav", + "encodingFormat": "audio/wav", + "contentSize": "102900", + "duration": "PT3.5S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "http://example.com/item/coastal-songs-001/file/songs.eaf", + "@type": ["File", "MediaObject"], + "filename": "songs.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "#speaker-mary", + "@type": "Person", + "name": "Mary Thompson", + "description": "Traditional singer and cultural educator, born 1952", + "gender": "female", + "birthDate": "1952" + }, + { + "@id": "#researcher-sarah", + "@type": "Person", + "name": "Dr. Sarah Chen", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/pac", + "@type": "Language", + "name": "Pacific Coastal Language", + "identifier": "pac" + } + ] +} diff --git a/example/data/collection-02-coastal/item-02-songs/songs.eaf b/example/data/collection-02-coastal/item-02-songs/songs.eaf new file mode 100644 index 0000000..103d9a9 --- /dev/null +++ b/example/data/collection-02-coastal/item-02-songs/songs.eaf @@ -0,0 +1,76 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:c9f4a8d3-6e2b-4f7c-9d1e-8b5a7f3e2c9d + 11 +
+ + + + + + + + + + + + huy̕ huy̕ nəs k̓ʷaθət + + + + + č̓ɛy̕əm sc̓əwan̕ + + + + + ʔəy̕ qən ʔə sc̓əwan̕ + + + + + + + Thank you, thank you, we are going fishing + + + + + Good salmon + + + + + We take the salmon + + + + + + + Opening gratitude formula - sung before launching boat + + + + + Rising melody - prayer for good catch + + + + + Acknowledgment of reciprocal relationship with salmon + + + + + + + + + + + + +
diff --git a/example/data/collection-02-coastal/item-02-songs/songs.wav b/example/data/collection-02-coastal/item-02-songs/songs.wav new file mode 100644 index 0000000..ce84f4b Binary files /dev/null and b/example/data/collection-02-coastal/item-02-songs/songs.wav differ diff --git a/example/data/collection-02-coastal/item-03-conversation/conversation.eaf b/example/data/collection-02-coastal/item-03-conversation/conversation.eaf new file mode 100644 index 0000000..277d26a --- /dev/null +++ b/example/data/collection-02-coastal/item-03-conversation/conversation.eaf @@ -0,0 +1,80 @@ + + +
+ + urn:nl-mpi-tools-elan-eaf:d8e5f3a9-7c4b-4e2d-9f1a-6b8d3e5c7f9a + 14 +
+ + + + + + + + + + + + č̓ɛy̕əm ʔə k̓ʷəθət̕ən + + + + + nəxʷ č̓ič̓əm̕aθ nə scəc̓əwən̕ + + + + + + + hən̕q̓ əw̕ən nə sk̓ʷay̕ + + + + + + + This net is good + + + + + All the knots are strong + + + + + + + Yes, I made it in the old way + + + + + + + k̓ʷəθət̕ən = fishing net (traditional terminology) + + + + + sk̓ʷay̕ = old way, traditional method + + + + + scəc̓əwən̕ = knots (diminutive/distributive form) + + + + + + + + + + + + +
diff --git a/example/data/collection-02-coastal/item-03-conversation/conversation.wav b/example/data/collection-02-coastal/item-03-conversation/conversation.wav new file mode 100644 index 0000000..1ec4f23 Binary files /dev/null and b/example/data/collection-02-coastal/item-03-conversation/conversation.wav differ diff --git a/example/data/collection-02-coastal/item-03-conversation/ro-crate-metadata.json b/example/data/collection-02-coastal/item-03-conversation/ro-crate-metadata.json new file mode 100644 index 0000000..e9f78cc --- /dev/null +++ b/example/data/collection-02-coastal/item-03-conversation/ro-crate-metadata.json @@ -0,0 +1,111 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Object"], + "name": "Preparing the Net - Conversation", + "description": "Natural conversation between two fishermen discussing traditional net preparation techniques. 
This recording captures authentic language use in a work context and includes technical vocabulary related to fishing practices.", + "identifier": "http://example.com/item/coastal-conversation-001", + "dateCreated": "2024-01-25", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "isPartOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "memberOf": { + "@id": "http://example.com/collection/coastal-001" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/pac" + }, + "recordingDate": "2024-01-25", + "genre": "Conversation", + "keywords": ["conversation", "fishing", "technical vocabulary"], + "contributor": [ + { + "@id": "#speaker-james" + }, + { + "@id": "#speaker-robert" + }, + { + "@id": "#researcher-sarah" + } + ], + "hasPart": [ + { + "@id": "http://example.com/item/coastal-conversation-001/file/conversation.wav" + }, + { + "@id": "http://example.com/item/coastal-conversation-001/file/conversation.eaf" + } + ] + }, + { + "@id": "http://example.com/item/coastal-conversation-001/file/conversation.wav", + "@type": ["File", "MediaObject"], + "filename": "conversation.wav", + "encodingFormat": "audio/wav", + "contentSize": "123200", + "duration": "PT4.2S", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "http://example.com/item/coastal-conversation-001/file/conversation.eaf", + "@type": ["File", "MediaObject"], + "filename": "conversation.eaf", + "encodingFormat": "text/x-eaf+xml", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + } + }, + { + "@id": "#speaker-james", + "@type": "Person", + "name": "James Wilson", + "description": "Fisherman and language consultant, born 1965", + "gender": "male", + "birthDate": "1965" + }, + { + "@id": "#speaker-robert", + 
"@type": "Person", + "name": "Robert Anderson", + "description": "Fisherman and boat builder, born 1968", + "gender": "male", + "birthDate": "1968" + }, + { + "@id": "#researcher-sarah", + "@type": "Person", + "name": "Dr. Sarah Chen", + "description": "Linguist and researcher" + }, + { + "@id": "http://iso639-3.sil.org/code/pac", + "@type": "Language", + "name": "Pacific Coastal Language", + "identifier": "pac" + } + ] +} diff --git a/example/data/collection-02-coastal/ro-crate-metadata.json b/example/data/collection-02-coastal/ro-crate-metadata.json new file mode 100644 index 0000000..56b1599 --- /dev/null +++ b/example/data/collection-02-coastal/ro-crate-metadata.json @@ -0,0 +1,83 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + }, + "about": { + "@id": "./" + } + }, + { + "@id": "./", + "@type": ["Dataset", "http://pcdm.org/models#Collection"], + "name": "Coastal Dialects Archive", + "description": "Documentation of coastal dialect variations along the Pacific rim. 
This collection preserves oral traditions, songs, and conversational speech from small fishing communities where traditional language use is declining.", + "identifier": "http://example.com/collection/coastal-001", + "datePublished": "2024-02-01", + "license": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "contentLicense": { + "@id": "https://creativecommons.org/licenses/by-nc-nd/4.0/" + }, + "metadataLicense": { + "@id": "https://creativecommons.org/licenses/by/4.0/" + }, + "inLanguage": { + "@id": "http://iso639-3.sil.org/code/pac" + }, + "spatialCoverage": { + "@id": "#place-coastal" + }, + "creator": { + "@id": "#person-depositor-coastal" + }, + "hasPart": [ + { + "@id": "http://example.com/item/coastal-narrative-001" + }, + { + "@id": "http://example.com/item/coastal-songs-001" + }, + { + "@id": "http://example.com/item/coastal-conversation-001" + } + ] + }, + { + "@id": "http://iso639-3.sil.org/code/pac", + "@type": "Language", + "name": "Pacific Coastal Language", + "alternateName": "pac", + "identifier": "pac" + }, + { + "@id": "#place-coastal", + "@type": "Place", + "name": "Northern Pacific Coast", + "geo": { + "@type": "GeoCoordinates", + "latitude": 51.4256, + "longitude": -127.7158 + } + }, + { + "@id": "#person-depositor-coastal", + "@type": "Person", + "name": "Dr. 
Sarah Chen", + "email": "s.chen@example.org", + "affiliation": { + "@id": "#org-coastal-university" + } + }, + { + "@id": "#org-coastal-university", + "@type": "Organization", + "name": "Pacific Coast University, Indigenous Languages Program" + } + ] +} diff --git a/example/data/seed.ts b/example/data/seed.ts new file mode 100644 index 0000000..072a9c9 --- /dev/null +++ b/example/data/seed.ts @@ -0,0 +1,328 @@ +import { existsSync, readFileSync, statSync } from 'node:fs'; +import { join } from 'node:path'; + +import { Client } from '@opensearch-project/opensearch'; + +import { PrismaClient } from '../../dist/generated/prisma/client.js'; + +const prisma = new PrismaClient(); +const opensearch = new Client({ + node: process.env.OPENSEARCH_URL || 'http://localhost:9200', +}); + +type RoCrateMetadata = { + '@context': string; + '@graph': Array<{ + '@id': string; + '@type': string | string[]; + name?: string; + description?: string; + identifier?: string; + license?: { '@id': string }; + contentLicense?: { '@id': string }; + metadataLicense?: { '@id': string }; + memberOf?: { '@id': string }; + isPartOf?: { '@id': string }; + hasPart?: Array<{ '@id': string }>; + inLanguage?: { '@id': string }; + [key: string]: unknown; + }>; +}; + +const loadRoCrate = (path: string) => { + const content = readFileSync(path, 'utf-8'); + + return JSON.parse(content) as RoCrateMetadata; +}; + +const extractEntityType = (typeField: string | string[]): string => { + const types = Array.isArray(typeField) ? 
typeField : [typeField]; + + const pcdmType = types.find((t) => t.includes('pcdm.org')); + const schemaType = types.find((t) => t.includes('schema.org')); + + return pcdmType || schemaType || types[0]; +}; + +const processCollection = async ( + collectionDir: string, + collectionRocrateId: string, + items: Array<{ dir: string; id: string }>, +) => { + console.log(`\nProcessing collection: ${collectionDir}`); + + const collectionPath = join(import.meta.dirname, collectionDir, 'ro-crate-metadata.json'); + const collectionCrate = loadRoCrate(collectionPath); + const collectionRoot = collectionCrate['@graph'].find((node) => node['@id'] === './'); + + if (!collectionRoot) { + throw new Error(`No root entity found in ${collectionPath}`); + } + + const collectionEntity = { + rocrateId: collectionRocrateId, + name: collectionRoot.name || 'Untitled Collection', + description: collectionRoot.description || '', + entityType: extractEntityType(collectionRoot['@type']), + memberOf: null, + rootCollection: null, + metadataLicenseId: + collectionRoot.metadataLicense?.['@id'] || + collectionRoot.license?.['@id'] || + 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: + collectionRoot.contentLicense?.['@id'] || + collectionRoot.license?.['@id'] || + 'https://creativecommons.org/licenses/by/4.0/', + rocrate: collectionCrate, + meta: { + storagePath: collectionPath, + }, + }; + + const collection = await prisma.entity.create({ + data: collectionEntity, + }); + console.log(` ✓ Created collection: ${collection.name}`); + + for (const item of items) { + await processItem(collectionDir, item.dir, item.id, collectionRocrateId); + } +}; + +const processItem = async ( + collectionDir: string, + itemDir: string, + itemRocrateId: string, + collectionRocrateId: string, +) => { + const itemPath = join(import.meta.dirname, collectionDir, itemDir, 'ro-crate-metadata.json'); + const itemCrate = loadRoCrate(itemPath); + const itemRoot = itemCrate['@graph'].find((node) => 
node['@id'] === './'); + + if (!itemRoot) { + throw new Error(`No root entity found in ${itemPath}`); + } + + const itemEntity = { + rocrateId: itemRocrateId, + name: itemRoot.name || 'Untitled Item', + description: itemRoot.description || '', + entityType: extractEntityType(itemRoot['@type']), + memberOf: collectionRocrateId, + rootCollection: collectionRocrateId, + metadataLicenseId: + itemRoot.metadataLicense?.['@id'] || itemRoot.license?.['@id'] || 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: + itemRoot.contentLicense?.['@id'] || itemRoot.license?.['@id'] || 'https://creativecommons.org/licenses/by/4.0/', + rocrate: itemCrate, + meta: { + storagePath: itemPath, + }, + }; + + const item = await prisma.entity.create({ + data: itemEntity, + }); + console.log(` ✓ Created item: ${item.name}`); + + // Process File entities from hasPart + if (itemRoot.hasPart && Array.isArray(itemRoot.hasPart)) { + for (const partRef of itemRoot.hasPart) { + const fileNode = itemCrate['@graph'].find((node) => node['@id'] === partRef['@id']); + + if (!fileNode) continue; + + const fileType = extractEntityType(fileNode['@type']); + + // Only create entities for PCDM File types + if (fileType === 'http://schema.org/MediaObject' || fileType === 'File') { + const fileRocrateId = partRef['@id']; + const filename = fileNode.filename as string; + const filePath = join(import.meta.dirname, collectionDir, itemDir, filename); + + // Get file stats from filesystem + if (!existsSync(filePath)) { + console.warn(` ! 
File not found: ${filePath}`); + throw new Error(`File not found: ${filePath}`); + } + + const stats = statSync(filePath); + + const fileEntity = { + rocrateId: fileRocrateId, + name: fileNode.name || filename, + description: fileNode.description || '', + entityType: 'http://schema.org/MediaObject', + fileId: fileRocrateId, + memberOf: itemRocrateId, + rootCollection: collectionRocrateId, + metadataLicenseId: + fileNode.license?.['@id'] || + itemRoot.contentLicense?.['@id'] || + itemRoot.license?.['@id'] || + 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: + fileNode.license?.['@id'] || + itemRoot.contentLicense?.['@id'] || + itemRoot.license?.['@id'] || + 'https://creativecommons.org/licenses/by/4.0/', + rocrate: fileNode, + meta: { + remapRootTo: fileRocrateId, + storagePath: itemPath, + }, + }; + + await prisma.entity.create({ + data: fileEntity, + }); + + const fileRecord = { + fileId: fileRocrateId, + filename: filename, + mediaType: (fileNode.encodingFormat as string) || 'application/octet-stream', + size: BigInt(stats.size), + memberOf: itemRocrateId, + rootCollection: collectionRocrateId, + contentLicenseId: + fileNode.license?.['@id'] || + itemRoot.contentLicense?.['@id'] || + itemRoot.license?.['@id'] || + 'https://creativecommons.org/licenses/by/4.0/', + meta: { + storagePath: filePath, + }, + }; + + await prisma.file.create({ + data: fileRecord, + }); + console.log(` ✓ Created file entity and record: ${filename}`); + } + } + } +}; + +const createOpenSearchIndex = async (): Promise => { + console.log('\nSetting up OpenSearch index...'); + + try { + await opensearch.indices.delete({ index: 'entities' }); + console.log(' ✓ Deleted existing index'); + } catch (_error) { + // Index doesn't exist, that's fine + } + + // Create new index with mappings + await opensearch.indices.create({ + index: 'entities', + body: { + mappings: { + properties: { + rocrateId: { type: 'keyword' }, + name: { + type: 'text', + fields: { + keyword: { type: 
'keyword' }, + }, + }, + description: { type: 'text' }, + entityType: { type: 'keyword' }, + memberOf: { type: 'keyword' }, + rootCollection: { type: 'keyword' }, + metadataLicenseId: { type: 'keyword' }, + contentLicenseId: { type: 'keyword' }, + inLanguage: { type: 'keyword' }, + location: { type: 'geo_point' }, + mediaType: { type: 'keyword' }, + communicationMode: { type: 'keyword' }, + }, + }, + }, + }); + + console.log(' ✓ Created index with mappings'); +}; + +const indexEntities = async (): Promise => { + console.log('\nIndexing entities in OpenSearch...'); + + const entities = await prisma.entity.findMany(); + const operations = entities.flatMap((entity) => [ + { index: { _index: 'entities', _id: `${entity.id}` } }, + { + rocrateId: entity.rocrateId, + name: entity.name, + description: entity.description, + entityType: entity.entityType, + memberOf: entity.memberOf, + rootCollection: entity.rootCollection, + metadataLicenseId: entity.metadataLicenseId, + contentLicenseId: entity.contentLicenseId, + }, + ]); + + await opensearch.bulk({ + body: operations, + refresh: true, + }); + + console.log(` ✓ Indexed ${entities.length} entities`); +}; + +const seed = async (): Promise => { + console.log('🌱 Seeding database with sample language documentation collections...\n'); + + try { + console.log('Clearing existing data...'); + await prisma.entity.deleteMany({}); + await prisma.file.deleteMany({}); + console.log(' ✓ Database cleared'); + + await processCollection('collection-01-nyeleni', 'http://example.com/collection/nyeleni-001', [ + { dir: 'item-01-greeting', id: 'http://example.com/item/nyeleni-greeting-001' }, + { dir: 'item-02-story', id: 'http://example.com/item/nyeleni-story-001' }, + { dir: 'item-03-vocabulary', id: 'http://example.com/item/nyeleni-vocabulary-001' }, + ]); + + await processCollection('collection-02-coastal', 'http://example.com/collection/coastal-001', [ + { dir: 'item-01-narrative', id: 'http://example.com/item/coastal-narrative-001' }, + 
{ dir: 'item-02-songs', id: 'http://example.com/item/coastal-songs-001' }, + { dir: 'item-03-conversation', id: 'http://example.com/item/coastal-conversation-001' }, + ]); + + await createOpenSearchIndex(); + await indexEntities(); + + const entityCounts = await prisma.entity.groupBy({ + by: ['entityType'], + _count: true, + }); + + console.log('\n✅ Seeding completed successfully!'); + console.log('\nCreated:'); + + for (const count of entityCounts) { + const typeName = count.entityType.split('#').pop() || count.entityType.split('/').pop(); + console.log(` • ${count._count} ${typeName} entities`); + } + + console.log('\nEach item includes:'); + console.log(' - RO-Crate metadata'); + console.log(' - WAV audio file (indexed as File entity)'); + console.log(' - ELAN annotation file (indexed as File entity)'); + } catch (error) { + console.error('\n❌ Seeding failed:', error); + throw error; + } finally { + await prisma.$disconnect(); + await opensearch.close(); + } +}; + +// Run the seed function +seed().catch((error) => { + console.error(error); + process.exit(1); +}); diff --git a/example/package.json b/example/package.json deleted file mode 100644 index 26010b8..0000000 --- a/example/package.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "name": "rocapi-examples", - "private": true, - "version": "1.0.0", - "description": "Examples for mounting an Ro-Crate API", - "main": "index.js", - "type": "module", - "scripts": { - "build:ts": "tsc", - "watch:ts": "tsc -w", - "dev:fastify": "pnpm run dev:clean && pnpm run build:ts && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"pnpm:watch:ts\" \"pnpm:dev:start:fastify\"", - "dev:express": "pnpm run dev:clean && pnpm run build:ts && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"pnpm:watch:ts\" \"pnpm:dev:start:express\"", - "dev:start:fastify": "fastify start --ignore-watch=.ts$ -w -l info -P dist/index.fastify.js", - "dev:start:express": "node --watch 
dist/index.express.js", - "dev:clean": "rm -rf dist" - }, - "keywords": [], - "author": "", - "license": "ISC", - "packageManager": "pnpm@10.7.0", - "dependencies": { - "@fastify/routes": "^6.0.2", - "arocapi": "link:..", - "express": "^5.1.0", - "express-list-routes": "^1.3.1", - "fastify": "^5.5.0" - }, - "devDependencies": { - "@types/express": "^5.0.3", - "fastify-cli": "^7.4.0" - } -} diff --git a/example/src/generated b/example/src/generated deleted file mode 120000 index b41320b..0000000 --- a/example/src/generated +++ /dev/null @@ -1 +0,0 @@ -../../src/generated \ No newline at end of file diff --git a/example/src/index.express.ts b/example/src/index.express.ts deleted file mode 100644 index 66f8388..0000000 --- a/example/src/index.express.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { Client } from '@opensearch-project/opensearch'; -import Arocapi from 'arocapi/express'; -import express from 'express'; -import expressListRoutes from 'express-list-routes'; - -import { PrismaClient } from './generated/prisma/client.js'; - -const prisma = new PrismaClient(); - -if (!process.env.OPENSEARCH_URL) { - throw new Error('OPENSEARCH_URL environment variable is not set'); -} -const opensearchUrl = process.env.OPENSEARCH_URL; -const opensearch = new Client({ node: opensearchUrl }); - -const app = express(); -const arocapi = await Arocapi({ opensearch, prisma }); -app.use('/api', arocapi); - -app.get('/', (_req, res) => { - const routes = expressListRoutes(app).map((r) => r.path); - - res.send({ - about: 'Example implmentation of mounting an ROCrate API in an express app', - routes, - }); -}); - -console.log('Mounted ROCrate API'); -console.log('Available routes:'); -console.log(expressListRoutes(app)); - -const port = 3000; -app.listen(port, () => { - console.log(`Example app listening on port ${port}`); -}); - -export default app; -export { app }; diff --git a/example/src/index.fastify.ts b/example/src/index.fastify.ts deleted file mode 100644 index 85de535..0000000 --- 
a/example/src/index.fastify.ts +++ /dev/null @@ -1,39 +0,0 @@ -import routes from '@fastify/routes'; -import { Client } from '@opensearch-project/opensearch'; -import arocapi from 'arocapi'; -import type { FastifyPluginAsync } from 'fastify'; - -declare module 'fastify' { - interface FastifyInstance { - prisma: PrismaClient; - opensearch: Client; - } -} - -import { PrismaClient } from './generated/prisma/client.js'; - -const prisma = new PrismaClient(); - -if (!process.env.OPENSEARCH_URL) { - throw new Error('OPENSEARCH_URL environment variable is not set'); -} -const opensearchUrl = process.env.OPENSEARCH_URL; -const opensearch = new Client({ node: opensearchUrl }); - -const app: FastifyPluginAsync = async (fastify, _options): Promise => { - await fastify.register(routes); - - fastify.register(arocapi, { prisma, opensearch }); - - fastify.get('/', async () => { - const routes = fastify.routes.keys().toArray(); - - return { - about: 'Example implmentation of mounting an ROCrate API in a fastify app', - routes, - }; - }); -}; - -export default app; -export { app }; diff --git a/example/tsconfig.json b/example/tsconfig.json deleted file mode 100644 index 11ce92e..0000000 --- a/example/tsconfig.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "extends": "fastify-tsconfig", - "compilerOptions": { - "lib": ["esnext", "dom"], - "outDir": "dist", - "sourceMap": true - }, - "include": ["src/**/*.ts"] -} diff --git a/package.json b/package.json index 796520a..4cf846d 100644 --- a/package.json +++ b/package.json @@ -61,26 +61,27 @@ "prisma.config.ts" ], "scripts": { - "start": "npm run build:ts && node dist/index.js", "build:ts": "tsc", "watch:ts": "tsc -w", - "dev": "npm run dev:clean && npm run build:ts && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"npm:watch:ts\" \"npm:dev:start\"", - "dev:start": "node --watch dist/index.js", + "dev": "pnpm run dev:clean && pnpm run build:ts && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c 
\"yellow.bold,cyan.bold\" \"pnpm:dev:start\" \"pnpm:watch:ts\" ", + "dev:start": "node --watch --env-file=.env dist/index.dev.js", "dev:clean": "rm -rf dist", "dbconsole": "docker compose exec db mysql --password=password catalog", "load-test-data": "tsx src/scripts/loadEntities.ts", "lint:biome": "biome check", "lint:types": "tsc --noEmit", "lint:knip": "knip", + "lint": "concurrently -p \"[{name}]\" -n \"biome,types,knip\" -c \"yellow.bold,cyan.bold,magenta.bold\" \"pnpm:lint:biome\" \"pnpm:lint:types\" \"pnpm:lint:knip\"", "test": "vitest run", "test:watch": "./scripts/setup-integration.sh && vitest", "test:ui": "./scripts/setup-integration.sh && vitest --ui", "test:coverage": "./scripts/setup-integration.sh && vitest run --coverage", - "prepublishOnly": "npm run lint:biome && npm run lint:types && npm run build:ts", + "prepublishOnly": "pnpm run lint:biome && pnpm run lint:types && pnpm run build:ts", "generate": "npx prisma generate", - "postinstall": "npm run generate", + "postinstall": "pnpm run generate", "db:generate": "prisma generate", - "db:migrate": "prisma migrate deploy" + "db:migrate": "prisma migrate deploy", + "seed": "rm -rf dist && pnpm run build:ts && node --env-file=.env example/data/seed.ts" }, "repository": { "type": "git", diff --git a/prisma/migrations/20251020140329_add_meta_column/migration.sql b/prisma/migrations/20251020140329_add_meta_column/migration.sql new file mode 100644 index 0000000..4a4305f --- /dev/null +++ b/prisma/migrations/20251020140329_add_meta_column/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE `Entity` ADD COLUMN `meta` JSON NULL; diff --git a/prisma/migrations/20251027010713_add_file_table/migration.sql b/prisma/migrations/20251027010713_add_file_table/migration.sql new file mode 100644 index 0000000..d091258 --- /dev/null +++ b/prisma/migrations/20251027010713_add_file_table/migration.sql @@ -0,0 +1,17 @@ +-- CreateTable +CREATE TABLE `File` ( + `id` INTEGER NOT NULL AUTO_INCREMENT, + `fileId` 
VARCHAR(2048) NOT NULL, + `filename` VARCHAR(255) NOT NULL, + `mediaType` VARCHAR(127) NOT NULL, + `size` BIGINT NOT NULL, + `memberOf` VARCHAR(2048) NOT NULL, + `rootCollection` VARCHAR(2048) NOT NULL, + `contentLicenseId` VARCHAR(2048) NOT NULL, + `meta` JSON NULL, + `createdAt` DATETIME(3) NOT NULL DEFAULT CURRENT_TIMESTAMP(3), + `updatedAt` DATETIME(3) NOT NULL, + + UNIQUE INDEX `File_fileId_key`(`fileId`(768)), + PRIMARY KEY (`id`) +) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; diff --git a/prisma/migrations/20251027221138_add_file_id_to_entity/migration.sql b/prisma/migrations/20251027221138_add_file_id_to_entity/migration.sql new file mode 100644 index 0000000..7f00e34 --- /dev/null +++ b/prisma/migrations/20251027221138_add_file_id_to_entity/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE `Entity` ADD COLUMN `fileId` VARCHAR(2048) NULL; diff --git a/prisma/models/entity.prisma b/prisma/models/entity.prisma index ba3111f..d45e07f 100644 --- a/prisma/models/entity.prisma +++ b/prisma/models/entity.prisma @@ -11,8 +11,10 @@ model Entity { rootCollection String? @db.VarChar(2048) metadataLicenseId String @db.VarChar(2048) contentLicenseId String @db.VarChar(2048) + fileId String? @db.VarChar(2048) rocrate Json + meta Json? createdAt DateTime @default(now()) updatedAt DateTime @updatedAt diff --git a/prisma/models/file.prisma b/prisma/models/file.prisma new file mode 100644 index 0000000..2525fd0 --- /dev/null +++ b/prisma/models/file.prisma @@ -0,0 +1,20 @@ +model File { + id Int @id @default(autoincrement()) + + fileId String @db.VarChar(2048) + + filename String @db.VarChar(255) + mediaType String @db.VarChar(127) + size BigInt + + memberOf String @db.VarChar(2048) + rootCollection String @db.VarChar(2048) + contentLicenseId String @db.VarChar(2048) + + meta Json? 
+ + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@unique([fileId(length: 768)]) +} diff --git a/src/app.test.ts b/src/app.test.ts index b53e3cb..8fa2a32 100644 --- a/src/app.test.ts +++ b/src/app.test.ts @@ -1,8 +1,8 @@ import Fastify from 'fastify'; -import { beforeEach, describe, expect, it } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { mockReset } from 'vitest-mock-extended'; -import app, { AllPublicAccessTransformer } from './app.js'; +import app, { AllPublicAccessTransformer, AllPublicFileAccessTransformer } from './app.js'; import { opensearch, prisma } from './test/helpers/fastify.js'; @@ -50,6 +50,54 @@ describe('Entity Route', () => { await expect(() => fastify.ready()).rejects.toThrowError('accessTransformer is required'); }); + it('should handle missing fileAccessTransformer', async () => { + const fastify = Fastify({ logger: false }); + + // @ts-expect-error we are testing missing options + fastify.register(app, { + prisma, + opensearch, + disableCors: false, + accessTransformer: AllPublicAccessTransformer, + }); + + await expect(() => fastify.ready()).rejects.toThrowError('fileAccessTransformer is required'); + }); + + it('should handle missing fileHandler', async () => { + const fastify = Fastify({ logger: false }); + + // @ts-expect-error we are testing missing options + fastify.register(app, { + prisma, + opensearch, + disableCors: false, + accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, + }); + + await expect(() => fastify.ready()).rejects.toThrowError('fileHandler is required'); + }); + + it('should handle missing roCrateHandler', async () => { + const fastify = Fastify({ logger: false }); + + // @ts-expect-error we are testing missing options + fastify.register(app, { + prisma, + opensearch, + disableCors: false, + accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, + 
fileHandler: { + get: vi.fn(), + head: vi.fn(), + }, + }); + + await expect(() => fastify.ready()).rejects.toThrowError('roCrateHandler is required'); + }); + it('should handle broken opensearch', async () => { opensearch.ping.mockRejectedValue(new Error('Connection failed')); @@ -60,6 +108,15 @@ describe('Entity Route', () => { opensearch, disableCors: false, accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, + fileHandler: { + get: vi.fn(), + head: vi.fn(), + }, + roCrateHandler: { + get: vi.fn(), + head: vi.fn(), + }, }); await expect(() => fastify.ready()).rejects.toThrowError('Connection failed'); @@ -72,6 +129,15 @@ describe('Entity Route', () => { opensearch, disableCors: false, accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, + fileHandler: { + get: vi.fn(), + head: vi.fn(), + }, + roCrateHandler: { + get: vi.fn(), + head: vi.fn(), + }, }); fastify.get('/error', async () => { throw new Error('Random'); diff --git a/src/app.ts b/src/app.ts index 3832f40..72af795 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,20 +1,55 @@ import cors from '@fastify/cors'; import sensible from '@fastify/sensible'; import type { Client } from '@opensearch-project/opensearch'; -import type { PrismaClient } from '@prisma/client/extension'; import type { FastifyInstance, FastifyPluginAsync } from 'fastify'; import fp from 'fastify-plugin'; import { hasZodFastifySchemaValidationErrors, serializerCompiler, validatorCompiler } from 'fastify-type-provider-zod'; +import type { PrismaClient } from './generated/prisma/client.js'; +import crate from './routes/crate.js'; import entities from './routes/entities.js'; import entity from './routes/entity.js'; +import file from './routes/file.js'; +import files from './routes/files.js'; import search from './routes/search.js'; -import type { AccessTransformer, EntityTransformer } from './types/transformers.js'; +import type { FileHandler, 
RoCrateHandler } from './types/fileHandlers.js'; +import type { + AccessTransformer, + EntityTransformer, + FileAccessTransformer, + FileTransformer, +} from './types/transformers.js'; import { createValidationError } from './utils/errors.js'; -export type { AuthorisedEntity, StandardEntity } from './transformers/default.js'; -// Re-export transformers and types for external use -export { AllPublicAccessTransformer } from './transformers/default.js'; -export type { AccessTransformer, EntityTransformer, TransformerContext } from './types/transformers.js'; +export type { AuthorisedEntity, AuthorisedFile, StandardEntity, StandardFile } from './transformers/default.js'; +export { AllPublicAccessTransformer, AllPublicFileAccessTransformer } from './transformers/default.js'; +export type { + FileHandler, + FileHandlerContext, + FileMetadata, + FilePathResult, + FileRedirectResult, + FileResult, + FileStreamResult, + GetFileHandler, + GetRoCrateHandler, + HeadFileHandler, + HeadRoCrateHandler, + RoCrateHandler, +} from './types/fileHandlers.js'; +export type { + AccessTransformer, + EntityTransformer, + FileAccessTransformer, + FileTransformer, + TransformerContext, +} from './types/transformers.js'; + +declare module 'fastify' { + interface FastifyInstance { + prisma: PrismaClient; + opensearch: Client; + } +} const setupValidation = (fastify: FastifyInstance) => { fastify.setValidatorCompiler(validatorCompiler); @@ -36,9 +71,11 @@ const setupValidation = (fastify: FastifyInstance) => { const setupDatabase = async (fastify: FastifyInstance, prisma: PrismaClient) => { await prisma.$connect(); + fastify.decorate('prisma', prisma); - fastify.addHook('onClose', async (server) => { - await server.prisma.$disconnect(); + + fastify.addHook('onClose', async () => { + await prisma.$disconnect(); }); }; @@ -62,9 +99,23 @@ export type Options = { disableCors?: boolean; accessTransformer: AccessTransformer; entityTransformers?: EntityTransformer[]; + fileAccessTransformer: 
FileAccessTransformer; + fileTransformers?: FileTransformer[]; + fileHandler: FileHandler; + roCrateHandler: RoCrateHandler; }; const app: FastifyPluginAsync = async (fastify, options) => { - const { prisma, opensearch, disableCors = false, accessTransformer, entityTransformers } = options; + const { + prisma, + opensearch, + disableCors = false, + accessTransformer, + entityTransformers, + fileAccessTransformer, + fileTransformers, + fileHandler, + roCrateHandler, + } = options; if (!prisma) { throw new Error('Prisma client is required'); @@ -78,6 +129,18 @@ const app: FastifyPluginAsync = async (fastify, options) => { throw new Error('accessTransformer is required'); } + if (!fileAccessTransformer) { + throw new Error('fileAccessTransformer is required'); + } + + if (!fileHandler) { + throw new Error('fileHandler is required'); + } + + if (!roCrateHandler) { + throw new Error('roCrateHandler is required'); + } + fastify.register(sensible); if (!disableCors) { fastify.register(cors); @@ -88,6 +151,9 @@ const app: FastifyPluginAsync = async (fastify, options) => { fastify.register(entities, { accessTransformer, entityTransformers }); fastify.register(entity, { accessTransformer, entityTransformers }); + fastify.register(files, { fileAccessTransformer, fileTransformers }); + fastify.register(file, { fileHandler }); + fastify.register(crate, { roCrateHandler }); fastify.register(search, { accessTransformer, entityTransformers }); }; diff --git a/src/index.dev.ts b/src/index.dev.ts new file mode 100644 index 0000000..415d242 --- /dev/null +++ b/src/index.dev.ts @@ -0,0 +1,196 @@ +import { createReadStream, existsSync, readFileSync, statSync } from 'node:fs'; +import { Readable } from 'node:stream'; +import { Client } from '@opensearch-project/opensearch'; +import Fastify from 'fastify'; + +import arocapi, { + AllPublicAccessTransformer, + AllPublicFileAccessTransformer, + type EntityTransformer, + type FileHandler, + type RoCrateHandler, +} from './app.js'; +import 
type { Entity, File } from './generated/prisma/client.js'; +import { PrismaClient } from './generated/prisma/client.js'; + +declare module 'fastify' { + interface FastifyInstance { + prisma: PrismaClient; + opensearch: Client; + } +} + +const prisma = new PrismaClient(); + +if (!process.env.OPENSEARCH_URL) { + throw new Error('OPENSEARCH_URL environment variable is not set'); +} +const opensearchUrl = process.env.OPENSEARCH_URL; +const opensearch = new Client({ node: opensearchUrl }); + +const fileHandler: FileHandler = { + head: async (file: File) => { + const filePath = (file.meta as { storagePath: string }).storagePath; + + if (!existsSync(filePath)) { + return false; + } + + const stats = statSync(filePath); + + return { + contentType: file.mediaType, + contentLength: Number(file.size), + lastModified: stats.mtime, + }; + }, + get: async (file: File) => { + const filePath = (file.meta as { storagePath: string }).storagePath; + + if (!existsSync(filePath)) { + return false; + } + + const stats = statSync(filePath); + + return { + type: 'stream', + stream: createReadStream(filePath), + metadata: { + contentType: file.mediaType, + contentLength: Number(file.size), + lastModified: stats.mtime, + }, + }; + }, +}; + +const transformRoCrate = (filePath: string, remapRootTo: string): { rocrate: unknown; jsonString: string } | false => { + const fileContents = readFileSync(filePath, 'utf-8'); + const rocrate = JSON.parse(fileContents) as { + '@context'?: unknown; + '@graph'?: Array<{ '@id': string; [key: string]: unknown }>; + [key: string]: unknown; + }; + + const graph = rocrate['@graph'] || []; + const rootNode = graph.find((node) => node['@id'] === 'ro-crate-metadata.json') as + | { about: { '@id': string } } + | undefined; + + if (!rootNode) { + return false; + } + + rootNode.about['@id'] = remapRootTo; + + const jsonString = JSON.stringify(rocrate); + + return { rocrate, jsonString }; +}; + +const roCrateHandler: RoCrateHandler = { + head: async (entity: Entity) 
=> { + const meta = entity.meta as { storagePath: string; remapRootTo: string }; + const filePath = meta.storagePath; + + if (!existsSync(filePath)) { + return false; + } + + if (meta.remapRootTo) { + const result = transformRoCrate(filePath, meta.remapRootTo); + + if (!result) { + return false; + } + + return { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(result.jsonString), + lastModified: statSync(filePath).mtime, + }; + } + + const stats = statSync(filePath); + + return { + contentType: 'application/ld+json', + contentLength: stats.size, + lastModified: stats.mtime, + }; + }, + get: async (entity: Entity) => { + const meta = entity.meta as { storagePath: string; remapRootTo: string }; + const filePath = meta.storagePath; + + if (!existsSync(filePath)) { + return false; + } + + if (meta.remapRootTo) { + const result = transformRoCrate(filePath, meta.remapRootTo); + + if (!result) { + return false; + } + + return { + type: 'stream', + stream: Readable.from(result.jsonString), + metadata: { + contentType: 'application/ld+json', + contentLength: Buffer.byteLength(result.jsonString), + lastModified: statSync(filePath).mtime, + }, + }; + } + + const stats = statSync(filePath); + + return { + type: 'stream', + stream: createReadStream(filePath), + metadata: { + contentType: 'application/ld+json', + contentLength: stats.size, + lastModified: stats.mtime, + }, + }; + }, +}; + +const oniTransformer: EntityTransformer = (entity, { request: _r, fastify: _f }) => { + return { + ...entity, + accessControl: 'Public', + counts: { + collections: 0, + objects: 0, + files: 0, + }, + }; +}; + +const fastify = Fastify({ + logger: true, +}); +fastify.register(arocapi, { + prisma, + opensearch, + fileHandler, + roCrateHandler, + accessTransformer: AllPublicAccessTransformer, + entityTransformers: [oniTransformer], + fileAccessTransformer: AllPublicFileAccessTransformer, +}); + +const start = async () => { + try { + await fastify.listen({ port: 9000 }); + } 
catch (err) { + fastify.log.error(err); + process.exit(1); + } +}; +start(); diff --git a/src/index.ts b/src/index.ts deleted file mode 100644 index 6b54319..0000000 --- a/src/index.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { Client } from '@opensearch-project/opensearch'; -import Fastify from 'fastify'; - -import arocapi, { AllPublicAccessTransformer } from './app.js'; -import { PrismaClient } from './generated/prisma/client.js'; - -declare module 'fastify' { - interface FastifyInstance { - prisma: PrismaClient; - opensearch: Client; - } -} - -const prisma = new PrismaClient(); - -if (!process.env.OPENSEARCH_URL) { - throw new Error('OPENSEARCH_URL environment variable is not set'); -} -const opensearchUrl = process.env.OPENSEARCH_URL; -const opensearch = new Client({ node: opensearchUrl }); - -const fastify = Fastify({ - logger: true, -}); -fastify.register(arocapi, { - prisma, - opensearch, - accessTransformer: AllPublicAccessTransformer, -}); - -const start = async () => { - try { - await fastify.listen({ port: 3000 }); - } catch (err) { - fastify.log.error(err); - process.exit(1); - } -}; -start(); diff --git a/src/routes/crate.test.ts b/src/routes/crate.test.ts new file mode 100644 index 0000000..58e58c4 --- /dev/null +++ b/src/routes/crate.test.ts @@ -0,0 +1,419 @@ +import { createReadStream } from 'node:fs'; +import { Readable } from 'node:stream'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { fastify, fastifyAfter, fastifyBefore, prisma } from '../test/helpers/fastify.js'; +import type { FileResult, RoCrateHandler } from '../types/fileHandlers.js'; +import type { StandardErrorResponse } from '../utils/errors.js'; +import crateRoute from './crate.js'; + +vi.mock('node:fs', () => ({ + createReadStream: vi.fn(), +})); + +describe('Crate Route', () => { + const mockRoCrateHandler: RoCrateHandler = { + get: vi.fn(), + head: vi.fn(), + }; + + beforeEach(async () => { + await fastifyBefore(); + await 
fastify.register(crateRoute, { roCrateHandler: mockRoCrateHandler }); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await fastifyAfter(); + }); + + const mockFileEntity = { + id: 1, + rocrateId: 'http://example.com/entity/file.wav', + name: 'test.wav', + description: 'A test file', + entityType: 'http://schema.org/MediaObject', + fileId: null, + memberOf: 'http://example.com/collection', + rootCollection: 'http://example.com/collection', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: { + '@context': 'https://w3id.org/ro/crate/1.1/context', + '@graph': [ + { + '@id': 'ro-crate-metadata.json', + '@type': 'CreativeWork', + conformsTo: { '@id': 'https://w3id.org/ro/crate/1.1' }, + about: { '@id': './' }, + }, + { + '@id': './', + '@type': 'Dataset', + name: 'Test RO-Crate', + }, + ], + }, + meta: {}, + }; + + const mockCollectionEntity = { + ...mockFileEntity, + rocrateId: 'http://example.com/collection', + name: 'Test Collection', + entityType: 'http://pcdm.org/models#Collection', + memberOf: null, + rootCollection: null, + }; + + const mockObjectEntity = { + ...mockFileEntity, + rocrateId: 'http://example.com/object', + name: 'Test Object', + entityType: 'http://pcdm.org/models#Object', + memberOf: 'http://example.com/collection', + rootCollection: 'http://example.com/collection', + }; + + describe('GET /entity/:id/rocrate', () => { + it('should stream RO-Crate metadata for File entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + + const mockStream = Readable.from(['{"@context": "https://w3id.org/ro/crate/1.1/context"}']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'text/plain', + contentLength: 52, + etag: '"rocrate123"', + lastModified: new Date('2025-01-01'), + }, + }; + 
vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(response.headers['content-length']).toBe('52'); + expect(response.headers.etag).toBe('"rocrate123"'); + expect(response.body).toBe('{"@context": "https://w3id.org/ro/crate/1.1/context"}'); + + expect(mockRoCrateHandler.get).toHaveBeenCalledWith( + mockFileEntity, + expect.objectContaining({ + request: expect.any(Object), + fastify: expect.any(Object), + }), + ); + }); + + it('should stream RO-Crate metadata for Collection entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockCollectionEntity); + + const mockStream = Readable.from(['{"@context": "https://w3id.org/ro/crate/1.1/context"}']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'application/json', + contentLength: 52, + }, + }; + vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/collection')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(mockRoCrateHandler.get).toHaveBeenCalledWith(mockCollectionEntity, expect.any(Object)); + }); + + it('should stream RO-Crate metadata for Object entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockObjectEntity); + + const mockStream = Readable.from(['{"@context": "https://w3id.org/ro/crate/1.1/context"}']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'application/ld+json', + contentLength: 52, + }, + }; + 
vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/object')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(mockRoCrateHandler.get).toHaveBeenCalledWith(mockObjectEntity, expect.any(Object)); + }); + + it('should handle redirect response', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + + const mockResult: FileResult = { + type: 'redirect', + url: 'https://storage.example.com/ro-crate-metadata.json', + }; + vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + + expect(response.statusCode).toBe(302); + expect(response.headers.location).toBe('https://storage.example.com/ro-crate-metadata.json'); + }); + + it('should handle file path response without nginx X-Accel-Redirect', async () => { + prisma.entity.findFirst.mockResolvedValue(mockCollectionEntity); + + const mockStream = Readable.from(['rocrate content']); + vi.mocked(createReadStream).mockReturnValue(mockStream as never); + + const mockResult: FileResult = { + type: 'file', + path: '/data/rocrate/ro-crate-metadata.json', + metadata: { + contentType: 'application/ld+json', + contentLength: 1024, + }, + }; + vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/collection')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(response.headers['content-length']).toBe('1024'); + expect(response.headers['x-accel-redirect']).toBeUndefined(); + 
expect(createReadStream).toHaveBeenCalledWith('/data/rocrate/ro-crate-metadata.json'); + }); + + it('should handle file path response with nginx X-Accel-Redirect', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + + const mockResult: FileResult = { + type: 'file', + path: '/data/rocrate/ro-crate-metadata.json', + accelPath: '/internal/rocrate/ro-crate-metadata.json', + metadata: { + contentType: 'application/ld+json', + contentLength: 2048, + }, + }; + vi.mocked(mockRoCrateHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(response.headers['x-accel-redirect']).toBe('/internal/rocrate/ro-crate-metadata.json'); + expect(response.body).toBe(''); + }); + + it('should return 404 when entity not found', async () => { + prisma.entity.findFirst.mockResolvedValue(null); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/nonexistent')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested entity was not found'); + expect(mockRoCrateHandler.get).not.toHaveBeenCalled(); + }); + + it('should return 404 when handler returns false', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + vi.mocked(mockRoCrateHandler.get).mockResolvedValue(false); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + 
expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested RO-Crate could not be retrieved'); + }); + + it('should return 500 when database error occurs', async () => { + prisma.entity.findFirst.mockRejectedValue(new Error('Database connection failed')); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should return 500 when roCrateHandler throws error', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + vi.mocked(mockRoCrateHandler.get).mockRejectedValue(new Error('RO-Crate error')); + + const response = await fastify.inject({ + method: 'GET', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should validate ID parameter format', async () => { + const response = await fastify.inject({ + method: 'GET', + url: '/entity/invalid-id/rocrate', + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + }); + + describe('HEAD /entity/:id/rocrate', () => { + it('should return RO-Crate metadata headers for File entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + + const mockMetadata = { + contentType: 'application/ld+json', + contentLength: 1024, + etag: '"rocrate-etag"', + lastModified: new Date('2025-01-15'), + }; + vi.mocked(mockRoCrateHandler.head).mockResolvedValue(mockMetadata); + + const response = await fastify.inject({ + method: 
'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/ld+json'); + expect(response.headers['content-length']).toBe('1024'); + expect(response.headers.etag).toBe('"rocrate-etag"'); + expect(response.body).toBe(''); + + expect(mockRoCrateHandler.head).toHaveBeenCalledWith( + mockFileEntity, + expect.objectContaining({ + request: expect.any(Object), + fastify: expect.any(Object), + }), + ); + }); + + it('should return RO-Crate metadata headers for Collection entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockCollectionEntity); + + const mockMetadata = { + contentType: 'text/plain', + contentLength: 512, + }; + vi.mocked(mockRoCrateHandler.head).mockResolvedValue(mockMetadata); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/collection')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('text/plain'); + expect(response.headers['content-length']).toBe('512'); + expect(response.body).toBe(''); + }); + + it('should return RO-Crate metadata headers for Object entity', async () => { + prisma.entity.findFirst.mockResolvedValue(mockObjectEntity); + + const mockMetadata = { + contentType: 'application/json', + contentLength: 2048, + }; + vi.mocked(mockRoCrateHandler.head).mockResolvedValue(mockMetadata); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/object')}/rocrate`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('application/json'); + expect(response.headers['content-length']).toBe('2048'); + expect(response.body).toBe(''); + }); + + it('should return 404 when entity not found', async () => { + prisma.entity.findFirst.mockResolvedValue(null); + + const 
response = await fastify.inject({ + method: 'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/nonexistent')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(mockRoCrateHandler.head).not.toHaveBeenCalled(); + }); + + it('should return 404 when head handler returns false', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + vi.mocked(mockRoCrateHandler.head).mockResolvedValue(false); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested RO-Crate metadata was not found'); + }); + + it('should return 500 when head handler throws error', async () => { + prisma.entity.findFirst.mockResolvedValue(mockFileEntity); + vi.mocked(mockRoCrateHandler.head).mockRejectedValue(new Error('Metadata error')); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/entity/${encodeURIComponent('http://example.com/entity/file.wav')}/rocrate`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + }); +}); diff --git a/src/routes/crate.ts b/src/routes/crate.ts new file mode 100644 index 0000000..caf687e --- /dev/null +++ b/src/routes/crate.ts @@ -0,0 +1,138 @@ +import { createReadStream } from 'node:fs'; +import type { FastifyPluginAsync, FastifyReply } from 'fastify'; +import type { ZodTypeProvider } from 'fastify-type-provider-zod'; +import { z } from 'zod/v4'; +import type { FileMetadata, RoCrateHandler } from '../types/fileHandlers.js'; +import { 
createInternalError, createNotFoundError } from '../utils/errors.js'; + +const paramsSchema = z.object({ + id: z.string().regex(/^https?:\/\/.+/, 'Invalid URI format'), +}); + +type CrateRouteOptions = { + roCrateHandler: RoCrateHandler; +}; + +const setFileHeaders = ( + reply: FastifyReply, + metadata: { contentType: string; contentLength: number; etag?: string; lastModified?: Date }, +) => { + reply.header('Content-Type', metadata.contentType); + reply.header('Content-Length', metadata.contentLength.toString()); + + if (metadata.etag) { + reply.header('ETag', metadata.etag); + } + if (metadata.lastModified) { + reply.header('Last-Modified', metadata.lastModified.toUTCString()); + } +}; + +const crate: FastifyPluginAsync = async (fastify, opts) => { + const { roCrateHandler } = opts; + + fastify.withTypeProvider().head( + '/entity/:id/rocrate', + { + schema: { + params: paramsSchema, + }, + }, + async (request, reply) => { + const { id } = request.params; + + try { + const entity = await fastify.prisma.entity.findFirst({ + where: { + rocrateId: id, + }, + }); + + if (!entity) { + return reply.code(404).send(createNotFoundError('The requested entity was not found', id)); + } + + const metadata: FileMetadata | false = await roCrateHandler.head(entity, { + request, + fastify, + }); + + if (!metadata) { + return reply.code(404).send(createNotFoundError('The requested RO-Crate metadata was not found', id)); + } + + setFileHeaders(reply, metadata); + + return reply.code(200).send(); + } catch (error) { + fastify.log.error('RO-Crate metadata retrieval error:', error); + return reply.code(500).send(createInternalError()); + } + }, + ); + + fastify.withTypeProvider().get( + '/entity/:id/rocrate', + { + schema: { + params: paramsSchema, + }, + }, + async (request, reply) => { + const { id } = request.params; + + try { + const entity = await fastify.prisma.entity.findFirst({ + where: { + rocrateId: id, + }, + }); + + if (!entity) { + return 
reply.code(404).send(createNotFoundError('The requested entity was not found', id)); + } + + const result = await roCrateHandler.get(entity, { + request, + fastify, + }); + + if (!result) { + return reply.code(404).send(createNotFoundError('The requested RO-Crate could not be retrieved', id)); + } + + if (result.type === 'redirect') { + return reply.code(302).redirect(result.url); + } + + // Set content type to application/ld+json for RO-Crate + const metadata = { + ...result.metadata, + contentType: 'application/ld+json', + }; + + setFileHeaders(reply, metadata); + + // Handle stream response + if (result.type === 'stream') { + return reply.code(200).send(result.stream); + } + + if (result.type === 'file') { + if (result.accelPath) { + reply.header('X-Accel-Redirect', result.accelPath); + return reply.code(200).send(); + } + + const stream = createReadStream(result.path); + return reply.code(200).send(stream); + } + } catch (error) { + fastify.log.error('RO-Crate retrieval error:', error); + return reply.code(500).send(createInternalError()); + } + }, + ); +}; + +export default crate; diff --git a/src/routes/entities.test.ts b/src/routes/entities.test.ts index 357c7f3..361975b 100644 --- a/src/routes/entities.test.ts +++ b/src/routes/entities.test.ts @@ -23,6 +23,7 @@ describe('Entities Route', () => { name: 'Test Entity 1', description: 'First test entity', entityType: 'http://pcdm.org/models#Collection', + fileId: null, memberOf: null, rootCollection: null, metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', @@ -30,6 +31,7 @@ describe('Entities Route', () => { createdAt: new Date(), updatedAt: new Date(), rocrate: {}, + meta: {}, }, { id: 2, @@ -37,6 +39,7 @@ describe('Entities Route', () => { name: 'Test Entity 2', description: 'Second test entity', entityType: 'http://pcdm.org/models#Object', + fileId: null, memberOf: 'http://example.com/entity/1', rootCollection: 'http://example.com/entity/1', metadataLicenseId: 
'https://creativecommons.org/licenses/by/4.0/', @@ -44,6 +47,7 @@ describe('Entities Route', () => { createdAt: new Date(), updatedAt: new Date(), rocrate: {}, + meta: {}, }, ]; diff --git a/src/routes/entities.ts b/src/routes/entities.ts index 47d458f..394eb3b 100644 --- a/src/routes/entities.ts +++ b/src/routes/entities.ts @@ -52,7 +52,6 @@ const entities: FastifyPluginAsync = async (fastify, opts) }; } - // Map sort field to database field const sortField = sort === 'id' ? 'rocrateId' : sort; const dbEntities = await fastify.prisma.entity.findMany({ @@ -64,7 +63,6 @@ const entities: FastifyPluginAsync = async (fastify, opts) take: limit, }); - // Get total count for pagination metadata const total = await fastify.prisma.entity.count({ where }); // Apply transformers to each entity: base -> access -> additional diff --git a/src/routes/entity.test.ts b/src/routes/entity.test.ts index 774dadf..564c6fb 100644 --- a/src/routes/entity.test.ts +++ b/src/routes/entity.test.ts @@ -23,6 +23,7 @@ describe('Entity Route', () => { name: 'Test Entity', description: 'A test entity', entityType: 'http://schema.org/Person', + fileId: null, memberOf: null, rootCollection: null, metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', @@ -30,6 +31,7 @@ describe('Entity Route', () => { createdAt: new Date(), updatedAt: new Date(), rocrate: {}, + meta: {}, }; prisma.entity.findFirst.mockResolvedValue(mockEntity); diff --git a/src/routes/file.test.ts b/src/routes/file.test.ts new file mode 100644 index 0000000..80b83a2 --- /dev/null +++ b/src/routes/file.test.ts @@ -0,0 +1,377 @@ +import { createReadStream } from 'node:fs'; +import { Readable } from 'node:stream'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { fastify, fastifyAfter, fastifyBefore, prisma } from '../test/helpers/fastify.js'; +import type { FileHandler, FileResult } from '../types/fileHandlers.js'; +import type { StandardErrorResponse } from '../utils/errors.js'; +import 
fileRoute from './file.js'; + +vi.mock('node:fs', () => ({ + createReadStream: vi.fn(), +})); + +describe('File Route', () => { + const mockFileHandler: FileHandler = { + get: vi.fn(), + head: vi.fn(), + }; + + beforeEach(async () => { + await fastifyBefore(); + await fastify.register(fileRoute, { fileHandler: mockFileHandler }); + vi.clearAllMocks(); + }); + + afterEach(async () => { + await fastifyAfter(); + }); + + const mockFile = { + id: 1, + fileId: 'http://example.com/file/test.wav', + filename: 'test.wav', + mediaType: 'audio/wav', + size: BigInt(1024), + memberOf: 'http://example.com/collection', + rootCollection: 'http://example.com/collection', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + meta: { storagePath: '/data/files/test.wav' }, + createdAt: new Date(), + updatedAt: new Date(), + }; + + describe('GET /file/:id', () => { + it('should stream file content successfully', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockStream = Readable.from(['test file content']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'audio/wav', + contentLength: 17, + etag: '"abc123"', + lastModified: new Date('2025-01-01'), + }, + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('audio/wav'); + expect(response.headers['content-length']).toBe('17'); + expect(response.headers.etag).toBe('"abc123"'); + expect(response.headers['content-disposition']).toBe('inline; filename="test.wav"'); + expect(response.body).toBe('test file content'); + + expect(mockFileHandler.get).toHaveBeenCalledWith( + mockFile, + expect.objectContaining({ + request: expect.any(Object), + fastify: expect.any(Object), + }), + ); + }); + + 
it('should handle attachment disposition', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockStream = Readable.from(['test']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'audio/wav', + contentLength: 4, + }, + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}?disposition=attachment`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-disposition']).toBe('attachment; filename="test.wav"'); + }); + + it('should use custom filename when provided', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockStream = Readable.from(['test']); + const mockResult: FileResult = { + type: 'stream', + stream: mockStream, + metadata: { + contentType: 'audio/wav', + contentLength: 4, + }, + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}?filename=custom-name.wav`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-disposition']).toBe('inline; filename="custom-name.wav"'); + }); + + it('should handle redirect response with noRedirect=false (default)', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockResult: FileResult = { + type: 'redirect', + url: 'https://storage.example.com/files/test.wav', + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + + expect(response.statusCode).toBe(302); + expect(response.headers.location).toBe('https://storage.example.com/files/test.wav'); + }); + + it('should handle redirect response 
with noRedirect=true', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockResult: FileResult = { + type: 'redirect', + url: 'https://storage.example.com/files/test.wav', + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}?noRedirect=true`, + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body); + expect(body).toEqual({ location: 'https://storage.example.com/files/test.wav' }); + }); + + it('should handle file path response without nginx X-Accel-Redirect', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockStream = Readable.from(['file content']); + vi.mocked(createReadStream).mockReturnValue(mockStream as never); + + const mockResult: FileResult = { + type: 'file', + path: '/data/files/test.wav', + metadata: { + contentType: 'audio/wav', + contentLength: 1024, + }, + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('audio/wav'); + expect(response.headers['content-length']).toBe('1024'); + expect(response.headers['x-accel-redirect']).toBeUndefined(); + expect(createReadStream).toHaveBeenCalledWith('/data/files/test.wav'); + }); + + it('should handle file path response with nginx X-Accel-Redirect', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockResult: FileResult = { + type: 'file', + path: '/data/files/test.wav', + accelPath: '/internal/files/test.wav', + metadata: { + contentType: 'audio/wav', + contentLength: 1024, + }, + }; + vi.mocked(mockFileHandler.get).mockResolvedValue(mockResult); + + const response = await fastify.inject({ + method: 
'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('audio/wav'); + // Note: Content-Length is '0' because Fastify sends an empty response. + // In production, nginx will replace this with the actual file size when serving via X-Accel-Redirect + expect(response.headers['content-length']).toBe('0'); + expect(response.headers['x-accel-redirect']).toBe('/internal/files/test.wav'); + expect(response.body).toBe(''); // Empty body when using X-Accel-Redirect + }); + + it('should return 404 when entity not found', async () => { + prisma.entity.findFirst.mockResolvedValue(null); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/nonexistent')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested file was not found'); + expect(mockFileHandler.get).not.toHaveBeenCalled(); + }); + + it('should return 500 when database error occurs', async () => { + prisma.file.findFirst.mockRejectedValue(new Error('Database connection failed')); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should return 500 when fileHandler throws error', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + vi.mocked(mockFileHandler.get).mockRejectedValue(new Error('File not found in storage')); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + const body = 
JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should validate ID parameter format', async () => { + const response = await fastify.inject({ + method: 'GET', + url: '/file/invalid-id', + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + + it('should validate disposition parameter', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}?disposition=invalid`, + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + + it('should return 404 when get handler returns false', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + vi.mocked(mockFileHandler.get).mockResolvedValue(false); + + const response = await fastify.inject({ + method: 'GET', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested file could not be retrieved'); + }); + }); + + describe('HEAD /file/:id', () => { + it('should return headers without body using head handler', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + + const mockMetadata = { + contentType: 'audio/wav', + contentLength: 17, + etag: '"abc123"', + lastModified: new Date('2025-01-01'), + }; + vi.mocked(mockFileHandler.head).mockResolvedValue(mockMetadata); + + const response = await fastify.inject({ + method: 'HEAD', + url: 
`/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type']).toBe('audio/wav'); + expect(response.headers['content-length']).toBe('17'); + expect(response.headers.etag).toBe('"abc123"'); + expect(response.body).toBe(''); // No body for HEAD + + expect(mockFileHandler.head).toHaveBeenCalledWith( + mockFile, + expect.objectContaining({ + request: expect.any(Object), + fastify: expect.any(Object), + }), + ); + }); + + it('should return 404 when file not found', async () => { + prisma.file.findFirst.mockResolvedValue(null); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/file/${encodeURIComponent('http://example.com/file/nonexistent')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(404); + expect(body.error.code).toBe('NOT_FOUND'); + expect(mockFileHandler.get).not.toHaveBeenCalled(); + }); + + it('should return 500 when head handler throws error', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + vi.mocked(mockFileHandler.head).mockRejectedValue(new Error('Failed to get metadata')); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should return 404 when head handler returns false', async () => { + prisma.file.findFirst.mockResolvedValue(mockFile); + vi.mocked(mockFileHandler.head).mockResolvedValue(false); + + const response = await fastify.inject({ + method: 'HEAD', + url: `/file/${encodeURIComponent('http://example.com/file/test.wav')}`, + }); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + + expect(response.statusCode).toBe(404); + 
expect(body.error.code).toBe('NOT_FOUND'); + expect(body.error.message).toBe('The requested file metadata was not found'); + }); + }); +}); diff --git a/src/routes/file.ts b/src/routes/file.ts new file mode 100644 index 0000000..646301c --- /dev/null +++ b/src/routes/file.ts @@ -0,0 +1,146 @@ +import { createReadStream } from 'node:fs'; +import type { FastifyPluginAsync, FastifyReply } from 'fastify'; +import type { ZodTypeProvider } from 'fastify-type-provider-zod'; +import { z } from 'zod/v4'; +import type { FileHandler, FileMetadata } from '../types/fileHandlers.js'; +import { createInternalError, createNotFoundError } from '../utils/errors.js'; + +const paramsSchema = z.object({ + id: z.string().regex(/^https?:\/\/.+/, 'Invalid URI format'), +}); + +const querySchema = z.object({ + disposition: z.enum(['inline', 'attachment']).optional().default('inline'), + filename: z.string().optional(), + noRedirect: z.coerce.boolean().optional().default(false), +}); + +type FileRouteOptions = { + fileHandler: FileHandler; +}; + +const setFileHeaders = ( + reply: FastifyReply, + metadata: { contentType: string; contentLength: number; etag?: string; lastModified?: Date }, +) => { + reply.header('Content-Type', metadata.contentType); + reply.header('Content-Length', metadata.contentLength.toString()); + + if (metadata.etag) { + reply.header('ETag', metadata.etag); + } + if (metadata.lastModified) { + reply.header('Last-Modified', metadata.lastModified.toUTCString()); + } +}; + +const file: FastifyPluginAsync = async (fastify, opts) => { + const { fileHandler } = opts; + + fastify.withTypeProvider().head( + '/file/:id', + { + schema: { + params: paramsSchema, + }, + }, + async (request, reply) => { + const { id } = request.params; + + try { + const file = await fastify.prisma.file.findFirst({ + where: { + fileId: id, + }, + }); + + if (!file) { + return reply.code(404).send(createNotFoundError('The requested file was not found', id)); + } + + const metadata: FileMetadata | 
false = await fileHandler.head(file, { + request, + fastify, + }); + + if (!metadata) { + return reply.code(404).send(createNotFoundError('The requested file metadata was not found', id)); + } + + setFileHeaders(reply, metadata); + + return reply.code(200).send(); + } catch (error) { + fastify.log.error('File metadata retrieval error:', error); + return reply.code(500).send(createInternalError()); + } + }, + ); + + fastify.withTypeProvider().get( + '/file/:id', + { + schema: { + params: paramsSchema, + querystring: querySchema, + }, + }, + async (request, reply) => { + const { id } = request.params; + + try { + const file = await fastify.prisma.file.findFirst({ + where: { + fileId: id, + }, + }); + + if (!file) { + return reply.code(404).send(createNotFoundError('The requested file was not found', id)); + } + + const result = await fileHandler.get(file, { + request, + fastify, + }); + + if (!result) { + return reply.code(404).send(createNotFoundError('The requested file could not be retrieved', id)); + } + + if (result.type === 'redirect') { + if (request.query.noRedirect) { + return reply.code(200).send({ location: result.url }); + } + + return reply.code(302).redirect(result.url); + } + + const { disposition, filename: customFilename } = request.query; + const filename = customFilename || file.filename; + setFileHeaders(reply, result.metadata); + reply.header('Content-Disposition', `${disposition}; filename="${filename}"`); + + // Handle stream response + if (result.type === 'stream') { + return reply.code(200).send(result.stream); + } + + if (result.type === 'file') { + if (result.accelPath) { + reply.header('X-Accel-Redirect', result.accelPath); + return reply.code(200).send(); + } + + const stream = createReadStream(result.path); + return reply.code(200).send(stream); + } + } catch (error) { + fastify.log.error('File retrieval error:', error); + return reply.code(500).send(createInternalError()); + } + }, + ); +}; + +export default file; diff --git 
a/src/routes/files.test.ts b/src/routes/files.test.ts new file mode 100644 index 0000000..a893ccf --- /dev/null +++ b/src/routes/files.test.ts @@ -0,0 +1,303 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { FileAccessTransformer, FileTransformer } from '../app.js'; +import { fastify, fastifyAfter, fastifyBefore, prisma } from '../test/helpers/fastify.js'; +import { AllPublicFileAccessTransformer } from '../transformers/default.js'; +import type { StandardErrorResponse } from '../utils/errors.js'; +import filesRoute from './files.js'; + +describe('Files Route', () => { + beforeEach(async () => { + await fastifyBefore(); + await fastify.register(filesRoute, { fileAccessTransformer: AllPublicFileAccessTransformer }); + }); + + afterEach(async () => { + await fastifyAfter(); + }); + + const mockFile1 = { + id: 1, + fileId: 'http://example.com/file1.wav', + filename: 'file1.wav', + mediaType: 'audio/wav', + size: BigInt(1024), + memberOf: 'http://example.com/collection/1', + rootCollection: 'http://example.com/collection/1', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + meta: {}, + createdAt: new Date('2025-01-01'), + updatedAt: new Date('2025-01-01'), + }; + + const mockFile2 = { + id: 2, + fileId: 'http://example.com/file2.txt', + filename: 'file2.txt', + mediaType: 'text/plain', + size: BigInt(512), + memberOf: 'http://example.com/collection/1', + rootCollection: 'http://example.com/collection/1', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + meta: {}, + createdAt: new Date('2025-01-02'), + updatedAt: new Date('2025-01-02'), + }; + + describe('GET /files', () => { + it('should list all files successfully', async () => { + prisma.file.findMany.mockResolvedValue([mockFile1, mockFile2]); + prisma.file.count.mockResolvedValue(2); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + expect(response.statusCode).toBe(200); + const body = 
JSON.parse(response.body) as { total: number; files: unknown[] }; + expect(body.total).toBe(2); + expect(body.files).toHaveLength(2); + expect(body.files[0]).toMatchObject({ + id: 'http://example.com/file1.wav', + filename: 'file1.wav', + mediaType: 'audio/wav', + size: 1024, + memberOf: 'http://example.com/collection/1', + rootCollection: 'http://example.com/collection/1', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + access: { + content: true, + }, + }); + expect(body.files[0]).not.toHaveProperty('metadataLicenseId'); + }); + + it('should filter files by memberOf', async () => { + prisma.file.findMany.mockResolvedValue([mockFile1]); + prisma.file.count.mockResolvedValue(1); + + const response = await fastify.inject({ + method: 'GET', + url: '/files?memberOf=http://example.com/collection/1', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { total: number }; + expect(body.total).toBe(1); + expect(prisma.file.findMany).toHaveBeenCalledWith( + expect.objectContaining({ + where: { memberOf: 'http://example.com/collection/1' }, + }), + ); + }); + + it('should support pagination with limit and offset', async () => { + prisma.file.findMany.mockResolvedValue([mockFile2]); + prisma.file.count.mockResolvedValue(2); + + const response = await fastify.inject({ + method: 'GET', + url: '/files?limit=1&offset=1', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { total: number; files: unknown[] }; + expect(body.total).toBe(2); + expect(body.files).toHaveLength(1); + expect(prisma.file.findMany).toHaveBeenCalledWith( + expect.objectContaining({ + skip: 1, + take: 1, + }), + ); + }); + + it('should support sorting by filename ascending', async () => { + prisma.file.findMany.mockResolvedValue([mockFile1, mockFile2]); + prisma.file.count.mockResolvedValue(2); + + const response = await fastify.inject({ + method: 'GET', + url: '/files?sort=filename&order=asc', + }); + + 
expect(response.statusCode).toBe(200); + expect(prisma.file.findMany).toHaveBeenCalledWith( + expect.objectContaining({ + orderBy: { filename: 'asc' }, + }), + ); + }); + + it('should support sorting by id (maps to fileId)', async () => { + prisma.file.findMany.mockResolvedValue([mockFile1, mockFile2]); + prisma.file.count.mockResolvedValue(2); + + const response = await fastify.inject({ + method: 'GET', + url: '/files?sort=id&order=desc', + }); + + expect(response.statusCode).toBe(200); + expect(prisma.file.findMany).toHaveBeenCalledWith( + expect.objectContaining({ + orderBy: { fileId: 'desc' }, + }), + ); + }); + + it('should return empty list when no files found', async () => { + prisma.file.findMany.mockResolvedValue([]); + prisma.file.count.mockResolvedValue(0); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { total: number; files: unknown[] }; + expect(body.total).toBe(0); + expect(body.files).toHaveLength(0); + }); + + it('should not include metadataLicenseId in response', async () => { + prisma.file.findMany.mockResolvedValue([mockFile1]); + prisma.file.count.mockResolvedValue(1); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { files: Array<{ memberOf: string }> }; + expect(body.files[0]).toMatchObject({ + id: 'http://example.com/file1.wav', + filename: 'file1.wav', + memberOf: 'http://example.com/collection/1', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + }); + expect(body.files[0]).not.toHaveProperty('metadataLicenseId'); + }); + + it('should return 500 when database error occurs', async () => { + prisma.file.findMany.mockRejectedValue(new Error('Database connection failed')); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + const body = 
JSON.parse(response.body) as { error: { code: string } }; + + expect(response.statusCode).toBe(500); + expect(body.error.code).toBe('INTERNAL_ERROR'); + }); + + it('should validate limit parameter', async () => { + const response = await fastify.inject({ + method: 'GET', + url: '/files?limit=2000', + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + + it('should validate memberOf parameter format', async () => { + const response = await fastify.inject({ + method: 'GET', + url: '/files?memberOf=invalid-url', + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + + it('should validate sort parameter', async () => { + const response = await fastify.inject({ + method: 'GET', + url: '/files?sort=invalid', + }); + const body = JSON.parse(response.body) as StandardErrorResponse; + + expect(response.statusCode).toBe(400); + expect(body.error).toBe('Bad Request'); + }); + }); + + describe('Custom File Transformers', () => { + it('should apply custom file transformers', async () => { + await fastifyBefore(); + + // Custom transformer that adds extra fields + const customTransformer: FileTransformer = (file) => ({ + ...file, + customField: 'test-value', + uppercaseFilename: file.filename.toUpperCase(), + }); + + await fastify.register(filesRoute, { + fileAccessTransformer: AllPublicFileAccessTransformer, + fileTransformers: [customTransformer], + }); + + prisma.file.findMany.mockResolvedValue([mockFile1]); + prisma.file.count.mockResolvedValue(1); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { files: unknown[] }; + expect(body.files[0]).toMatchObject({ + id: 'http://example.com/file1.wav', + filename: 'file1.wav', + customField: 
'test-value', + uppercaseFilename: 'FILE1.WAV', + }); + + await fastifyAfter(); + }); + + it('should apply custom file access transformer', async () => { + await fastifyBefore(); + + // Custom access transformer that restricts content access + const customFileAccessTransformer: FileAccessTransformer = (file) => ({ + ...file, + access: { + content: false, + contentAuthorizationUrl: 'https://example.com/request-access', + }, + }); + + await fastify.register(filesRoute, { + fileAccessTransformer: customFileAccessTransformer, + }); + + prisma.file.findMany.mockResolvedValue([mockFile1]); + prisma.file.count.mockResolvedValue(1); + + const response = await fastify.inject({ + method: 'GET', + url: '/files', + }); + + expect(response.statusCode).toBe(200); + const body = JSON.parse(response.body) as { files: unknown[] }; + expect(body.files[0]).toMatchObject({ + access: { + content: false, + contentAuthorizationUrl: 'https://example.com/request-access', + }, + }); + + await fastifyAfter(); + }); + }); +}); diff --git a/src/routes/files.ts b/src/routes/files.ts new file mode 100644 index 0000000..5c7bda1 --- /dev/null +++ b/src/routes/files.ts @@ -0,0 +1,87 @@ +import type { FastifyPluginAsync } from 'fastify'; +import type { ZodTypeProvider } from 'fastify-type-provider-zod'; +import { z } from 'zod/v4'; +import { baseFileTransformer } from '../transformers/default.js'; +import type { FileAccessTransformer, FileTransformer } from '../types/transformers.js'; +import { createInternalError } from '../utils/errors.js'; + +const querySchema = z.object({ + memberOf: z.url().optional(), + limit: z.coerce.number().int().min(1).max(1000).default(100), + offset: z.coerce.number().int().min(0).default(0), + sort: z.enum(['id', 'filename', 'createdAt', 'updatedAt']).default('id'), + order: z.enum(['asc', 'desc']).default('asc'), +}); + +type FilesRouteOptions = { + fileAccessTransformer: FileAccessTransformer; + fileTransformers?: FileTransformer[]; +}; + +const files: 
FastifyPluginAsync = async (fastify, opts) => { + const { fileAccessTransformer, fileTransformers = [] } = opts; + + fastify.withTypeProvider().get( + '/files', + { + schema: { + querystring: querySchema, + }, + }, + async (request, reply) => { + const { memberOf, limit, offset, sort, order } = request.query; + + try { + const where: NonNullable[0]>['where'] = {}; + + if (memberOf) { + where.memberOf = memberOf; + } + + const sortField = sort === 'id' ? 'fileId' : sort; + + const dbFiles = await fastify.prisma.file.findMany({ + where, + orderBy: { + [sortField]: order, + }, + skip: offset, + take: limit, + }); + + const total = await fastify.prisma.file.count({ where }); + + // Apply transformers to each entity: base -> access -> additional + const filesWithAccess = await Promise.all( + dbFiles.map(async (dbFile) => { + const standardFile = baseFileTransformer(dbFile); + const authorisedFile = await fileAccessTransformer(standardFile, { + request, + fastify, + }); + + let result = authorisedFile; + for (const transformer of fileTransformers) { + result = await transformer(result, { + request, + fastify, + }); + } + + return result; + }), + ); + + return { + total, + files: filesWithAccess, + }; + } catch (error) { + fastify.log.error('Database error:', error); + return reply.code(500).send(createInternalError()); + } + }, + ); +}; + +export default files; diff --git a/src/routes/search.test.ts b/src/routes/search.test.ts index 9712004..0c18069 100644 --- a/src/routes/search.test.ts +++ b/src/routes/search.test.ts @@ -537,6 +537,31 @@ describe('Search Route', () => { expect(body).toMatchSnapshot(); }); + it('should handle invalid search response with missing hits data', async () => { + const mockSearchResponse = { + body: { + took: 5, + // Missing hits object + }, + }; + + // @ts-expect-error TS is looking at the wrong function signature + opensearch.search.mockResolvedValue(mockSearchResponse); + + const response = await fastify.inject({ + method: 'POST', + url: 
'/search', + payload: { + query: 'test', + }, + }); + + expect(response.statusCode).toBe(500); + const body = JSON.parse(response.body) as { error: { code: string; message: string } }; + expect(body.error.code).toBe('INTERNAL_ERROR'); + expect(body.error.message).toBe('Search failed'); + }); + it('should apply custom entity transformers', async () => { // biome-ignore lint/suspicious/noExplicitAny: fine in tests const customTransformer = async (entity: any) => ({ diff --git a/src/routes/search.ts b/src/routes/search.ts index c69ccd3..e640b7f 100644 --- a/src/routes/search.ts +++ b/src/routes/search.ts @@ -201,6 +201,10 @@ const search: FastifyPluginAsync = async (fastify, opts) => const response = await fastify.opensearch.search(opensearchQuery); + if (!response.body?.hits?.hits) { + throw new Error('Invalid search response: missing hits data'); + } + const rocrateIds = response.body.hits.hits .map((hit) => hit._source?.rocrateId as string | undefined) .filter(Boolean); diff --git a/src/test/integration.setup.ts b/src/test/integration.setup.ts index 367fae6..ffb6d2f 100644 --- a/src/test/integration.setup.ts +++ b/src/test/integration.setup.ts @@ -1,7 +1,7 @@ import { Client } from '@opensearch-project/opensearch'; import type { FastifyInstance } from 'fastify'; import Fastify from 'fastify'; -import app, { AllPublicAccessTransformer } from '../app.js'; +import app, { AllPublicAccessTransformer, AllPublicFileAccessTransformer } from '../app.js'; import { PrismaClient } from '../generated/prisma/client.js'; let fastify: FastifyInstance; @@ -34,6 +34,23 @@ export async function setupIntegrationTests() { opensearch, disableCors: true, accessTransformer: AllPublicAccessTransformer, + fileAccessTransformer: AllPublicFileAccessTransformer, + fileHandler: { + get: async () => { + throw new Error('File handler not implemented in integration tests'); + }, + head: async () => { + throw new Error('File handler not implemented in integration tests'); + }, + }, + 
roCrateHandler: { + get: async () => { + throw new Error('RO-Crate handler not implemented in integration tests'); + }, + head: async () => { + throw new Error('RO-Crate head handler not implemented in integration tests'); + }, + }, }); await fastify.ready(); @@ -102,6 +119,42 @@ export async function seedTestData() { updatedAt: new Date(), rocrate: {}, }, + { + id: 4, + rocrateId: 'http://example.com/entity/4', + name: 'test-audio.wav', + description: 'Test audio file', + entityType: 'http://schema.org/MediaObject', + memberOf: 'http://example.com/entity/2', + rootCollection: 'http://example.com/entity/1', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: { + '@type': ['http://schema.org/MediaObject', 'MediaObject'], + encodingFormat: 'audio/wav', + contentSize: '88200', + }, + }, + { + id: 5, + rocrateId: 'http://example.com/entity/5', + name: 'collection-metadata.csv', + description: 'Collection metadata file', + entityType: 'http://schema.org/MediaObject', + memberOf: 'http://example.com/entity/1', + rootCollection: 'http://example.com/entity/1', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: { + '@type': ['http://schema.org/MediaObject', 'MediaObject'], + encodingFormat: 'text/csv', + contentSize: '1024', + }, + }, ]; await prisma.entity.createMany({ diff --git a/src/test/integration.test.ts b/src/test/integration.test.ts index 2bcbeb2..98a5078 100644 --- a/src/test/integration.test.ts +++ b/src/test/integration.test.ts @@ -40,6 +40,23 @@ describe('Integration Tests', () => { expect(body).toMatchSnapshot(); }); + it('should return File entity from database', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'GET', + url: 
`/entity/${encodeURIComponent('http://example.com/entity/4')}`, + }); + const body = JSON.parse(response.body) as AuthorisedEntity; + + expect(response.statusCode).toBe(200); + expect(body.id).toBe('http://example.com/entity/4'); + expect(body.name).toBe('test-audio.wav'); + expect(body.entityType).toBe('http://schema.org/MediaObject'); + expect(body.memberOf).toBe('http://example.com/entity/2'); + expect(body.rootCollection).toBe('http://example.com/entity/1'); + }); + it('should return 404 for non-existent entity', async () => { const app = getTestApp(); @@ -65,8 +82,8 @@ describe('Integration Tests', () => { const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; expect(response.statusCode).toBe(200); - expect(body.total).toBe(3); - expect(body.entities).toHaveLength(3); + expect(body.total).toBe(5); + expect(body.entities).toHaveLength(5); expect(body.entities[0]).toEqual({ id: 'http://example.com/entity/1', name: 'Test Collection', @@ -96,8 +113,8 @@ describe('Integration Tests', () => { const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; expect(response.statusCode).toBe(200); - expect(body.total).toBe(2); - expect(body.entities).toHaveLength(2); + expect(body.total).toBe(3); + expect(body.entities).toHaveLength(3); expect(body.entities[0].id).toBe('http://example.com/entity/2'); }); @@ -119,6 +136,67 @@ describe('Integration Tests', () => { expect(body.entities[0].id).toBe('http://example.com/entity/3'); }); + it('should filter entities by File entityType', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'GET', + url: '/entities', + query: { + entityType: 'http://schema.org/MediaObject', + }, + }); + const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; + + expect(response.statusCode).toBe(200); + expect(body.total).toBe(2); + expect(body.entities).toHaveLength(2); + 
expect(body.entities[0].entityType).toBe('http://schema.org/MediaObject'); + expect(body.entities[1].entityType).toBe('http://schema.org/MediaObject'); + }); + + it('should filter File entities by memberOf (Object parent)', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'GET', + url: '/entities', + query: { + memberOf: 'http://example.com/entity/2', + entityType: 'http://schema.org/MediaObject', + }, + }); + const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; + + expect(response.statusCode).toBe(200); + expect(body.total).toBe(1); + expect(body.entities).toHaveLength(1); + expect(body.entities[0].id).toBe('http://example.com/entity/4'); + expect(body.entities[0].name).toBe('test-audio.wav'); + expect(body.entities[0].memberOf).toBe('http://example.com/entity/2'); + }); + + it('should filter File entities by memberOf (Collection parent)', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'GET', + url: '/entities', + query: { + memberOf: 'http://example.com/entity/1', + entityType: 'http://schema.org/MediaObject', + }, + }); + const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; + + expect(response.statusCode).toBe(200); + expect(body.total).toBe(1); + expect(body.entities).toHaveLength(1); + expect(body.entities[0].id).toBe('http://example.com/entity/5'); + expect(body.entities[0].name).toBe('collection-metadata.csv'); + expect(body.entities[0].memberOf).toBe('http://example.com/entity/1'); + }); + it('should handle pagination', async () => { const app = getTestApp(); @@ -133,7 +211,7 @@ describe('Integration Tests', () => { const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; expect(response.statusCode).toBe(200); - expect(body.total).toBe(3); + expect(body.total).toBe(5); expect(body.entities).toHaveLength(2); }); @@ -151,9 +229,9 @@ describe('Integration Tests', () 
=> { const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; expect(response.statusCode).toBe(200); - expect(body.entities[0].name).toBe('Test Person'); - expect(body.entities[1].name).toBe('Test Object'); - expect(body.entities[2].name).toBe('Test Collection'); + expect(body.entities[0].name).toBe('test-audio.wav'); + expect(body.entities[1].name).toBe('Test Person'); + expect(body.entities[2].name).toBe('Test Object'); }); }); @@ -248,6 +326,46 @@ describe('Integration Tests', () => { expect(body.entities[1].name).toBe('Test Object'); expect(body.entities[2].name).toBe('Test Person'); }); + + it('should filter search results by File entityType', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'POST', + url: '/search', + payload: { + query: 'test', + searchType: 'basic', + filters: { + entityType: ['http://schema.org/MediaObject'], + }, + }, + }); + const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; + + expect(response.statusCode).toBe(200); + expect(body.entities.every((e) => e.entityType === 'http://schema.org/MediaObject')).toBe(true); + }); + + it('should search for File entities by name', async () => { + const app = getTestApp(); + + const response = await app.inject({ + method: 'POST', + url: '/search', + payload: { + query: 'audio', + searchType: 'basic', + }, + }); + const body = JSON.parse(response.body) as { total: number; entities: AuthorisedEntity[] }; + + expect(response.statusCode).toBe(200); + expect(body.total).toBeGreaterThan(0); + const audioFile = body.entities.find((e) => e.name === 'test-audio.wav'); + expect(audioFile).toBeDefined(); + expect(audioFile?.entityType).toBe('http://schema.org/MediaObject'); + }); }); describe('Error Handling', () => { diff --git a/src/transformers/default.test.ts b/src/transformers/default.test.ts new file mode 100644 index 0000000..c0a2a98 --- /dev/null +++ b/src/transformers/default.test.ts @@ 
-0,0 +1,199 @@ +import { describe, expect, it } from 'vitest'; +import type { Entity } from '../generated/prisma/client.js'; +import { AllPublicAccessTransformer, baseEntityTransformer } from './default.js'; + +describe('baseEntityTransformer', () => { + it('should transform entity to standard entity shape', () => { + const entity: Entity = { + id: 1, + rocrateId: 'http://example.com/entity/123', + name: 'Test Entity', + description: 'A test entity description', + entityType: 'http://pcdm.org/models#Collection', + fileId: null, + memberOf: 'http://example.com/parent', + rootCollection: 'http://example.com/root', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by-sa/4.0/', + createdAt: new Date('2025-01-01'), + updatedAt: new Date('2025-01-02'), + rocrate: { '@context': 'test' }, + meta: { test: 'data' }, + }; + + const result = baseEntityTransformer(entity); + + expect(result).toEqual({ + id: 'http://example.com/entity/123', + name: 'Test Entity', + description: 'A test entity description', + entityType: 'http://pcdm.org/models#Collection', + memberOf: 'http://example.com/parent', + rootCollection: 'http://example.com/root', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by-sa/4.0/', + }); + }); + + it('should handle null memberOf and rootCollection', () => { + const entity: Entity = { + id: 1, + rocrateId: 'http://example.com/collection', + name: 'Top Collection', + description: 'A top-level collection', + entityType: 'http://pcdm.org/models#Collection', + fileId: null, + memberOf: null, + rootCollection: null, + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: {}, + meta: null, + }; + + const result = baseEntityTransformer(entity); + + 
expect(result.memberOf).toBeNull(); + expect(result.rootCollection).toBeNull(); + }); + + it('should exclude database-specific fields', () => { + const entity: Entity = { + id: 1, + rocrateId: 'http://example.com/entity/456', + name: 'Test', + description: 'Test', + entityType: 'http://pcdm.org/models#Object', + fileId: null, + memberOf: null, + rootCollection: null, + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: { test: 'value' }, + meta: { storage: 'path' }, + }; + + const result = baseEntityTransformer(entity); + + // Result should have 'id' (mapped from rocrateId), but not the numeric database id + expect(result.id).toBe('http://example.com/entity/456'); + expect(result).not.toHaveProperty('createdAt'); + expect(result).not.toHaveProperty('updatedAt'); + expect(result).not.toHaveProperty('rocrate'); + expect(result).not.toHaveProperty('meta'); + expect(Object.keys(result)).toEqual([ + 'id', + 'name', + 'description', + 'entityType', + 'memberOf', + 'rootCollection', + 'metadataLicenseId', + 'contentLicenseId', + ]); + }); + + it('should handle File entity (MediaObject) with fileId', () => { + const entity: Entity = { + id: 1, + rocrateId: 'http://example.com/file/audio.wav', + name: 'Audio File', + description: 'An audio recording', + entityType: 'http://schema.org/MediaObject', + fileId: 'http://example.com/files/audio.wav', + memberOf: 'http://example.com/collection', + rootCollection: 'http://example.com/collection', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + createdAt: new Date(), + updatedAt: new Date(), + rocrate: {}, + meta: null, + }; + + const result = baseEntityTransformer(entity); + + expect(result).toEqual({ + id: 'http://example.com/file/audio.wav', + name: 'Audio File', + description: 'An audio 
recording', + entityType: 'http://schema.org/MediaObject', + fileId: 'http://example.com/files/audio.wav', + memberOf: 'http://example.com/collection', + rootCollection: 'http://example.com/collection', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + }); + expect(result.fileId).toBe('http://example.com/files/audio.wav'); + }); +}); + +describe('AllPublicAccessTransformer', () => { + it('should grant full access to metadata and content', () => { + const standardEntity = { + id: 'http://example.com/entity/123', + name: 'Test Entity', + description: 'A test entity', + entityType: 'http://schema.org/MediaObject', + memberOf: 'http://example.com/parent', + rootCollection: 'http://example.com/root', + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + }; + + const result = AllPublicAccessTransformer(standardEntity); + + expect(result).toEqual({ + ...standardEntity, + access: { + metadata: true, + content: true, + }, + }); + }); + + it('should preserve all standard entity fields', () => { + const standardEntity = { + id: 'http://example.com/entity/789', + name: 'Another Entity', + description: 'Another description', + entityType: 'http://schema.org/Person', + memberOf: null, + rootCollection: null, + metadataLicenseId: 'https://creativecommons.org/publicdomain/zero/1.0/', + contentLicenseId: 'https://creativecommons.org/publicdomain/zero/1.0/', + }; + + const result = AllPublicAccessTransformer(standardEntity); + + expect(result.id).toBe(standardEntity.id); + expect(result.name).toBe(standardEntity.name); + expect(result.description).toBe(standardEntity.description); + expect(result.entityType).toBe(standardEntity.entityType); + expect(result.memberOf).toBe(standardEntity.memberOf); + expect(result.rootCollection).toBe(standardEntity.rootCollection); + 
expect(result.metadataLicenseId).toBe(standardEntity.metadataLicenseId); + expect(result.contentLicenseId).toBe(standardEntity.contentLicenseId); + }); + + it('should not add contentAuthorizationUrl for public access', () => { + const standardEntity = { + id: 'http://example.com/entity/public', + name: 'Public Entity', + description: 'Fully public', + entityType: 'http://pcdm.org/models#Collection', + memberOf: null, + rootCollection: null, + metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', + }; + + const result = AllPublicAccessTransformer(standardEntity); + + expect(result.access.contentAuthorizationUrl).toBeUndefined(); + }); +}); diff --git a/src/transformers/default.ts b/src/transformers/default.ts index 6479f0b..79b5b7d 100644 --- a/src/transformers/default.ts +++ b/src/transformers/default.ts @@ -1,4 +1,4 @@ -import type { Entity } from '../generated/prisma/client.js'; +import type { Entity, File } from '../generated/prisma/client.js'; /** * Standard entity shape - output of base transformation @@ -8,12 +8,11 @@ export type StandardEntity = { id: string; name: string; description: string; - entityType: string; memberOf: string | null; rootCollection: string | null; metadataLicenseId: string; contentLicenseId: string; -}; +} & ({ entityType: string; fileId?: never } | { entityType: 'http://schema.org/MediaObject'; fileId: string }); /** * Access information for an entity @@ -32,20 +31,69 @@ export type AuthorisedEntity = StandardEntity & { access: AccessInfo; }; +/** + * Access information for a file + * Files only control content access - metadata is always accessible + */ +type FileAccessInfo = { + content: boolean; + contentAuthorizationUrl?: string; +}; + +/** + * Standard file shape - output of base file transformation + * Does not include access information + * Files only have contentLicenseId (no metadataLicenseId) + */ +export type StandardFile = { + id: string; + 
filename: string; + mediaType: string; + size: number; + memberOf: string; + rootCollection: string; + contentLicenseId: string; +}; + +/** + * Authorised file - includes access information + * This is the output of the file access transformer + * File metadata is always accessible - only content access is controlled + */ +export type AuthorisedFile = StandardFile & { + access: FileAccessInfo; +}; + /** * Base entity transformer - always applied first * Transforms raw database entity to standard entity shape (without access) */ -export const baseEntityTransformer = (entity: Entity): StandardEntity => ({ - id: entity.rocrateId, - name: entity.name, - description: entity.description, - entityType: entity.entityType, - memberOf: entity.memberOf, - rootCollection: entity.rootCollection, - metadataLicenseId: entity.metadataLicenseId, - contentLicenseId: entity.contentLicenseId, -}); +export const baseEntityTransformer = (entity: Entity): StandardEntity => { + const base: StandardEntity = { + id: entity.rocrateId, + name: entity.name, + description: entity.description, + entityType: entity.entityType, + memberOf: entity.memberOf, + rootCollection: entity.rootCollection, + metadataLicenseId: entity.metadataLicenseId, + contentLicenseId: entity.contentLicenseId, + }; + + if (base.entityType === ('http://schema.org/MediaObject' as const)) { + if (!entity.fileId) { + return base; + } + + return { + ...base, + entityType: base.entityType, + fileId: entity.fileId, + }; + } + + return base; +}; /** * All Public Access Transformer - grants full access to metadata and content @@ -72,3 +120,46 @@ export const AllPublicAccessTransformer = (entity: StandardEntity): AuthorisedEn content: true, }, }); + +/** + * Base file transformer - always applied first + * Transforms raw database file to standard file shape (without access) + */ +export const baseFileTransformer = (file: File): StandardFile => ({ + id: file.fileId, + filename: file.filename, + mediaType: file.mediaType, + size: 
Number(file.size), + memberOf: file.memberOf, + rootCollection: file.rootCollection, + contentLicenseId: file.contentLicenseId, +}); + +/** + * All Public File Access Transformer - grants full access to file content + * + * WARNING: This transformer makes ALL file content publicly accessible without restrictions. + * Only use this for fully public datasets where no access control is needed. + * + * For repositories with restricted content, implement a custom fileAccessTransformer + * that checks user permissions and licenses. + * + * Note: File metadata (filename, size, mediaType, etc.) is always accessible. + * This transformer only controls content access. + * + * @example + * ```typescript + * await server.register(arocapi, { + * prisma, + * opensearch, + * accessTransformer: AllPublicAccessTransformer, + * fileAccessTransformer: AllPublicFileAccessTransformer, // Explicit choice for public data + * }); + * ``` + */ +export const AllPublicFileAccessTransformer = (file: StandardFile): AuthorisedFile => ({ + ...file, + access: { + content: true, + }, +}); diff --git a/src/transformers/transformer.test.ts b/src/transformers/transformer.test.ts index 7b349b8..8f65d2c 100644 --- a/src/transformers/transformer.test.ts +++ b/src/transformers/transformer.test.ts @@ -41,11 +41,13 @@ describe('Entity Transformers', () => { name: 'Test Entity', description: 'A test entity', entityType: 'http://schema.org/Person', + fileId: null, memberOf: 'http://example.com/collection', rootCollection: 'http://example.com/root', metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', rocrate: {}, + meta: {}, createdAt: new Date(), updatedAt: new Date(), }; @@ -186,11 +188,13 @@ describe('Entity Transformers', () => { name: 'Test Entity', description: 'A test entity', entityType: 'http://schema.org/Person', + fileId: null, memberOf: 'http://example.com/collection', rootCollection: 'http://example.com/root', 
metadataLicenseId: 'https://creativecommons.org/licenses/by/4.0/', contentLicenseId: 'https://creativecommons.org/licenses/by/4.0/', rocrate: {}, + meta: {}, createdAt: new Date(), updatedAt: new Date(), }; diff --git a/src/types/fileHandlers.ts b/src/types/fileHandlers.ts new file mode 100644 index 0000000..3958021 --- /dev/null +++ b/src/types/fileHandlers.ts @@ -0,0 +1,65 @@ +import type { Readable } from 'node:stream'; +import type { FastifyInstance, FastifyRequest } from 'fastify'; +import type { Entity, File } from '../generated/prisma/client.js'; + +export type FileHandlerContext = { + request: FastifyRequest; + fastify: FastifyInstance; +}; + +export type FileMetadata = { + contentType: string; + contentLength: number; + etag?: string; + lastModified?: Date; +}; + +export type FileRedirectResult = { + type: 'redirect'; + url: string; +}; + +export type FileStreamResult = { + type: 'stream'; + stream: Readable; + metadata: FileMetadata; +}; + +export type FilePathResult = { + type: 'file'; + path: string; // Absolute file path on disk + metadata: FileMetadata; + accelPath?: string; // Optional nginx internal path for X-Accel-Redirect +}; + +export type FileResult = FileRedirectResult | FileStreamResult | FilePathResult; + +export type GetFileHandler = ( + file: File, + context: FileHandlerContext, +) => Promise | FileResult | false; + +export type HeadFileHandler = ( + file: File, + context: FileHandlerContext, +) => Promise | FileMetadata | false; + +export type FileHandler = { + get: GetFileHandler; + head: HeadFileHandler; +}; + +export type GetRoCrateHandler = ( + entity: Entity, + context: FileHandlerContext, +) => Promise | FileResult | false; + +export type HeadRoCrateHandler = ( + entity: Entity, + context: FileHandlerContext, +) => Promise | FileMetadata | false; + +export type RoCrateHandler = { + get: GetRoCrateHandler; + head: HeadRoCrateHandler; +}; diff --git a/src/types/transformers.ts b/src/types/transformers.ts index dff46b9..32570d2 100644 
--- a/src/types/transformers.ts +++ b/src/types/transformers.ts @@ -1,5 +1,5 @@ import type { FastifyInstance, FastifyRequest } from 'fastify'; -import type { AuthorisedEntity, StandardEntity } from '../transformers/default.js'; +import type { AuthorisedEntity, AuthorisedFile, StandardEntity, StandardFile } from '../transformers/default.js'; /** * Context provided to entity transformers @@ -28,3 +28,25 @@ export type EntityTransformer = ( entity: TInput, context: TransformerContext, ) => Promise | TOutput; + +/** + * File access transformer function - required + * Transforms StandardFile to AuthorisedFile by adding access information + * File metadata (filename, size, mediaType, etc.) is always accessible + * Only content access is controlled (access.content) + */ +export type FileAccessTransformer = ( + file: StandardFile, + context: TransformerContext, +) => Promise | AuthorisedFile; + +/** + * File transformer function + * Receives AuthorisedFile and transforms it further + * Transformers are applied as a pipeline, with each transformer receiving + * the output of the previous one + */ +export type FileTransformer = ( + file: TInput, + context: TransformerContext, +) => Promise | TOutput; diff --git a/src/utils/errors.ts b/src/utils/errors.ts index cf71a3a..4a73ba7 100644 --- a/src/utils/errors.ts +++ b/src/utils/errors.ts @@ -4,6 +4,7 @@ const ERROR_CODES = { RATE_LIMIT_EXCEEDED: 'RATE_LIMIT_EXCEEDED', INTERNAL_ERROR: 'INTERNAL_ERROR', INVALID_REQUEST: 'INVALID_REQUEST', + INVALID_ENTITY_TYPE: 'INVALID_ENTITY_TYPE', } as const; type ErrorCode = (typeof ERROR_CODES)[keyof typeof ERROR_CODES]; diff --git a/vitest.config.ts b/vitest.config.ts index f4e90ab..755b7fc 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -23,11 +23,16 @@ export default defineConfig({ // Not part of library 'src/index.ts', + 'src/index.dev.ts', // Only types 'src/types/*', 'src/reset.d.ts', + // Test helpers and setup + 'src/test/integration.setup.ts', + 'src/test/helpers/*', + // 
TODO 'src/express.ts', ],