-
Notifications
You must be signed in to change notification settings - Fork 3.8k
v2 file processing db schema #13328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: v2
Are you sure you want to change the base?
v2 file processing db schema #13328
Changes from all commits
3a23c7c
daafc72
a68e921
8934fbf
b7c7021
c00a413
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| import { describe, expect, it } from 'vitest' | ||
|
|
||
| import { | ||
| FeatureCapabilitySchema, | ||
| FileProcessorOverrideSchema, | ||
| FileProcessorTemplateSchema, | ||
| FileProcessorTemplatesSchema, | ||
| PRESETS_FILE_PROCESSORS | ||
| } from '../data/presets/file-processing' | ||
| import { FILE_TYPE } from '../data/types/file' | ||
|
|
||
| /* Suite for FeatureCapabilitySchema: checks that one capability entry may list several input file types. */ describe('FeatureCapabilitySchema', () => { | ||
| it('supports multiple input types for a single capability', () => { | ||
| const result = FeatureCapabilitySchema.safeParse({ | ||
| feature: 'text_extraction', | ||
| /* Two input types on a single capability: the case under test. */ inputs: [FILE_TYPE.IMAGE, FILE_TYPE.DOCUMENT], | ||
| output: FILE_TYPE.TEXT | ||
| }) | ||
|
|
||
| /* safeParse reports the outcome via `success`; the multi-input payload is expected to be accepted. */ expect(result.success).toBe(true) | ||
| }) | ||
| }) | ||
|
|
||
| /* Suite for the processor template schemas: the shipped presets must validate, and a template must not repeat a feature. */ describe('FileProcessorTemplatesSchema', () => { | ||
| it('validates built-in presets', () => { | ||
| /* Asserts that parsing the PRESETS_FILE_PROCESSORS constant with the plural (list) schema does not throw. */ expect(() => FileProcessorTemplatesSchema.parse(PRESETS_FILE_PROCESSORS)).not.toThrow() | ||
| }) | ||
|
|
||
| it('rejects duplicate features in a single processor template', () => { | ||
| /* Uses the singular template schema; the two capabilities below share `feature` and differ only in `inputs`. */ const result = FileProcessorTemplateSchema.safeParse({ | ||
| id: 'paddleocr', | ||
| type: 'api', | ||
| capabilities: [ | ||
| { | ||
| feature: 'text_extraction', | ||
| inputs: [FILE_TYPE.IMAGE], | ||
| output: FILE_TYPE.TEXT | ||
| }, | ||
| { | ||
| /* Same feature as the entry above: this duplicate is what should make validation fail. */ feature: 'text_extraction', | ||
| inputs: [FILE_TYPE.DOCUMENT], | ||
| output: FILE_TYPE.TEXT | ||
| } | ||
| ] | ||
| }) | ||
|
|
||
| expect(result.success).toBe(false) | ||
| }) | ||
| }) | ||
|
|
||
| /* Suite for FileProcessorOverrideSchema: one accepted payload and two rejected ones (bad URL, unknown feature key). */ describe('FileProcessorOverrideSchema', () => { | ||
| it('accepts valid overrides', () => { | ||
| /* Payload exercising all three top-level sections: apiKeys, capabilities and options. */ const result = FileProcessorOverrideSchema.safeParse({ | ||
| apiKeys: ['test-key'], | ||
| capabilities: { | ||
| text_extraction: { | ||
| apiHost: 'https://example.com', | ||
| modelId: 'model-1' | ||
| } | ||
| }, | ||
| options: { | ||
| langs: ['eng', 'chi_sim'] | ||
| } | ||
| }) | ||
|
|
||
| expect(result.success).toBe(true) | ||
| }) | ||
|
|
||
| it('rejects invalid urls', () => { | ||
| const result = FileProcessorOverrideSchema.safeParse({ | ||
| capabilities: { | ||
| markdown_conversion: { | ||
| /* Deliberately not a URL: the only field in this payload, so it alone must trigger the failure. */ apiHost: 'not-a-url' | ||
| } | ||
| } | ||
| }) | ||
|
|
||
| expect(result.success).toBe(false) | ||
| }) | ||
|
|
||
| it('rejects unknown feature overrides', () => { | ||
| const result = FileProcessorOverrideSchema.safeParse({ | ||
| capabilities: { | ||
| /* `vision` is the unknown feature key; the apiHost below is a well-formed URL, so the key is the only thing wrong. */ vision: { | ||
| apiHost: 'https://example.com' | ||
| } | ||
| } | ||
| }) | ||
|
|
||
| expect(result.success).toBe(false) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| import { describe, expect, it } from 'vitest' | ||
|
|
||
| import { FILE_TYPE, FileTypeSchema } from '../data/types/file' | ||
|
|
||
| /* Suite for FileTypeSchema: the FILE_TYPE constants validate and an arbitrary string does not. */ describe('FileTypeSchema', () => { | ||
| it('accepts canonical file types', () => { | ||
| /* Checks each of the three FILE_TYPE members referenced in this test: IMAGE, DOCUMENT, TEXT. */ expect(FileTypeSchema.safeParse(FILE_TYPE.IMAGE).success).toBe(true) | ||
| expect(FileTypeSchema.safeParse(FILE_TYPE.DOCUMENT).success).toBe(true) | ||
| expect(FileTypeSchema.safeParse(FILE_TYPE.TEXT).success).toBe(true) | ||
| }) | ||
|
|
||
| it('rejects unknown file types', () => { | ||
| /* 'markdown' is passed as a raw string rather than a FILE_TYPE member and is expected to fail validation. */ expect(FileTypeSchema.safeParse('markdown').success).toBe(false) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| /** | ||
| * File Processing API Schema definitions | ||
| * | ||
| * Contains file processing endpoints for: | ||
| * - Listing available processors | ||
| * - Reading and updating processor configuration | ||
| */ | ||
|
|
||
| import type { FileProcessorId, FileProcessorOverride } from '@shared/data/preference/preferenceTypes' | ||
| import type { FileProcessorMerged } from '@shared/data/presets/file-processing' | ||
|
|
||
| // ============================================================================ | ||
| // API Schema Definitions | ||
| // ============================================================================ | ||
|
|
||
| /** | ||
| * Route map for the file-processing API: each key is a path template, and each member (GET / PATCH) declares the params, body and response types for that method. | ||
| */ | ||
| export interface FileProcessingSchemas { | ||
| /** | ||
| * List available processors | ||
| * @example GET /file-processing/processors | ||
| */ | ||
| '/file-processing/processors': { | ||
| /** Get list of available processors; responds with an array of FileProcessorMerged */ | ||
| GET: { | ||
| response: FileProcessorMerged[] | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Get or update a single processor's configuration; `:id` is typed as FileProcessorId | ||
| * @example GET /file-processing/processors/tesseract | ||
| * @example PATCH /file-processing/processors/tesseract { "apiKeys": ["xxx"] } | ||
| */ | ||
| '/file-processing/processors/:id': { | ||
| /** Get processor configuration; responds with one FileProcessorMerged */ | ||
| GET: { | ||
| params: { id: FileProcessorId } | ||
| response: FileProcessorMerged | ||
| } | ||
| /** Update processor configuration; body is a FileProcessorOverride and the response is a FileProcessorMerged */ | ||
| PATCH: { | ||
| params: { id: FileProcessorId } | ||
| body: FileProcessorOverride | ||
| response: FileProcessorMerged | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,6 +105,44 @@ export type MultiModelMessageStyle = 'horizontal' | 'vertical' | 'fold' | 'grid' | |
|
|
||
| export type MultiModelGridPopoverTrigger = 'hover' | 'click' | ||
|
|
||
| /** Processor kinds as a readonly tuple (`as const`), so the union type below can be derived from the values. */ export const FILE_PROCESSOR_TYPES = ['api', 'builtin'] as const | ||
|
|
||
| /** Union of the FILE_PROCESSOR_TYPES elements: 'api' | 'builtin'. */ export type FileProcessorType = (typeof FILE_PROCESSOR_TYPES)[number] | ||
|
|
||
| /** Feature names as a readonly tuple; these are also the keys allowed in FileProcessorCapabilityOverrides. */ export const FILE_PROCESSOR_FEATURES = ['text_extraction', 'markdown_conversion'] as const | ||
|
|
||
| /** Union of the FILE_PROCESSOR_FEATURES elements. */ export type FileProcessorFeature = (typeof FILE_PROCESSOR_FEATURES)[number] | ||
|
|
||
| /** Processor ids as a readonly tuple; FileProcessorId is derived from this list. */ export const FILE_PROCESSOR_IDS = [ | ||
| 'tesseract', | ||
| 'system', | ||
| 'paddleocr', | ||
| 'ovocr', | ||
| 'mineru', | ||
| 'doc2x', | ||
| 'mistral', | ||
| 'open-mineru' | ||
| ] as const | ||
|
|
||
| /** Union of the FILE_PROCESSOR_IDS elements. */ export type FileProcessorId = (typeof FILE_PROCESSOR_IDS)[number] | ||
|
|
||
| /** Free-form options bag: string keys with `unknown` values, so no option shape is fixed at the type level. */ export type FileProcessorOptions = Record<string, unknown> | ||
|
|
||
| /** Settings that can be overridden for one capability; every field is optional. */ export type CapabilityOverride = { | ||
| apiHost?: string | ||
| modelId?: string | ||
| metadata?: Record<string, unknown> | ||
| } | ||
|
|
||
| /** CapabilityOverride per feature; Partial, so any feature key may be absent. */ export type FileProcessorCapabilityOverrides = Partial<Record<FileProcessorFeature, CapabilityOverride>> | ||
|
|
||
| /** Override payload for a single processor; apiKeys, capabilities and options are all optional. */ export type FileProcessorOverride = { | ||
| apiKeys?: string[] | ||
| capabilities?: FileProcessorCapabilityOverrides | ||
| options?: FileProcessorOptions | ||
| } | ||
|
|
||
| /** FileProcessorOverride per processor id; Partial, so any processor id may be absent. */ export type FileProcessorOverrides = Partial<Record<FileProcessorId, FileProcessorOverride>> | ||
|
Comment on lines
+108
to
+145
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: Agreeing with @EurFelux's earlier comment — since there are already Zod schemas defined in |
||
| // ============================================================================ | ||
| // WebSearch Types | ||
| // ============================================================================ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.