Skip to content

Commit 241d9fd

Browse files
waleedlatif1waleed
authored and committed
improvement(kb): encode non-ASCII headers for kb uploads (#1595)
* improvement(kb): encode non-ASCII headers for kb uploads
* cleanup
* increase timeouts to match trigger
1 parent 97a8778 commit 241d9fd

File tree

7 files changed

+42
-20
lines changed

7 files changed

+42
-20
lines changed

apps/sim/app/api/files/serve/[...path]/route.ts

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import { readFile } from 'fs/promises'
22
import type { NextRequest } from 'next/server'
33
import { NextResponse } from 'next/server'
4+
import { checkHybridAuth } from '@/lib/auth/hybrid'
45
import { createLogger } from '@/lib/logs/console/logger'
56
import { downloadFile, getStorageProvider, isUsingCloudStorage } from '@/lib/uploads'
67
import { S3_KB_CONFIG } from '@/lib/uploads/setup'
78
import '@/lib/uploads/setup.server'
8-
import { getSession } from '@/lib/auth'
99
import {
1010
createErrorResponse,
1111
createFileResponse,
@@ -29,23 +29,19 @@ export async function GET(
2929

3030
logger.info('File serve request:', { path })
3131

32-
const session = await getSession()
33-
if (!session?.user?.id) {
34-
logger.warn('Unauthorized file access attempt', { path })
32+
const authResult = await checkHybridAuth(request, { requireWorkflowId: false })
33+
34+
if (!authResult.success) {
35+
logger.warn('Unauthorized file access attempt', { path, error: authResult.error })
3536
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
3637
}
3738

38-
const userId = session.user.id
39+
const userId = authResult.userId
3940
const fullPath = path.join('/')
4041
const isS3Path = path[0] === 's3'
4142
const isBlobPath = path[0] === 'blob'
4243
const isCloudPath = isS3Path || isBlobPath
4344
const cloudKey = isCloudPath ? path.slice(1).join('/') : fullPath
44-
const isExecutionFile = cloudKey.split('/').length >= 3 && !cloudKey.startsWith('kb/')
45-
46-
if (!isExecutionFile) {
47-
logger.info('Authenticated file access granted', { userId, path: cloudKey })
48-
}
4945

5046
if (isUsingCloudStorage() || isCloudPath) {
5147
const bucketType = request.nextUrl.searchParams.get('bucket')
@@ -64,7 +60,7 @@ export async function GET(
6460
}
6561
}
6662

67-
async function handleLocalFile(filename: string, userId: string): Promise<NextResponse> {
63+
async function handleLocalFile(filename: string, userId?: string): Promise<NextResponse> {
6864
try {
6965
const filePath = findLocalFile(filename)
7066

apps/sim/app/api/files/upload/route.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,7 @@ export async function POST(request: NextRequest) {
123123
}
124124
}
125125

126-
// Create the serve path
127-
const servePath = `/api/files/serve/${result.key}`
126+
const servePath = result.path
128127

129128
const uploadResult = {
130129
name: originalName,

apps/sim/app/api/files/utils.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,22 @@ function getSecureFileHeaders(filename: string, originalContentType: string) {
307307
}
308308
}
309309

310+
/**
 * Build the `filename` parameter(s) of a Content-Disposition header.
 *
 * Always emits a quoted ASCII `filename="..."` fallback. When the fallback
 * cannot represent the name exactly (non-ASCII characters, control characters,
 * quotes or backslashes), an RFC 5987 / RFC 8187 `filename*=UTF-8''...`
 * parameter carrying the exact name is appended.
 *
 * @param filename - Raw file name; may contain arbitrary characters.
 * @returns The parameter string to place after the disposition type,
 *   e.g. `filename="report.pdf"`.
 */
function encodeFilenameForHeader(filename: string): string {
  // encodeURIComponent throws URIError on unpaired surrogates; substitute U+FFFD
  // so a malformed name can never crash the response.
  const wellFormed = filename.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    '\uFFFD'
  )

  // ASCII fallback: anything outside printable ASCII (including CR/LF and other
  // control characters) becomes '_', then '"' and '\' are backslash-escaped so
  // they cannot terminate the quoted-string early.
  const asciiFallback = wellFormed.replace(/[^\x20-\x7E]/g, '_').replace(/["\\]/g, '\\$&')

  if (asciiFallback === wellFormed) {
    return `filename="${asciiFallback}"`
  }

  // encodeURIComponent leaves ' ( ) * unescaped, but none of them are valid
  // attr-char in an RFC 5987 ext-value, so percent-encode them explicitly.
  const extendedValue = encodeURIComponent(wellFormed).replace(
    /['()*]/g,
    (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
  )

  return `filename="${asciiFallback}"; filename*=UTF-8''${extendedValue}`
}
325+
310326
/**
311327
* Create a file response with appropriate security headers
312328
*/
@@ -317,7 +333,7 @@ export function createFileResponse(file: FileResponse): NextResponse {
317333
status: 200,
318334
headers: {
319335
'Content-Type': contentType,
320-
'Content-Disposition': `${disposition}; filename="${file.filename}"`,
336+
'Content-Disposition': `${disposition}; ${encodeFilenameForHeader(file.filename)}`,
321337
'Cache-Control': 'public, max-age=31536000', // Cache for 1 year
322338
'X-Content-Type-Options': 'nosniff',
323339
'Content-Security-Policy': "default-src 'none'; style-src 'unsafe-inline'; sandbox;",

apps/sim/background/knowledge-processing.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export type DocumentProcessingPayload = {
2626

2727
export const processDocument = task({
2828
id: 'knowledge-process-document',
29-
maxDuration: env.KB_CONFIG_MAX_DURATION || 300,
29+
maxDuration: env.KB_CONFIG_MAX_DURATION || 600,
3030
retry: {
3131
maxAttempts: env.KB_CONFIG_MAX_ATTEMPTS || 3,
3232
factor: env.KB_CONFIG_RETRY_FACTOR || 2,

apps/sim/lib/env.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ export const env = createEnv({
146146
RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('1000'), // Enterprise tier async API executions per minute
147147

148148
// Knowledge Base Processing Configuration - Shared across all processing methods
149-
KB_CONFIG_MAX_DURATION: z.number().optional().default(300), // Max processing duration in s
149+
KB_CONFIG_MAX_DURATION: z.number().optional().default(600), // Max processing duration in seconds (10 minutes)
150150
KB_CONFIG_MAX_ATTEMPTS: z.number().optional().default(3), // Max retry attempts
151151
KB_CONFIG_RETRY_FACTOR: z.number().optional().default(2), // Retry backoff factor
152152
KB_CONFIG_MIN_TIMEOUT: z.number().optional().default(1000), // Min timeout in ms

apps/sim/lib/knowledge/documents/document-processor.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,9 @@ async function parseDocument(
180180
}
181181

182182
async function handleFileForOCR(fileUrl: string, filename: string, mimeType: string) {
183-
if (fileUrl.startsWith('https://')) {
183+
const isExternalHttps = fileUrl.startsWith('https://') && !fileUrl.includes('/api/files/serve/')
184+
185+
if (isExternalHttps) {
184186
return { httpsUrl: fileUrl }
185187
}
186188

@@ -207,7 +209,16 @@ async function downloadFileWithTimeout(fileUrl: string): Promise<Buffer> {
207209
const timeoutId = setTimeout(() => controller.abort(), TIMEOUTS.FILE_DOWNLOAD)
208210

209211
try {
210-
const response = await fetch(fileUrl, { signal: controller.signal })
212+
const isInternalFileServe = fileUrl.includes('/api/files/serve/')
213+
const headers: HeadersInit = {}
214+
215+
if (isInternalFileServe) {
216+
const { generateInternalToken } = await import('@/lib/auth/internal')
217+
const token = await generateInternalToken()
218+
headers.Authorization = `Bearer ${token}`
219+
}
220+
221+
const response = await fetch(fileUrl, { signal: controller.signal, headers })
211222
clearTimeout(timeoutId)
212223

213224
if (!response.ok) {

apps/sim/lib/knowledge/documents/service.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@ import type { DocumentSortField, SortOrder } from './types'
1717
const logger = createLogger('DocumentService')
1818

1919
const TIMEOUTS = {
20-
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Increased to 10 minutes to match Trigger's timeout
20+
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Default 10 minutes for KB document processing
2121
EMBEDDINGS_API: (env.KB_CONFIG_MAX_TIMEOUT || 10000) * 18,
2222
} as const
2323

2424
// Configuration for handling large documents
2525
const LARGE_DOC_CONFIG = {
2626
MAX_CHUNKS_PER_BATCH: 500, // Insert embeddings in batches of 500
27-
MAX_EMBEDDING_BATCH: 50, // Generate embeddings in batches of 50
27+
MAX_EMBEDDING_BATCH: 500, // Generate embeddings in batches of 500
2828
MAX_FILE_SIZE: 100 * 1024 * 1024, // 100MB max file size
2929
MAX_CHUNKS_PER_DOCUMENT: 100000, // Maximum chunks allowed per document
3030
}

0 commit comments

Comments
 (0)