Skip to content

Commit d4819b2

Browse files
lethemanh
authored and committed
feat: Skip extract file content with txt or markdown file ✨
1 parent f66279b commit d4819b2

File tree

9 files changed

+109
-43
lines changed

9 files changed

+109
-43
lines changed

packages/cozy-viewer/src/Panel/AI/AIAssistantPanel.jsx

Lines changed: 30 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { useLocation, useNavigate } from 'react-router-dom'
55
import { useI18n } from 'twake-i18n'
66

77
import { useClient } from 'cozy-client'
8-
import { extractText, chatCompletion } from 'cozy-client/dist/models/ai'
8+
import { chatCompletion } from 'cozy-client/dist/models/ai'
99
import { fetchBlobFileById } from 'cozy-client/dist/models/file'
1010
import flag from 'cozy-flags'
1111
import logger from 'cozy-logger'
@@ -21,9 +21,9 @@ import Stack from 'cozy-ui/transpiled/react/Stack'
2121
import Typography from 'cozy-ui/transpiled/react/Typography'
2222
import { useAlert } from 'cozy-ui/transpiled/react/providers/Alert'
2323

24+
import { extractFileContent, validateContentSize } from './helpers'
2425
import { SUMMARY_SYSTEM_PROMPT, getSummaryUserPrompt } from './prompts'
2526
import styles from './styles.styl'
26-
import { roughTokensEstimation } from '../../helpers'
2727
import { useViewer } from '../../providers/ViewerProvider'
2828

2929
const AIAssistantPanel = ({ className }) => {
@@ -52,22 +52,10 @@ const AIAssistantPanel = ({ className }) => {
5252
const summarizeFile = async ({ client, file, stream = false, model }) => {
5353
try {
5454
const fileBlob = await fetchBlobFileById(client, file?._id)
55+
const textContent = await extractFileContent(client, fileBlob, file)
5556

56-
const rawTextContent = await extractText(client, fileBlob, {
57-
name: file.name,
58-
mime: file.mime
59-
})
60-
const textContent = rawTextContent ? JSON.stringify(rawTextContent) : ''
61-
62-
const summaryConfig = flag('drive.summary')
63-
if (
64-
summaryConfig?.maxTokens &&
65-
roughTokensEstimation(textContent) > summaryConfig.maxTokens
66-
) {
67-
const error = new Error('DOCUMENT_TOO_LARGE')
68-
error.code = 'DOCUMENT_TOO_LARGE'
69-
throw error
70-
}
57+
const { maxTokens } = flag('drive.summary') ?? {}
58+
validateContentSize(textContent, maxTokens)
7159

7260
const messages = [
7361
{ role: 'system', content: SUMMARY_SYSTEM_PROMPT },
@@ -89,23 +77,22 @@ const AIAssistantPanel = ({ className }) => {
8977
}
9078
}
9179

92-
const persistedSummary = async (
93-
fileMetadata,
94-
targetFileId,
95-
summaryContent
96-
) => {
97-
try {
98-
await client
99-
.collection('io.cozy.files')
100-
.updateMetadataAttribute(targetFileId, {
101-
...fileMetadata,
102-
description: summaryContent
103-
})
104-
fetchedFileIdRef.current = targetFileId
105-
} catch (error) {
106-
logger.error('Error when persisting summary to file metadata:', error)
107-
}
108-
}
80+
const persistedSummary = useCallback(
81+
async (fileMetadata, targetFileId, summaryContent) => {
82+
try {
83+
await client
84+
.collection('io.cozy.files')
85+
.updateMetadataAttribute(targetFileId, {
86+
...fileMetadata,
87+
description: summaryContent
88+
})
89+
fetchedFileIdRef.current = targetFileId
90+
} catch (error) {
91+
logger.error('Error when persisting summary to file metadata:', error)
92+
}
93+
},
94+
[client]
95+
)
10996

11097
useEffect(() => {
11198
activeFileIdRef.current = file?._id || null
@@ -142,10 +129,14 @@ const AIAssistantPanel = ({ className }) => {
142129
await persistedSummary(fileMetadata, targetFileId, summaryContent)
143130
} catch (err) {
144131
if (activeFileIdRef.current === targetFileId) {
145-
const errorMessage =
146-
err.code === 'DOCUMENT_TOO_LARGE'
147-
? t('Viewer.ai.error.documentTooLarge')
148-
: t('Viewer.ai.error.summary')
132+
let errorMessage = t('Viewer.ai.error.summary')
133+
134+
if (err.code === 'DOCUMENT_TOO_LARGE') {
135+
errorMessage = t('Viewer.ai.error.documentTooLarge')
136+
} else if (err.code === 'CONTENT_EXTRACTION_FAILED') {
137+
errorMessage = t('Viewer.ai.error.extractContent')
138+
}
139+
149140
setError(errorMessage)
150141
}
151142
} finally {
@@ -157,7 +148,7 @@ const AIAssistantPanel = ({ className }) => {
157148
}
158149
}
159150
},
160-
[client, file, t]
151+
[client, file, persistedSummary, t]
161152
)
162153

163154
const handleRefresh = () => {
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { extractText } from 'cozy-client/dist/models/ai'
2+
3+
import { roughTokensEstimation, isTextMimeType } from '../../helpers'
4+
import { ContentExtractionError } from '../../helpers/ContentExtractionError'
5+
import { DocumentTooLargeError } from '../../helpers/DocumentTooLargeError'
6+
7+
/**
 * Extracts the content of a file so it can be handed to the summary prompt.
 *
 * Files with a text-based MIME type (see `isTextMimeType`) are read directly
 * from the blob. Every other file is sent through `extractText`.
 *
 * @param {object} client - Cozy client instance, forwarded to `extractText`
 * @param {Blob} fileBlob - File blob to extract content from
 * @param {object} file - File metadata object; `mime` and `name` are read
 * @returns {Promise<string>} JSON stringified content
 * @throws {ContentExtractionError} If the extracted content is empty or
 * contains only whitespace. Errors raised by `fileBlob.text()` or
 * `extractText` are not wrapped and propagate unchanged.
 */
export const extractFileContent = async (client, fileBlob, file) => {
  const content = isTextMimeType(file.mime)
    ? await fileBlob.text()
    : await extractText(client, fileBlob, {
        name: file.name,
        mime: file.mime
      })

  // Only strings can be trimmed. A truthy non-string extraction result is
  // stringified as-is instead of failing with a TypeError on `.trim()`.
  const isEmpty =
    typeof content === 'string' ? content.trim().length === 0 : !content

  if (isEmpty) {
    throw new ContentExtractionError()
  }

  return JSON.stringify(content)
}
34+
35+
/**
 * Validates that content does not exceed the maximum token limit.
 *
 * The size is estimated with `roughTokensEstimation`. When no limit is
 * configured (`maxTokens` is falsy) the check is skipped entirely.
 *
 * @param {string} textContent - Content to validate
 * @param {number} maxTokens - Maximum number of tokens allowed
 * @throws {DocumentTooLargeError} If the estimated token count exceeds the limit
 */
export const validateContentSize = (textContent, maxTokens) => {
  if (!maxTokens) return

  // Treat missing content as empty so the estimator always receives a string
  const tokens = roughTokensEstimation(textContent ?? '')

  if (tokens > maxTokens) {
    throw new DocumentTooLargeError()
  }
}

packages/cozy-viewer/src/helpers.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,14 @@ export const roughTokensEstimation = text => {
8484
return Math.ceil(text.length / 4)
8585
}
8686

87+
/**
 * Tells whether a MIME type belongs to the `text/*` family.
 * The comparison is case-insensitive; any non-string value yields `false`.
 * @param {string} mime - MIME type of the file
 * @returns {boolean} Whether the MIME type starts with `text/`
 */
export const isTextMimeType = mime => {
  if (typeof mime !== 'string') {
    return false
  }

  const normalizedMime = mime.toLowerCase()
  return normalizedMime.startsWith('text/')
}
94+
8795
/**
8896
* Check if a file is compatible with AI summary feature
8997
* Compatible file types are defined in the drive.summary flag
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
const CONTENT_EXTRACTION_FAILED = 'CONTENT_EXTRACTION_FAILED'

/**
 * Error signalling that no usable content could be extracted from a file.
 * Both `message` and `code` are set to `CONTENT_EXTRACTION_FAILED` so callers
 * can branch on `err.code`.
 */
export class ContentExtractionError extends Error {
  constructor() {
    super(CONTENT_EXTRACTION_FAILED)
    Object.assign(this, {
      code: CONTENT_EXTRACTION_FAILED,
      name: 'ContentExtractionError'
    })
  }
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
const DOCUMENT_TOO_LARGE = 'DOCUMENT_TOO_LARGE'

/**
 * Error signalling that a document exceeds the allowed size.
 * Both `message` and `code` are set to `DOCUMENT_TOO_LARGE` so callers can
 * branch on `err.code`.
 */
export class DocumentTooLargeError extends Error {
  constructor() {
    super(DOCUMENT_TOO_LARGE)
    Object.assign(this, {
      code: DOCUMENT_TOO_LARGE,
      name: 'DocumentTooLargeError'
    })
  }
}

packages/cozy-viewer/src/locales/en.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@
8686
"bodyText": "Summary",
8787
"error": {
8888
"summary": "Failed to generate summary. Please try again.",
89-
"documentTooLarge": "This document is too large to summarize. Please try with a shorter document."
89+
"documentTooLarge": "This document is too large to summarize. Please try with a shorter document.",
90+
"extractContent": "Failed to extract content from file"
9091
},
9192
"copied": "Summary copied to clipboard",
9293
"footerText": "This content is generated by AI and may contain errors.",

packages/cozy-viewer/src/locales/fr.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@
8686
"bodyText": "Résumé",
8787
"error": {
8888
"summary": "Échec de la génération du résumé. Veuillez réessayer.",
89-
"documentTooLarge": "Ce document est trop volumineux pour être résumé. Veuillez essayer avec un document plus court."
89+
"documentTooLarge": "Ce document est trop volumineux pour être résumé. Veuillez essayer avec un document plus court.",
90+
"extractContent": "Échec de l'extraction du contenu du fichier"
9091
},
9192
"copied": "Résumé copié dans le presse-papier",
9293
"footerText": "Ce contenu est généré par IA et peut contenir des erreurs.",

packages/cozy-viewer/src/locales/ru.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@
8686
"bodyText": "Резюме",
8787
"error": {
8888
"summary": "Не удалось создать резюме. Пожалуйста, попробуйте снова.",
89-
"documentTooLarge": "Этот документ слишком большой для резюмирования. Пожалуйста, попробуйте с более коротким документом."
89+
"documentTooLarge": "Этот документ слишком большой для резюмирования. Пожалуйста, попробуйте с более коротким документом.",
90+
"extractContent": "Не удалось извлечь содержимое из файла"
9091
},
9192
"copied": "Резюме скопировано в буфер обмена",
9293
"footerText": "Этот контент создан AI и может содержать ошибки.",

packages/cozy-viewer/src/locales/vi.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@
8686
"bodyText": "Tóm tắt",
8787
"error": {
8888
"summary": "Không thể tạo tóm tắt. Vui lòng thử lại.",
89-
"documentTooLarge": "Tài liệu này quá lớn để tóm tắt. Vui lòng thử với tài liệu ngắn hơn."
89+
"documentTooLarge": "Tài liệu này quá lớn để tóm tắt. Vui lòng thử với tài liệu ngắn hơn.",
90+
"extractContent": "Không thể trích xuất nội dung từ tệp"
9091
},
9192
"copied": "Đã sao chép tóm tắt vào khay nhớ tạm",
9293
"footerText": "Nội dung này được tạo bởi AI và có thể chứa lỗi.",

0 commit comments

Comments
 (0)