We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 523f583 commit 1802c46 · Copy full SHA for 1802c46
packages/indexer/src/lib/document-processor.ts
@@ -2,8 +2,8 @@ import { type BaseLogger } from 'pino';
2
import { getBlobNameFromFile } from './blob-storage.js';
3
import { type ContentPage, type ContentSection, type Section } from './document.js';
4
5
-const SENTENCE_ENDINGS = new Set(['.', '!', '?']);
6
-const WORD_BREAKS = new Set([',', ';', ':', ' ', '(', ')', '[', ']', '{', '}', '\t', '\n']);
+const SENTENCE_ENDINGS = new Set(['.', '。', '．', '!', '?', '‼', '⁇', '⁈', '⁉']);
+const WORD_BREAKS = new Set([',', '、', ';', ':', ' ', '(', ')', '[', ']', '{', '}', '\t', '\n']);
7
const MAX_SECTION_LENGTH = 1000;
8
const SENTENCE_SEARCH_LIMIT = 100;
9
const SECTION_OVERLAP = 100;
0 commit comments