Skip to content

Commit fe182ed

Browse files
author
Hi-Jiajun
committed
fix: address AliceLJY review comments on PR CortexReach#238
- Remove unused SAFE_CHAR_LIMITS, getSafeCharLimit, DEFAULT_SAFE_CHAR_LIMIT
- Add comment explaining batch timeout asymmetry (embedBatchQuery/embedBatchPassage not wrapped)
- Note: withTimeout already has .finally() cleanup, no change needed
1 parent 29f4d68 commit fe182ed

File tree

1 file changed

+4
-19
lines changed

1 file changed

+4
-19
lines changed

src/embedder.ts

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -264,25 +264,6 @@ const EMBED_TIMEOUT_MS = 10_000;
264264
*/
265265
const STRICT_REDUCTION_FACTOR = 0.5; // Each retry must be at most 50% of previous
266266

267-
/**
268-
* Safe character limits per model for forced truncation.
269-
* CJK characters typically consume ~3 tokens each, so the char limit is
270-
* conservative compared to the token limit.
271-
*/
272-
const SAFE_CHAR_LIMITS: Record<string, number> = {
273-
"nomic-embed-text": 2300,
274-
"mxbai-embed-large": 2300,
275-
"all-MiniLM-L6-v2": 1000,
276-
"all-mpnet-base-v2": 1500,
277-
};
278-
279-
const DEFAULT_SAFE_CHAR_LIMIT = 2000;
280-
281-
/** Return a safe character count for forced truncation given a model name. */
282-
function getSafeCharLimit(model: string): number {
283-
return SAFE_CHAR_LIMITS[model] ?? DEFAULT_SAFE_CHAR_LIMIT;
284-
}
285-
286267
export function getVectorDimensions(model: string, overrideDims?: number): number {
287268
if (overrideDims && overrideDims > 0) {
288269
return overrideDims;
@@ -493,6 +474,10 @@ export class Embedder {
493474
return this.withTimeout((signal) => this.embedSingle(text, this._taskPassage, signal), "embedPassage");
494475
}
495476

477+
// Note: embedBatchQuery/embedBatchPassage are NOT wrapped with withTimeout because
478+
// they send multiple texts in a single API call. A fixed EMBED_TIMEOUT_MS deadline
479+
// would fire regardless of batch size or of how many texts had already succeeded.
480+
// Embedding within the batch is still protected by the SDK's own timeout handling.
496481
async embedBatchQuery(texts: string[]): Promise<number[][]> {
497482
return this.embedMany(texts, this._taskQuery);
498483
}

0 commit comments

Comments
 (0)